美女视频黄a大片欧美,久久国产毛片,欧美视频观看一区

單詞統計

要求：讀取一個文本文件，然後統計其中出現的單詞，並打印每個單詞出現的次數。僅考慮英文單詞，不考慮中文。

小乓練題：

// Word-count exercise: loads c:\a.txt, splits it into runs of alphanumeric
// characters, and prints every distinct word followed by the number of times
// it occurs (in order of first appearance).
//
// Returns 0 on success and -1 when the input file cannot be opened or read.
int main(int argc, char* argv[])
{
    using namespace std;

    ifstream infile("c:\\a.txt", ios::binary);
    if (!infile)
    {
        cout<<"Can not open sourse file!"<<endl;
        return -1;
    }

    // Determine the file length by seeking to the end; tellg() yields -1 on
    // failure, which must not be used as an allocation size.
    infile.seekg(0, ios::end);
    const streamoff nFileSize = infile.tellg();
    infile.seekg(0, ios::beg);
    if (nFileSize < 0)
    {
        cout<<"Can not read source file!"<<endl;
        return -1;
    }

    // Read the file directly into the string. Constructing the string from a
    // raw char buffer would require a terminating NUL that the file contents
    // do not provide, and would stop early at any embedded NUL byte.
    string s(static_cast<string::size_type>(nFileSize), '\0');
    if (!s.empty())
    {
        infile.read(&s[0], static_cast<streamsize>(nFileSize));
        // Keep only the bytes that were actually read.
        s.resize(static_cast<string::size_type>(infile.gcount()));
    }
    infile.close();

    // Parallel vectors: vecCount[i] is the number of occurrences of vecSubstr[i].
    vector<string> vecSubstr;
    vector<int> vecCount;

    string temp;
    const string::size_type nLength = s.size();
    string::size_type pre = 0;
    string::size_type next = 0;

    while (next < nLength)
    {
        pre = next;
        // Advance to the end of the current word. The cast is required because
        // isalnum() is undefined for negative values (bytes >= 0x80 when char
        // is signed).
        while ((next < nLength) && isalnum(static_cast<unsigned char>(s[next])))
        {
            next++;
        }
        if (pre != next)
        {
            // Extract the word and echo it as it is found.
            temp = s.substr(pre, next - pre);
            cout<<temp<<endl;

            // Linear search for a previously seen copy of the word.
            vector<string>::size_type iPosition = 0;
            while (iPosition < vecSubstr.size())
            {
                if (vecSubstr[iPosition].compare(temp) == 0)
                    break;

                iPosition++;
            }

            if (iPosition == vecSubstr.size())
            {
                // First occurrence of this word.
                vecSubstr.push_back(temp);
                vecCount.push_back(1);
            }
            else
            {
                vecCount[iPosition]++;
            }
        }
        // Step over the separator that ended the word (or the current
        // non-alphanumeric character).
        next++;
    }

    // Print each word followed by its count.
    for (vector<string>::size_type j = 0; j < vecSubstr.size(); j++)
    {
        cout<<vecSubstr[j]<<endl<<vecCount[j]<<endl;
    }

    system("pause");

    return 0;
}

C++代碼：

// Word-count program: reads C:\text.txt, splits it into runs of alphanumeric
// characters, and prints every distinct word with its number of occurrences,
// sorted by word.
//
// Returns 0 on success and -1 when the input file cannot be opened.
int main(int argc, char* argv[])
{

    // File path (string literals are const, so the pointer must be too).
    const char* szPath = "C:\\text.txt";

    std::ifstream fin(szPath);
    if (!fin)
    {
        std::cout<<"Can not open file"<<std::endl;
        return -1;
    }
    // The usual way to read the full contents of a text file.
    std::string strText = std::string(std::istreambuf_iterator<char>(fin), std::istreambuf_iterator<char>());

    typedef std::map<std::string, int> CountMap;
    CountMap counter;

    const std::string::size_type nLength = strText.length();
    std::string::size_type nLeft = 0;

    while (nLeft < nLength)
    {
        // Skip separators until the first alphanumeric character. The cast is
        // required because isalnum() is undefined for negative values.
        while (nLeft < nLength && !isalnum(static_cast<unsigned char>(strText[nLeft])))
        {
            ++nLeft;
        }
        if (nLeft == nLength)
        {
            // Only separators remained; there is no further word.
            break;
        }

        // Find the first non-alphanumeric position after the word start.
        std::string::size_type nRight = nLeft + 1;
        while (nRight < nLength && isalnum(static_cast<unsigned char>(strText[nRight])))
        {
            ++nRight;
        }

        // [nLeft, nRight) is a complete word. It is counted even when it runs
        // to the very end of the text, so a file without a trailing separator
        // does not lose its last word.
        std::string strWord = strText.substr(nLeft, nRight - nLeft);
        counter[strWord] += 1;

        // Continue after the word; the separator skip above handles the rest.
        nLeft = nRight;
    }

    // Print the results.
    for (CountMap::iterator iter = counter.begin(); counter.end()!=iter; ++iter)
    {
        std::cout<<iter->first<<"\t\t"<<iter->second<<std::endl;
    }

    system("pause");
    return 0;
}

python 代碼：

import re

# Path of the text file whose words are counted.
filepath=r'c:/text.txt'

# Read the whole file, then release the handle before doing any processing.
with open(filepath) as file:
    text=file.read()

# Collect runs of word characters. Splitting on \W+ instead would produce empty
# strings whenever the text starts or ends with a non-word character (such as
# the usual trailing newline), and '' would then be counted as a word.
d={}
for item in re.findall(r'\w+', text):
    d[item]=d.get(item, 0) +1

# Print each word followed by its number of occurrences.
for key, value in d.items():
    print('%s\t\t%s'%(key, value))

小乓加油！

posted on 2008-11-19 10:29 李現民閱讀(884) 評論(0) 編輯收藏引用所屬分類: 小乓練題

只有注冊用戶登錄后才能發表評論。
【推薦】100%開源！大型工業跨平臺軟件C++源碼提供，建模，組態！

相關文章: 五筆編碼查詢單詞統計

網站導航: 博客園 IT新聞 BlogJava 博問 Chat2DB 管理

清風竹林

導航

統計

常用鏈接

留言簿(5)

隨筆分類

隨筆檔案

相冊

TLink

搜索

最新評論

閱讀排行榜

評論排行榜

單詞統計