• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>
            隨筆 - 298  文章 - 377  trackbacks - 0
            <2013年4月>
            31123456
            78910111213
            14151617181920
            21222324252627
            2829301234
            567891011

            常用鏈接

            留言簿(34)

            隨筆分類

            隨筆檔案

            文章檔案

            相冊

            收藏夾

            搜索

            •  

            最新評論

            閱讀排行榜

            評論排行榜

            linux下:

            // Bit masks used to classify UTF-8 bytes.
            #define         MASKBITS                0x3F    // payload bits of a continuation byte
            #define         MASKBYTE                0x80    // 10xxxxxx: continuation-byte marker
            #define         MASK2BYTES                0xC0    // 110xxxxx: lead byte of a 2-byte sequence
            #define         MASK3BYTES                0xE0    // 1110xxxx: lead byte of a 3-byte sequence
            #define         MASK4BYTES                0xF0    // 11110xxx: lead byte of a 4-byte sequence
            #define         MASK5BYTES                0xF8
            #define         MASK6BYTES                0xFC

            typedef unsigned short   Unicode2Bytes;
            typedef unsigned int     Unicode4Bytes;


            /**
             * Decodes UTF-8 text into 16-bit code points (Basic Multilingual Plane only).
             *
             * One-, two- and three-byte sequences are decoded. Anything else (a stray
             * continuation byte, a lead byte of a longer sequence, or a sequence that
             * is truncated or has a bad continuation byte) yields a single U+FFFD and
             * decoding resumes at the next byte that is not a continuation byte.
             * Overlong encodings are not rejected.
             *
             * @param input   UTF-8 encoded bytes.
             * @param output  Receives the decoded characters; any previous content is discarded.
             */
            void UTF8Decode2BytesUnicode(const std::string& input, std::wstring& output)
            {
                const Unicode2Bytes kReplacement = 0xFFFD;
                const size_t length = input.length();

                output.clear();
                output.reserve(length);

                size_t i = 0;
                while (i < length)
                {
                    // Work on unsigned bytes: plain char may be signed, which would
                    // break both the range test and the mask comparisons.
                    const unsigned char lead = static_cast<unsigned char>(input[i]);

                    Unicode2Bytes ch = 0;
                    size_t trailing = 0;      // continuation bytes expected after the lead byte
                    bool valid = true;

                    if (lead < MASKBYTE)
                    {
                        // 0xxxxxxx
                        ch = lead;
                    }
                    else if ((lead & MASK3BYTES) == MASK2BYTES)
                    {
                        // 110xxxxx 10xxxxxx
                        ch = lead & 0x1F;
                        trailing = 1;
                    }
                    else if ((lead & MASK4BYTES) == MASK3BYTES)
                    {
                        // 1110xxxx 10xxxxxx 10xxxxxx
                        ch = lead & 0x0F;
                        trailing = 2;
                    }
                    else
                    {
                        valid = false;
                    }

                    // Reject sequences that would run past the end of the input.
                    if (valid && trailing > length - i - 1)
                        valid = false;

                    for (size_t k = 1; valid && k <= trailing; ++k)
                    {
                        const unsigned char cont = static_cast<unsigned char>(input[i + k]);
                        if ((cont & MASK2BYTES) != MASKBYTE)
                            valid = false;
                        else
                            ch = static_cast<Unicode2Bytes>((ch << 6) | (cont & MASKBITS));
                    }

                    if (valid)
                    {
                        output += static_cast<wchar_t>(ch);
                        i += trailing + 1;
                        continue;
                    }

                    // Malformed input: always make progress so the loop terminates.
                    output += static_cast<wchar_t>(kReplacement);
                    ++i;
                    while (i < length &&
                           (static_cast<unsigned char>(input[i]) & MASK2BYTES) == MASKBYTE)
                    {
                        ++i;
                    }
                }
            }

            void UTF8Decode2BytesAssciChar(const std::string& input, char** output)
            {
                std::wstring wsStrOutput;
                if (input.empty())
                    return;

                if (*output != NULL)
                {
                    free(*output);
                    *output = NULL;
                }
                UTF8Decode2BytesUnicode(input, wsStrOutput);
                char* pChar = (char*)malloc(wsStrOutput.length() * 2 + 1);
                memset(pChar, 0, wsStrOutput.length() * 2 + 1);
            #ifdef WIN32
                WideCharToMultiByte( CP_ACP, 0, wsStrOutput.c_str(), -1,
                    pChar, wsStrOutput.length() * 2 + 1, NULL, NULL );
            #else
                //mbstowcs()  wcstombs()
                assert(false);
            #endif
                *output = pChar;
            }
            //---------------------------------------
            #include <iconv.h>
            #include <iostream>

            #define OUTLEN 255

            using namespace std;

            // 代碼轉換操作類
            /**
             * Character-set converter built on an iconv(3) conversion descriptor.
             *
             * NOTE(review): the class is copyable, but copies share one descriptor and
             * each copy's destructor closes it; avoid copying instances.
             */
            class CodeConverter {
            private:
                iconv_t cd;   // (iconv_t)-1 when iconv_open failed
            public:
                // Opens a descriptor that converts from from_charset to to_charset.
                CodeConverter(const char *from_charset,const char *to_charset) {
                    cd = iconv_open(to_charset,from_charset);
                }

                // Closes the descriptor if it was opened successfully.
                ~CodeConverter() {
                    if (cd != (iconv_t)-1)
                        iconv_close(cd);
                }

                /**
                 * Converts inlen bytes at inbuf into outbuf (capacity outlen bytes).
                 *
                 * outbuf is zero-filled first, so the result is NUL-terminated
                 * whenever the conversion leaves at least one byte unused.
                 *
                 * @return the value returned by iconv(), or -1 when the descriptor is
                 *         invalid or a length is negative.
                 */
                int convert(char *inbuf,int inlen,char *outbuf,int outlen) {
                    if (outbuf != NULL && outlen > 0)
                        memset(outbuf,0,outlen);

                    if (cd == (iconv_t)-1 || inlen < 0 || outlen < 0)
                        return -1;

                    // iconv updates these through size_t*; casting the address of an
                    // int would make it read and write past the int on LP64 targets.
                    size_t inleft = (size_t)inlen;
                    size_t outleft = (size_t)outlen;
                    char *in = inbuf;
                    char *out = outbuf;

                    return (int)iconv(cd,&in,&inleft,&out,&outleft);
                }
            };

            // Demo: converts one sample phrase from UTF-8 to GB2312 and back, printing
            // the input and output bytes of each conversion.
            int main(int argc, char **argv)
            {
                // The sample is the four-character Chinese phrase for "installing",
                // written as explicit byte escapes so it does not depend on the
                // encoding this source file is saved in. Mutable arrays are used
                // because CodeConverter::convert takes non-const char*.
                char in_utf8[] = "\xE6\xAD\xA3\xE5\x9C\xA8\xE5\xAE\x89\xE8\xA3\x85";
                char in_gb2312[] = "\xD5\xFD\xD4\xDA\xB0\xB2\xD7\xB0";
                char out[OUTLEN];

                // utf-8-->gb2312
                CodeConverter cc("utf-8","gb2312");
                cc.convert(in_utf8,strlen(in_utf8),out,OUTLEN);
                cout << "utf-8-->gb2312 in=" << in_utf8 << ",out=" << out << endl;

                // gb2312-->utf-8
                CodeConverter cc2("gb2312","utf-8");
                cc2.convert(in_gb2312,strlen(in_gb2312),out,OUTLEN);
                cout << "gb2312-->utf-8 in=" << in_gb2312 << ",out=" << out << endl;

                return 0;
            }

            posted on 2007-08-26 02:33 聶文龍 閱讀(1196) 評論(5)  編輯 收藏 引用 所屬分類: c++

            FeedBack:
            # re: 編碼問題 2007-08-26 03:07 聶文龍
            VC 實現 漢字 GBK(GB2312) 轉化為 UTF8 編碼
            // Re-encodes strGBK in place from the system ANSI code page (CP_ACP) to
            // UTF-8 by way of a wide-character buffer. If either conversion step
            // reports failure the string is left unchanged.
            // NOTE(review): the (LPCTSTR) cast only yields narrow text in non-UNICODE
            // builds -- confirm the project's character-set setting.
            void ConvertGBKToUtf8(CString& strGBK)
            {
                // First pass: ask how many wide characters are needed (terminator included).
                const int wideLen = MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, NULL, 0);
                if (wideLen <= 0)
                    return;

                // wchar_t rather than unsigned short: that is what the API expects.
                wchar_t* wide = new wchar_t[wideLen + 1];
                memset(wide, 0, (wideLen + 1) * sizeof(wchar_t));
                MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, wide, wideLen);

                // Second pass: wide characters to UTF-8, again sizing the buffer first.
                const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
                if (utf8Len > 0)
                {
                    char* utf8 = new char[utf8Len + 1];
                    memset(utf8, 0, utf8Len + 1);
                    WideCharToMultiByte(CP_UTF8, 0, wide, -1, utf8, utf8Len, NULL, NULL);

                    strGBK = utf8;
                    delete[] utf8;
                }

                delete[] wide;
            }
              
            # re: 編碼問題 2007-08-26 03:43 聶文龍
            //這是個類strCoding (strCoding.h文件)
            #pragma once
            #include <iostream>
            #include <string>
            #include <windows.h>
            using namespace std;

            // Helper for converting text between GB2312 (through the Windows ANSI code
            // page), UTF-8 and URL percent-encoding.
            class strCoding
            {
            public:
            strCoding(void);
            ~strCoding(void);

            void UTF_8ToGB2312(string &pOut, char *pText, int pLen);// UTF-8 to GB2312
            void GB2312ToUTF_8(string& pOut,char *pText, int pLen); // GB2312 to UTF-8
            string UrlGB2312(char * str); // URL-encode the bytes of str as given (GB2312)
            string UrlUTF8(char * str); // convert str to UTF-8, then URL-encode it
            string UrlUTF8Decode(string str); // URL-decode, then convert UTF-8 to GB2312
            string UrlGB2312Decode(string str); // URL-decode, bytes kept as they are (GB2312)

            private:
            // Single-character conversion helpers used by the public methods.
            void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer);
            void UTF_8ToUnicode(WCHAR* pOut,char *pText);
            void UnicodeToUTF_8(char* pOut,WCHAR* pText);
            void UnicodeToGB2312(char* pOut,WCHAR uData);
            // Hex helpers used when decoding %XX escapes.
            char CharToInt(char ch);
            char StrToBin(char *str);

            };
            //這是個類strCoding (strCoding.cpp文件)
            #include "StdAfx.h"

            #include ".\strcoding.h"



            // Nothing to set up: the class has no data members.
            strCoding::strCoding() {}

            // Nothing to release.
            strCoding::~strCoding() {}
            // Converts the two bytes at gbBuffer, interpreted in the ANSI code page,
            // into a single wide character stored at pOut.
            void strCoding::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer)
            {
                const int sourceBytes = 2;
                const int destChars = 1;
                ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, sourceBytes, pOut, destChars);
            }
            // Packs the payload bits of the three bytes at pText (read as one 3-byte
            // UTF-8 sequence) into the two bytes of *pOut: byte 0 receives the low
            // eight bits, byte 1 the high eight bits.
            void strCoding::UTF_8ToUnicode(WCHAR* pOut,char *pText)
            {
                char* dest = (char *)pOut;

                const char first = pText[0];
                const char second = pText[1];
                const char third = pText[2];

                // High byte: 4 bits from the first byte, top 4 payload bits of the second.
                dest[1] = ((first & 0x0F) << 4) + ((second >> 2) & 0x0F);
                // Low byte: bottom 2 payload bits of the second byte, 6 bits of the third.
                dest[0] = ((second & 0x03) << 6) + (third & 0x3F);
            }

            // Writes the wide character at pText to pOut as a three-byte UTF-8
            // sequence (always three bytes, whatever the character's value).
            void strCoding::UnicodeToUTF_8(char* pOut,WCHAR* pText)
            {
                // Mind the byte order inside the WCHAR: low byte first, high byte second.
                char* raw = (char *)pText;
                const char low = raw[0];
                const char high = raw[1];

                pOut[0] = (0xE0 | ((high & 0xF0) >> 4));
                pOut[1] = (0x80 | ((high & 0x0F) << 2)) + ((low & 0xC0) >> 6);
                pOut[2] = (0x80 | (low & 0x3F));
            }
            // Converts one wide character to the ANSI code page, writing at most
            // sizeof(WCHAR) bytes to pOut.
            void strCoding::UnicodeToGB2312(char* pOut,WCHAR uData)
            {
                const int sourceChars = 1;
                const int destCapacity = sizeof(WCHAR);
                WideCharToMultiByte(CP_ACP, 0, &uData, sourceChars, pOut, destCapacity, NULL, NULL);
            }

            //做為解Url使用
            // Used when decoding URLs: maps one hexadecimal digit (either case) to its
            // value 0..15, or returns -1 when ch is not a hexadecimal digit.
            char strCoding::CharToInt(char ch)
            {
                char value = -1;
                if ('0' <= ch && ch <= '9')
                    value = (char)(ch - '0');
                else if ('a' <= ch && ch <= 'f')
                    value = (char)(ch - 'a' + 10);
                else if ('A' <= ch && ch <= 'F')
                    value = (char)(ch - 'A' + 10);
                return value;
            }
            // Combines the two hexadecimal digits str[0] and str[1] into one byte,
            // e.g. "B0" becomes 10110000.
            char strCoding::StrToBin(char *str)
            {
                const char highNibble = CharToInt(str[0]);   // 'B' -> 00001011
                const char lowNibble = CharToInt(str[1]);    // '0' -> 00000000

                return (char)((highNibble << 4) | lowNibble);
            }


            //UTF_8 轉gb2312
            /**
             * Converts UTF-8 text to the ANSI code page (GB2312/GBK on a system
             * configured for it).
             *
             * ASCII bytes are copied as they are. Two- and three-byte UTF-8 sequences
             * are decoded and converted one character at a time. Any other byte, or a
             * sequence that is truncated or has a bad continuation byte, is replaced
             * by '?'. Conversion stops at the first NUL byte or after pLen bytes.
             *
             * @param pOut   Receives the converted text (previous content replaced).
             * @param pText  UTF-8 input; need not be NUL-terminated.
             * @param pLen   Number of bytes available at pText.
             */
            void strCoding::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
            {
                string result;
                if (pLen > 0)
                    result.reserve(pLen);

                int i = 0;
                while (i < pLen)
                {
                    const unsigned char lead = (unsigned char)pText[i];

                    // The result has always been handed over as a C string, so a NUL
                    // byte ends the useful output.
                    if (lead == 0)
                        break;

                    if (lead < 0x80)
                    {
                        result += pText[i];
                        ++i;
                        continue;
                    }

                    WCHAR wide = 0;
                    int consumed = 0;

                    if ((lead & 0xF0) == 0xE0 && i + 2 < pLen
                        && (pText[i + 1] & 0xC0) == 0x80
                        && (pText[i + 2] & 0xC0) == 0x80)
                    {
                        // 1110xxxx 10xxxxxx 10xxxxxx
                        UTF_8ToUnicode(&wide, pText + i);
                        consumed = 3;
                    }
                    else if ((lead & 0xE0) == 0xC0 && i + 1 < pLen
                        && (pText[i + 1] & 0xC0) == 0x80)
                    {
                        // 110xxxxx 10xxxxxx
                        wide = (WCHAR)(((lead & 0x1F) << 6) | (pText[i + 1] & 0x3F));
                        consumed = 2;
                    }

                    if (consumed == 0)
                    {
                        // Not something this routine can decode: step over one byte so
                        // that we never read past pLen and never loop forever.
                        result += '?';
                        ++i;
                        continue;
                    }

                    // Zeroed on every pass so that a one-byte result is not followed
                    // by a leftover byte from the previous character.
                    char buf[4] = {0};
                    UnicodeToGB2312(buf, wide);
                    result += buf;

                    i += consumed;
                }

                pOut = result;
            }

            //GB2312 轉為 UTF-8
            /**
             * Converts ANSI-code-page text (GB2312/GBK on a system configured for it)
             * to UTF-8.
             *
             * Bytes below 0x80 are copied unchanged (NUL bytes are dropped). Every
             * other byte is taken as the first half of a two-byte character, which is
             * converted through a wide character into three UTF-8 bytes. A lead byte
             * with no partner inside pLen, or a pair the system cannot convert,
             * becomes '?'.
             *
             * @param pOut   Receives the UTF-8 text (previous content replaced).
             * @param pText  Input bytes; need not be NUL-terminated.
             * @param pLen   Number of bytes available at pText.
             */
            void strCoding::GB2312ToUTF_8(string& pOut,char *pText, int pLen)
            {
                pOut.clear();

                int i = 0;
                while (i < pLen)
                {
                    // Plain ASCII can be copied straight across.
                    if (pText[i] >= 0)
                    {
                        if (pText[i] != '\0')
                            pOut += pText[i];
                        ++i;
                        continue;
                    }

                    // A lead byte at the very end has no second byte to pair with.
                    if (i + 1 >= pLen)
                    {
                        pOut += '?';
                        break;
                    }

                    // Stays 0 if the conversion fails; no double-byte character maps
                    // to U+0000, so 0 reliably signals failure.
                    WCHAR wide = 0;
                    Gb2312ToUnicode(&wide, pText + i);

                    if (wide == 0)
                    {
                        pOut += '?';
                    }
                    else
                    {
                        char buf[4] = {0};
                        UnicodeToUTF_8(buf, &wide);
                        pOut.append(buf);
                    }

                    i += 2;
                }
            }
            //把str編碼為網頁中的 GB2312 url encode ,英文不變,漢字雙字節 如%3D%AE%88
            // URL-encodes the bytes of the NUL-terminated string str as they are:
            // alphanumeric bytes are kept, whitespace becomes '+', and every other
            // byte becomes '%' followed by two uppercase hexadecimal digits.
            string strCoding::UrlGB2312(char * str)
            {
                static const char kHexDigits[] = "0123456789ABCDEF";

                string encoded;
                for (const char* cursor = str; *cursor != '\0'; ++cursor)
                {
                    const BYTE byte = (BYTE)*cursor;

                    if (isalnum(byte))
                    {
                        encoded += *cursor;
                        continue;
                    }
                    if (isspace(byte))
                    {
                        encoded += '+';
                        continue;
                    }

                    encoded += '%';
                    encoded += kHexDigits[byte >> 4];
                    encoded += kHexDigits[byte % 16];
                }
                return encoded;
            }

            //把str編碼為網頁中的 UTF-8 url encode ,英文不變,漢字三字節 如%3D%AE%88

            // Converts the NUL-terminated string str to UTF-8 with GB2312ToUTF_8 and
            // URL-encodes the result: alphanumeric bytes are kept, whitespace becomes
            // '+', and every other byte becomes '%' plus two uppercase hex digits.
            string strCoding::UrlUTF8(char * str)
            {
                static const char kHexDigits[] = "0123456789ABCDEF";

                string utf8;
                GB2312ToUTF_8(utf8, str, (int)strlen(str));

                string encoded;
                const size_t count = utf8.length();
                for (size_t pos = 0; pos != count; ++pos)
                {
                    const BYTE byte = (BYTE)utf8.at(pos);

                    if (isalnum(byte))
                    {
                        encoded += (char)byte;
                    }
                    else if (isspace(byte))
                    {
                        encoded += '+';
                    }
                    else
                    {
                        encoded += '%';
                        encoded += kHexDigits[byte >> 4];
                        encoded += kHexDigits[byte % 16];
                    }
                }
                return encoded;
            }
            //把url GB2312解碼
            /**
             * Decodes URL percent-encoding without interpreting the resulting bytes.
             *
             * "%XY" with two hexadecimal digits becomes the byte 0xXY and '+' becomes
             * a space. A '%' that is not followed by two hexadecimal digits
             * (including one at the very end of the string) is copied through
             * literally.
             *
             * @param str  URL-encoded text.
             * @return the decoded bytes.
             */
            string strCoding::UrlGB2312Decode(string str)
            {
                string output;
                const size_t len = str.length();
                size_t i = 0;

                while (i < len)
                {
                    const char current = str[i];

                    // Look at the two digits only when both lie inside the string.
                    if (current == '%' && i + 2 < len)
                    {
                        char digits[2] = { str[i + 1], str[i + 2] };
                        if (CharToInt(digits[0]) != (char)-1 && CharToInt(digits[1]) != (char)-1)
                        {
                            output += StrToBin(digits);
                            i += 3;
                            continue;
                        }
                    }

                    output += (current == '+') ? ' ' : current;
                    ++i;
                }

                return output;
            }
            //把url utf8解碼
            // Decodes a URL whose escaped bytes are UTF-8: the percent-escapes are
            // removed with UrlGB2312Decode and the bytes are then passed through
            // UTF_8ToGB2312.
            string strCoding::UrlUTF8Decode(string str)
            {
                string unescaped = UrlGB2312Decode(str);

                string converted = "";
                UTF_8ToGB2312(converted, (char *)unescaped.data(), strlen(unescaped.data()));
                return converted;
            }

            //test
            #include "stdafx.h"
            #include "strCoding.h"

            using namespace std;


            int main()
            {

            strCoding cfm;
            string keyword="大家好,歡迎你";
            string Temp="";
            string Output="";

            //把關鍵字做url的utf8編碼
            Temp= cfm.UrlUTF8((char *)keyword.data());
            cout<<Temp<<endl;

            //把url的utf8編碼的結果解碼
            Temp =cfm.UrlUTF8Decode(Temp);
            cout<<Temp<<endl;

            //把關鍵字做url的gb2312編碼
            Temp =cfm.UrlGB2312((char *)keyword.data());
            cout<<Temp<<endl;

            //把url的gb2312編碼的結果解碼
            Temp =cfm.UrlGB2312Decode(Temp);
            cout<<Temp<<endl;


            //把關鍵字GB2312轉UTF_8

            cfm.GB2312ToUTF_8(Output,(char *)keyword.data(),strlen(keyword.data()));
            cout<<Output<<endl;

            //把GB2312轉UTF_8轉為中文
            cfm.UTF_8ToGB2312(Temp,(char *)Output.data(),strlen(Output.data()));
            cout<<Temp<<endl;


            //system("pasue");
            getchar();

            return 0;
            //
            }


            在VC7win32下調試通過  回復  更多評論
              
            # re: 編碼問題 2007-08-26 03:44 聶文龍
            // ChineseCodeLib.h: interface for the CChineseCodeLib class.
            //
            //////////////////////////////////////////////////////////////////////
            #include<string>
            using namespace std;

            /*
            功能:漢字GB2312與UTF-8編碼互轉
            作者:litz
            Email:mycro@163.com
            參考:吳康彬先生的文章《UTF-8與GB2312之間的互換》
            http://www.vckbase.com/document/viewdoc/?id=1397
            */


            #if !defined(__CCHINESECODELIB_H_)
            #define __CCHINESECODELIB_H_

            // Static helpers for converting Chinese text between GB2312 (through the
            // Windows ANSI code page) and UTF-8.
            class CChineseCodeLib
            {
            public:
            // Whole-string conversions; pLen is the number of bytes at pText.
            static void UTF_8ToGB2312(string& pOut,char *pText, int pLen);
            static void GB2312ToUTF_8(string& pOut,char *pText, int pLen);
            // Convert one Unicode character to UTF-8
            static void UnicodeToUTF_8(char* pOut,WCHAR* pText);
            // Convert one GB2312 character to Unicode
            static void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer);
            // Convert one Unicode character to GB2312
            static void UnicodeToGB2312(char* pOut,unsigned short uData);
            // Convert one UTF-8 character to Unicode
            static void UTF_8ToUnicode(WCHAR* pOut,char* pText);

            CChineseCodeLib();
            virtual ~CChineseCodeLib();
            };

            #endif // !defined(__CCHINESECODELIB_H_)











            // ChineseCodeLib.cpp: implementation of the CChineseCodeLib class.
            //
            //////////////////////////////////////////////////////////////////////

            #include "stdafx.h"
            #include "ChineseCodeLib.h"

            //////////////////////////////////////////////////////////////////////
            // Construction/Destruction
            //////////////////////////////////////////////////////////////////////

            // Nothing to set up: the class has no data members.
            CChineseCodeLib::CChineseCodeLib() {}

            // Nothing to release.
            CChineseCodeLib::~CChineseCodeLib() {}


            // Packs the payload bits of the three bytes at pText (read as one 3-byte
            // UTF-8 sequence) into the two bytes of *pOut: byte 0 receives the low
            // eight bits, byte 1 the high eight bits.
            void CChineseCodeLib::UTF_8ToUnicode(WCHAR* pOut,char *pText)
            {
                char* dest = (char *)pOut;

                const char first = pText[0];
                const char second = pText[1];
                const char third = pText[2];

                // High byte: 4 bits from the first byte, top 4 payload bits of the second.
                dest[1] = ((first & 0x0F) << 4) + ((second >> 2) & 0x0F);
                // Low byte: bottom 2 payload bits of the second byte, 6 bits of the third.
                dest[0] = ((second & 0x03) << 6) + (third & 0x3F);
            }

            void CChineseCodeLib::UnicodeToGB2312(char* pOut,unsigned short uData)
            {
            WideCharToMultiByte(CP_ACP,NULL,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
            return;
            }

            // Converts the two bytes at gbBuffer, interpreted in the ANSI code page,
            // into a single wide character stored at pOut.
            void CChineseCodeLib::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer)
            {
                const int sourceBytes = 2;
                const int destChars = 1;
                ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, sourceBytes, pOut, destChars);
            }

            // Writes the wide character at pText to pOut as a three-byte UTF-8
            // sequence (always three bytes, whatever the character's value).
            void CChineseCodeLib::UnicodeToUTF_8(char* pOut,WCHAR* pText)
            {
                // Mind the byte order inside the WCHAR: low byte first, high byte second.
                char* pchar = (char *)pText;

                // 1110xxxx: top four bits of the high byte.
                pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4));
                // 10xxxxxx: bottom four bits of the high byte, then the top two bits
                // of the LOW byte (these were previously taken from the high byte,
                // which corrupted the character).
                pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[0] & 0xC0) >> 6);
                // 10xxxxxx: bottom six bits of the low byte.
                pOut[2] = (0x80 | (pchar[0] & 0x3F));
            }

            /**
             * Converts ANSI-code-page text (GB2312/GBK on a system configured for it)
             * to UTF-8.
             *
             * Bytes below 0x80 are copied unchanged. Every other byte is taken as the
             * first half of a two-byte character, which is converted through a wide
             * character into three UTF-8 bytes. A lead byte with no partner inside
             * pLen, or a pair the system cannot convert, becomes '?'. Conversion
             * stops at the first NUL byte or after pLen bytes.
             *
             * @param pOut   Receives the UTF-8 text (previous content replaced).
             * @param pText  Input bytes; need not be NUL-terminated.
             * @param pLen   Number of bytes available at pText.
             */
            void CChineseCodeLib::GB2312ToUTF_8(string& pOut,char *pText, int pLen)
            {
                // Built in a string that grows as needed: each two-byte character
                // expands to three bytes, which a buffer of pLen + pLen/4 + 2 bytes
                // could not hold.
                string result;
                if (pLen > 0)
                    result.reserve(pLen + pLen / 2);

                int i = 0;
                while (i < pLen)
                {
                    // The result has always been handed over as a C string, so a NUL
                    // byte ends the useful output.
                    if (pText[i] == '\0')
                        break;

                    // Plain ASCII can be copied straight across.
                    if (pText[i] > 0)
                    {
                        result += pText[i];
                        ++i;
                        continue;
                    }

                    // A lead byte at the very end has no second byte to pair with.
                    if (i + 1 >= pLen)
                    {
                        result += '?';
                        break;
                    }

                    // Stays 0 if the conversion fails; no double-byte character maps
                    // to U+0000, so 0 reliably signals failure.
                    WCHAR wide = 0;
                    Gb2312ToUnicode(&wide, pText + i);

                    if (wide == 0)
                    {
                        result += '?';
                    }
                    else
                    {
                        char buf[4] = {0};
                        UnicodeToUTF_8(buf, &wide);
                        result.append(buf, 3);
                    }

                    i += 2;
                }

                // Hand back the result.
                pOut = result;
            }

            /**
             * Converts UTF-8 text to the ANSI code page (GB2312/GBK on a system
             * configured for it).
             *
             * ASCII bytes are copied as they are. Two- and three-byte UTF-8 sequences
             * are decoded and converted one character at a time. Any other byte, or a
             * sequence that is truncated or has a bad continuation byte, is replaced
             * by '?'. Conversion stops at the first NUL byte or after pLen bytes.
             *
             * @param pOut   Receives the converted text (previous content replaced).
             * @param pText  UTF-8 input; need not be NUL-terminated.
             * @param pLen   Number of bytes available at pText.
             */
            void CChineseCodeLib::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
            {
                // Built in a string: a pLen-byte buffer had no room for the
                // terminator when the input was entirely ASCII.
                string result;
                if (pLen > 0)
                    result.reserve(pLen);

                int i = 0;
                while (i < pLen)
                {
                    const unsigned char lead = (unsigned char)pText[i];

                    // A NUL byte ends the text; it must not be mistaken for the start
                    // of a multi-byte sequence.
                    if (lead == 0)
                        break;

                    if (lead < 0x80)
                    {
                        result += pText[i];
                        ++i;
                        continue;
                    }

                    WCHAR wide = 0;
                    int consumed = 0;

                    if ((lead & 0xF0) == 0xE0 && i + 2 < pLen
                        && (pText[i + 1] & 0xC0) == 0x80
                        && (pText[i + 2] & 0xC0) == 0x80)
                    {
                        // 1110xxxx 10xxxxxx 10xxxxxx
                        UTF_8ToUnicode(&wide, pText + i);
                        consumed = 3;
                    }
                    else if ((lead & 0xE0) == 0xC0 && i + 1 < pLen
                        && (pText[i + 1] & 0xC0) == 0x80)
                    {
                        // 110xxxxx 10xxxxxx
                        wide = (WCHAR)(((lead & 0x1F) << 6) | (pText[i + 1] & 0x3F));
                        consumed = 2;
                    }

                    if (consumed == 0)
                    {
                        // Not something this routine can decode: step over one byte so
                        // that we never read past pLen and never loop forever.
                        result += '?';
                        ++i;
                        continue;
                    }

                    // Zeroed on every pass so that a one-byte result is not followed
                    // by a leftover byte from the previous character.
                    char buf[4] = {0};
                    UnicodeToGB2312(buf, wide);
                    result += buf;

                    i += consumed;
                }

                pOut = result;
            }

              回復  更多評論
              
            # re: 編碼問題 2007-09-07 17:13 
            怎么我的不能通過編譯呀  回復  更多評論
              
            # re: 編碼問題 2007-09-07 17:15 
            我用的是帶MAIN函數的那個
            為什么老是說d:\oo\oo.cpp(5) : error C2143: syntax error : missing ';' before 'using' 錯誤呀  回復  更多評論
              
            国内精品久久久久影院日本| 久久久久久精品成人免费图片| 久久久久综合网久久| 99久久精品无码一区二区毛片| 亚洲精品美女久久久久99小说| 欧美伊人久久大香线蕉综合| 久久久久久久99精品免费观看| 色老头网站久久网| 国产精品免费久久久久电影网| 色综合久久久久综合体桃花网| 国产福利电影一区二区三区久久久久成人精品综合 | 久久精品亚洲AV久久久无码| 7777久久亚洲中文字幕| 2021国内精品久久久久久影院| 久久99免费视频| 久久亚洲春色中文字幕久久久| 麻豆久久| 久久久91人妻无码精品蜜桃HD| 99久久99久久| 久久夜色精品国产欧美乱| 亚洲另类欧美综合久久图片区| 久久精品国产精品亚洲精品| 无码久久精品国产亚洲Av影片| 中文成人久久久久影院免费观看 | 久久黄视频| 国产精品99久久不卡| 91亚洲国产成人久久精品网址| 国内精品久久久久久野外| 日韩人妻无码一区二区三区久久| 久久天天躁夜夜躁狠狠 | 久久国产精品久久国产精品| 久久国产免费直播| 久久精品国产亚洲AV香蕉| 久久久久久毛片免费看| 久久精品中文字幕第23页| 国产成人精品久久综合| 久久国产高清一区二区三区| 久久AⅤ人妻少妇嫩草影院| 青草影院天堂男人久久| 国产精品午夜久久| 亚洲性久久久影院|