目前的互聯網,是屬於世界的。越來越多的郵件都採用UTF-8編碼,但我們仍習慣用ASCII/GB2312來顯示和編輯,所以UTF-8編碼的內容需要先轉換。為此特地寫了一個類,來封裝解析的部分。

/* UTF8Charset.h */
class?CUTF8Charset??


{
public:
????CUTF8Charset();
????virtual?~CUTF8Charset();

public:
????static?void?UTF_8ToGB2312(CString?&strOut,?char?*pText,?int?pLen);
????static?void?GB2312ToUTF_8(CString?&strOut,?char?*pText,?int?pLen);

????static?void?UTF_8ToGB2312(char?*pOut,?char?*pText,?int?pLen);
????static?void?GB2312ToUTF_8(char?*pOut,?char?*pText,?int?pLen);

????//?Unicode?轉換成UTF-8
????static?void?UnicodeToUTF_8(char*?pOut,WCHAR*?pText);
????//?GB2312?轉換成 Unicode
????static?void?Gb2312ToUnicode(WCHAR*?pOut,char?*gbBuffer);
????//?把Unicode?轉換成?GB2312
????static?void?UnicodeToGB2312(char*?pOut,?WCHAR*?pText);
????//?把UTF-8轉換成Unicode
????static?void?UTF_8ToUnicode(WCHAR*?pOut,char*?pText);
};


/* UTF8Charset.cpp */
#include "UTF8Charset.h"

#include <vector>

// Default constructor. The body is empty: there is nothing to initialise.
CUTF8Charset::CUTF8Charset()
{
}

// Destructor. The body is empty: there is nothing to release.
CUTF8Charset::~CUTF8Charset()
{
}

void?CUTF8Charset::UTF_8ToUnicode(WCHAR*?pOut,?char?*pText)


{
????char*?uchar?=?(char?*)pOut;

????uchar[1]?=?((pText[0]?&?0x0F)?<<?4)?+?((pText[1]?>>?2)?&?0x0F);
????uchar[0]?=?((pText[1]?&?0x03)?<<?6)?+?(pText[2]?&?0x3F);
}

void?CUTF8Charset::UnicodeToGB2312(char*?pOut,?WCHAR*?pText)


{
????::WideCharToMultiByte(CP_ACP,NULL,pText,1,pOut,sizeof(WCHAR),NULL,NULL);
}

void?CUTF8Charset::Gb2312ToUnicode(WCHAR*?pOut,?char?*gbBuffer)


{
????::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1);
}

void?CUTF8Charset::UnicodeToUTF_8(char*?pOut,?WCHAR*?pText)


{
????//?注意?WCHAR高低字的順序,低字節在前,高字節在后
????char*?pchar?=?(char?*)pText;

????pOut[0]?=?(0xE0?|?((pchar[1]?&?0xF0)?>>?4));
????pOut[1]?=?(0x80?|?((pchar[1]?&?0x0F)?<<?2))?+?((pchar[0]?&?0xC0)?>>?6);
????pOut[2]?=?(0x80?|?(pchar[0]?&?0x3F));
}

void?CUTF8Charset::GB2312ToUTF_8(char?*pOut,?char?*pText,?int?pLen)


{
????char?buf[4];
????memset(buf,0,4);

????int?i?=?0;
????int?j?=?0;????
????while(i?<?pLen)

????
{
????????//如果是英文直接復制就可以
????????if(?*(pText?+?i)?>=?0)

????????
{
????????????pOut[j++]?=?pText[i++];
????????}
????????else

????????
{
????????????WCHAR?pbuffer;
????????????Gb2312ToUnicode(&pbuffer,?pText+i);????????????
????????????UnicodeToUTF_8(buf,&pbuffer);

????????????unsigned?short?int?tmp?=?0;
????????????tmp?=?pOut[j]?=?buf[0];
????????????tmp?=?pOut[j+1]?=?buf[1];
????????????tmp?=?pOut[j+2]?=?buf[2];????????????

????????????j?+=?3;
????????????i?+=?2;
????????}
????}
????pOut[j]?=?'\0';
}

void?CUTF8Charset::UTF_8ToGB2312(char?*pOut,?char?*pText,?int?pLen)


{
????int?i?=0;
????int?j?=?0;
????char?Ctemp[3]?=?"";

????while(i?<?pLen)

????
{
????????if(pText[i]?>?0)

????????
{
????????????pOut[j++]?=?pText[i++];????????????
????????}
????????else?????????????????

????????
{
????????????WCHAR?Wtemp;
????????????UTF_8ToUnicode(&Wtemp,pText+i);
????????????UnicodeToGB2312(Ctemp,&Wtemp);

????????????pOut[j]?=?Ctemp[0];
????????????pOut[j?+?1]?=?Ctemp[1];

????????????i?+=?3;????
????????????j?+=?2;???
????????}
????}
????pOut[j]?=?'\0';
}

void?CUTF8Charset::GB2312ToUTF_8(CString?&strOut,?char?*pText,?int?pLen)


{????
????int?nBufferLen?=?pLen?/?2?*?3;????//按照全是漢字的情況預留空間
????char?*rst?=?new?char[nBufferLen];
????memset(rst,0,nBufferLen);

????GB2312ToUTF_8(rst,?pText,?pLen);

????strOut?=?rst;
????delete?[]?rst;
????rst?=?NULL;
}

void?CUTF8Charset::UTF_8ToGB2312(CString?&strOut,?char?*pText,?int?pLen)


{
????char?*rst?=?new?char[pLen];
????memset(rst,0,pLen);

????UTF_8ToGB2312(rst,?pText,?pLen);

????strOut?=?rst;
????delete?[]?rst;
????rst?=?NULL;
}
posted on 2006-05-12 15:07
雙魚座的程序員 閱讀(676)
評論(0) 編輯 收藏 引用