A Pisces Programmer
漂亮的程序，開心地工作

隨筆-3 評論-13 文章-10 trackbacks-0

ＵＴＦ８解析 for Windows mobile

目前的互聯網是屬於全世界的，越來越多的郵件採用 UTF-8 編碼。但我們仍習慣用 ASCII／GB2312 來顯示和編輯，因此 UTF-8 編碼的內容需要先做轉換。為此特地寫了一個類，把轉換（解析）的部分封裝起來。

/* UTF8Charset.h */

class?CUTF8Charset??

{

public:

????CUTF8Charset();

????virtual?~CUTF8Charset();

public:

????static?void?UTF_8ToGB2312(CString?&strOut,?char?*pText,?int?pLen);

????static?void?GB2312ToUTF_8(CString?&strOut,?char?*pText,?int?pLen);

????static?void?UTF_8ToGB2312(char?*pOut,?char?*pText,?int?pLen);

????static?void?GB2312ToUTF_8(char?*pOut,?char?*pText,?int?pLen);

????//?Unicode?轉換成UTF-8

????static?void?UnicodeToUTF_8(char*?pOut,WCHAR*?pText);

????//?GB2312?轉換成　Unicode

????static?void?Gb2312ToUnicode(WCHAR*?pOut,char?*gbBuffer);

????//?把Unicode?轉換成?GB2312

????static?void?UnicodeToGB2312(char*?pOut,?WCHAR*?pText);

????//?把UTF-8轉換成Unicode

????static?void?UTF_8ToUnicode(WCHAR*?pOut,char*?pText);

};

/* UTF8Charset.cpp */

#include?"UTF8Charset.h"

// Default constructor: performs no initialisation.
CUTF8Charset::CUTF8Charset() {}

// Destructor: releases nothing.
CUTF8Charset::~CUTF8Charset() {}

void?CUTF8Charset::UTF_8ToUnicode(WCHAR*?pOut,?char?*pText)

{

????char*?uchar?=?(char?*)pOut;

????uchar[1]?=?((pText[0]?&?0x0F)?<<?4)?+?((pText[1]?>>?2)?&?0x0F);

????uchar[0]?=?((pText[1]?&?0x03)?<<?6)?+?(pText[2]?&?0x3F);

}

void?CUTF8Charset::UnicodeToGB2312(char*?pOut,?WCHAR*?pText)

{

????::WideCharToMultiByte(CP_ACP,NULL,pText,1,pOut,sizeof(WCHAR),NULL,NULL);

}

void?CUTF8Charset::Gb2312ToUnicode(WCHAR*?pOut,?char?*gbBuffer)

{

????::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1);

}

void?CUTF8Charset::UnicodeToUTF_8(char*?pOut,?WCHAR*?pText)

{

????//?注意?WCHAR高低字的順序,低字節在前，高字節在后

????char*?pchar?=?(char?*)pText;

????pOut[0]?=?(0xE0?|?((pchar[1]?&?0xF0)?>>?4));

????pOut[1]?=?(0x80?|?((pchar[1]?&?0x0F)?<<?2))?+?((pchar[0]?&?0xC0)?>>?6);

????pOut[2]?=?(0x80?|?(pchar[0]?&?0x3F));

}

void?CUTF8Charset::GB2312ToUTF_8(char?*pOut,?char?*pText,?int?pLen)

{

????char?buf[4];

????memset(buf,0,4);

????int?i?=?0;

????int?j?=?0;????

????while(i?<?pLen)

????{

????????//如果是英文直接復制就可以

????????if(?*(pText?+?i)?>=?0)

????????{

????????????pOut[j++]?=?pText[i++];

????????}

????????else

????????{

????????????WCHAR?pbuffer;

????????????Gb2312ToUnicode(&pbuffer,?pText+i);????????????

????????????UnicodeToUTF_8(buf,&pbuffer);

????????????unsigned?short?int?tmp?=?0;

????????????tmp?=?pOut[j]?=?buf[0];

????????????tmp?=?pOut[j+1]?=?buf[1];

????????????tmp?=?pOut[j+2]?=?buf[2];????????????

????????????j?+=?3;

????????????i?+=?2;

????????}

????}

????pOut[j]?=?'\0';

}

void?CUTF8Charset::UTF_8ToGB2312(char?*pOut,?char?*pText,?int?pLen)

{

????int?i?=0;

????int?j?=?0;

????char?Ctemp[3]?=?"";

????while(i?<?pLen)

????{

????????if(pText[i]?>?0)

????????{

????????????pOut[j++]?=?pText[i++];????????????

????????}

????????else?????????????????

????????{

????????????WCHAR?Wtemp;

????????????UTF_8ToUnicode(&Wtemp,pText+i);

????????????UnicodeToGB2312(Ctemp,&Wtemp);

????????????pOut[j]?=?Ctemp[0];

????????????pOut[j?+?1]?=?Ctemp[1];

????????????i?+=?3;????

????????????j?+=?2;???

????????}

????}

????pOut[j]?=?'\0';

}

void?CUTF8Charset::GB2312ToUTF_8(CString?&strOut,?char?*pText,?int?pLen)

{????

????int?nBufferLen?=?pLen?/?2?*?3;????//按照全是漢字的情況預留空間

????char?*rst?=?new?char[nBufferLen];

????memset(rst,0,nBufferLen);

????GB2312ToUTF_8(rst,?pText,?pLen);

????strOut?=?rst;

????delete?[]?rst;

????rst?=?NULL;

}

void?CUTF8Charset::UTF_8ToGB2312(CString?&strOut,?char?*pText,?int?pLen)

{

????char?*rst?=?new?char[pLen];

????memset(rst,0,pLen);

????UTF_8ToGB2312(rst,?pText,?pLen);

????strOut?=?rst;

????delete?[]?rst;

????rst?=?NULL;

}

posted on 2006-05-12 15:07 雙魚座的程序員閱讀(689) 評論(0) 編輯收藏引用

只有注冊用戶登錄后才能發表評論。




網站導航: 博客園 IT新聞 BlogJava 博問 Chat2DB 管理

<

2025年12月

>

日

一

二

三

四

五

六

30

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

1

2

3

4

5

6

7

8

9

10

青青草原综合久久大伊人导航_色综合久久天天综合_日日噜噜夜夜狠狠久久丁香五月_热久久这里只有精品

常用鏈接

留言簿(2)

隨筆檔案

文章分類

文章檔案

搜索

最新評論

閱讀排行榜

評論排行榜