• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>

            戰魂小筑

            討論群:309800774 知乎關注:http://zhihu.com/people/sunicdavy 開源項目:https://github.com/davyxu

               :: 首頁 :: 新隨筆 :: 聯系 :: 聚合  :: 管理 ::
              257 隨筆 :: 0 文章 :: 506 評論 :: 0 Trackbacks

            參考來源:http://blog.csdn.net/flying8127/article/details/1598521

            在原來的基礎上, 將代碼整理, 并加強安全性. 并按照Windows API的設計, 添加輸出緩沖長度探測功能

            當OutUTFString為NULL時, 可以進行輸出的UTF8字符串長度探測

               1:  uint32 UniCharToUTF8(wchar_t UniChar, char *OutUTFString)
               2:      {
               3:   
               4:          uint32 UTF8CharLength = 0;
               5:   
               6:          if (UniChar < 0x80)
               7:          {  
               8:              if ( OutUTFString )
               9:                  OutUTFString[UTF8CharLength++] = (char)UniChar;
              10:              else
              11:                  UTF8CharLength++;
              12:          }
              13:          else if(UniChar < 0x800)
              14:          {
              15:              if ( OutUTFString )
              16:              {
              17:                  OutUTFString[UTF8CharLength++] = 0xc0 | ( UniChar >> 6 );
              18:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
              19:              }
              20:              else
              21:              {
              22:                  UTF8CharLength += 2;
              23:              }
              24:          }
              25:          else if(UniChar < 0x10000 )
              26:          {
              27:              if ( OutUTFString )
              28:              {
              29:                  OutUTFString[UTF8CharLength++] = 0xe0 | ( UniChar >> 12 );
              30:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 6) & 0x3f );
              31:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
              32:              }
              33:              else
              34:              {
              35:                  UTF8CharLength += 3;
              36:              }
              37:          }
              38:          else if( UniChar < 0x200000 ) 
              39:          {
              40:              if ( OutUTFString )
              41:              {
              42:                  OutUTFString[UTF8CharLength++] = 0xf0 | ( (int)UniChar >> 18 );
              43:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 12) & 0x3f );
              44:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 6) & 0x3f );
              45:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
              46:              }
              47:              else
              48:              {
              49:                  UTF8CharLength += 4;
              50:              }
              51:   
              52:          }
              53:   
              54:          return UTF8CharLength;
              55:      }

             

            當OutUnicodeString為NULL時, 可以進行輸出的Unicode字符串長度探測

             

               1:  uint32 UTF8StrToUnicode( const char* UTF8String, uint32 UTF8StringLength, wchar_t* OutUnicodeString, uint32 UnicodeStringBufferSize )
               2:      {
               3:          uint32 UTF8Index = 0;
               4:          uint32 UniIndex = 0;
               5:   
               6:          while ( UTF8Index < UTF8StringLength )
               7:          {
               8:              unsigned char UTF8Char = UTF8String[UTF8Index];
               9:   
              10:              if ( UnicodeStringBufferSize != 0 && UniIndex >= UnicodeStringBufferSize )
              11:                  break;
              12:   
              13:              if ((UTF8Char & 0x80) == 0) 
              14:              {
              15:                  const uint32 cUTF8CharRequire = 1;
              16:   
              17:                  // UTF8字碼不足
              18:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
              19:                      break;
              20:   
              21:                  if ( OutUnicodeString )
              22:                  {
              23:                      wchar_t& WideChar = OutUnicodeString[UniIndex]; 
              24:   
              25:                      WideChar = UTF8Char;
              26:                  }
              27:   
              28:                  UTF8Index++;
              29:                  
              30:              } 
              31:              else if((UTF8Char & 0xE0) == 0xC0)  ///< 110x-xxxx 10xx-xxxx
              32:              {
              33:                  const uint32 cUTF8CharRequire = 2;
              34:   
              35:                  // UTF8字碼不足
              36:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
              37:                      break;
              38:   
              39:                  if ( OutUnicodeString )
              40:                  {
              41:                      wchar_t& WideChar = OutUnicodeString[UniIndex]; 
              42:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x3F) << 6;
              43:                      WideChar |= (UTF8String[UTF8Index + 1] & 0x3F);
              44:                  }
              45:                  
              46:                  UTF8Index += cUTF8CharRequire;
              47:              }
              48:              else if((UTF8Char & 0xF0) == 0xE0)  ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
              49:              {
              50:                  const uint32 cUTF8CharRequire = 3;
              51:   
              52:                  // UTF8字碼不足
              53:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
              54:                      break;
              55:   
              56:                  if ( OutUnicodeString )
              57:                  {
              58:                      wchar_t& WideChar = OutUnicodeString[UniIndex]; 
              59:   
              60:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x1F) << 12;
              61:                      WideChar |= (UTF8String[UTF8Index + 1] & 0x3F) << 6;
              62:                      WideChar |= (UTF8String[UTF8Index + 2] & 0x3F);
              63:                  }
              64:                  
              65:   
              66:                  UTF8Index += cUTF8CharRequire;
              67:              } 
              68:              else if((UTF8Char & 0xF8) == 0xF0)  ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx 
              69:              {
              70:                  const uint32 cUTF8CharRequire = 4;
              71:   
              72:                  // UTF8字碼不足
              73:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
              74:                      break;
              75:   
              76:                  if ( OutUnicodeString )
              77:                  {
              78:                      wchar_t& WideChar = OutUnicodeString[UniIndex]; 
              79:   
              80:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x0F) << 18;
              81:                      WideChar  = (UTF8String[UTF8Index + 1] & 0x3F) << 12;
              82:                      WideChar |= (UTF8String[UTF8Index + 2] & 0x3F) << 6;
              83:                      WideChar |= (UTF8String[UTF8Index + 3] & 0x3F);
              84:                  }
              85:   
              86:                  UTF8Index += cUTF8CharRequire;
              87:              } 
              88:              else ///< 1111-10xx 10xx-xxxx 10xx-xxxx 10xx-xxxx 10xx-xxxx 
              89:              {
              90:                  const uint32 cUTF8CharRequire = 5;
              91:   
              92:                  // UTF8字碼不足
              93:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
              94:                      break;
              95:   
              96:                  if ( OutUnicodeString )
              97:                  {
              98:                      wchar_t& WideChar = OutUnicodeString[UniIndex]; 
              99:   
             100:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x07) << 24;
             101:                      WideChar  = (UTF8String[UTF8Index + 1] & 0x3F) << 18;
             102:                      WideChar  = (UTF8String[UTF8Index + 2] & 0x3F) << 12;
             103:                      WideChar |= (UTF8String[UTF8Index + 3] & 0x3F) << 6;
             104:                      WideChar |= (UTF8String[UTF8Index + 4] & 0x3F);
             105:                  }
             106:   
             107:                  UTF8Index += cUTF8CharRequire;
             108:              }
             109:   
             110:   
             111:              UniIndex++;
             112:          }
             113:   
             114:          return UniIndex;
             115:      }

            療效: 用了此代碼啊, 再也不用被iconv折磨了

            posted on 2012-02-27 14:21 戰魂小筑 閱讀(4694) 評論(9)  編輯 收藏 引用 所屬分類: 網絡 服務器技術C++/ 編程語言

            評論

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-02-27 21:09 我要去拯救世界
            謝謝分享了!  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-02-28 17:30 天下
            跨平臺使用挺好,
            在WIN32下,只要CW2A,CA2W宏全部搞定


              回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-03-02 10:08 pillaridge
            這個可能有點兒問題,wchar_t在windows下是16位,在*nix下是32位。  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-03-02 10:13 pillaridge
            可以借用CLANG源代碼 basic下的ConvertUTF.h和ConvertUTF.c  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-03-09 18:04 戰魂小筑
            @pillaridge
            感謝提醒
              回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-11-19 14:22 陳成
            100: WideChar = (UTF8String[UTF8Index + 0] & 0x07) << 24;
            101: WideChar = (UTF8String[UTF8Index + 1] & 0x3F) << 18;
            102: WideChar = (UTF8String[UTF8Index + 2] & 0x3F) << 12;
            這三行不是只有最后一行才起作用嗎?  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2012-11-20 10:09 戰魂小筑
            @陳成
            這段代碼是從其他地方拷貝過來的  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2013-06-07 17:03 zibbleR
            UniCharToUTF8 在轉換中文的時候出來的貌似還是亂碼,  回復  更多評論
              

            # re: 跨平臺Unicode與UTF8互轉代碼 2013-10-15 14:57 zozo
            關鍵是要gb2312-utf8的轉換,有木有  回復  更多評論
              

            久久人人爽爽爽人久久久| 日韩久久无码免费毛片软件| 亚洲午夜久久久久久久久电影网| 亚洲日本va中文字幕久久| 国产午夜免费高清久久影院 | 久久久久久久99精品免费观看| 狠狠色丁香久久综合婷婷| 久久99国产精品成人欧美| 日本欧美久久久久免费播放网| 2022年国产精品久久久久| 亚洲成av人片不卡无码久久| 久久亚洲私人国产精品vA| 国内精品久久久久久不卡影院| 久久99久久99精品免视看动漫| 久久国产香蕉视频| 国产一级做a爰片久久毛片| 无码任你躁久久久久久老妇| 久久青青草原综合伊人| 亚洲中文字幕无码久久综合网| 久久青青国产| 国产精品女同一区二区久久| 亚洲av成人无码久久精品| 欧美伊人久久大香线蕉综合69| 国产精品久久久久久搜索| 精品久久久久久无码不卡| 伊人久久综在合线亚洲2019| 久久天堂AV综合合色蜜桃网 | 久久亚洲色一区二区三区| 久久狠狠高潮亚洲精品| 2020国产成人久久精品| 久久久久18| 国内精品伊人久久久久网站| 91久久香蕉国产熟女线看| 久久国产精品77777| 久久精品无码午夜福利理论片| 99久久香蕉国产线看观香| 久久久久久免费视频| 久久久久久久久久久久久久| 久久午夜夜伦鲁鲁片免费无码影视| 色综合合久久天天给综看| 久久亚洲国产最新网站|