• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>

            coreBugZJ

            此 blog 已棄。

            Modified UTF-8 與 UTF-32 相互轉換


            自己的實現,經過一定的測試。

            頭文件
             1/*
             2Convert Modified UTF-8  <==>  UTF-32.
             3*/

             4
             5
             6/*
             7function : Convert a Modified UTF-8 string to UTF-32.
             8input : str_mutf8, a null terminated string in Modified UTF-8.
             9output : str_utf32, a null terminated string in UTF-32 (may be NULL to only count).
            10input : str_utf32_limit, the max length(character count) 
            11        of str_utf32 plus one(for 'null'), str_utf32 must have enough space 
            12        for str_utf32_limit characters.
            13return : -1 for errors (NULL str_mutf8, negative limit, malformed input); 
            14        else the length(character count) of str_utf32, 
            15                maybe larger than (str_utf32_limit-1) if the space 
            16                of str_utf32 isn't enough.
            17note : the byte pair 0xc0 0x80 converts to U+0000 and does not end the string;
            18        a single 0x00 byte ends the string (it is the terminator).
            19*/

            20int mutf8_to_utf32( const unsigned char *str_mutf8, 
            21                unsigned int *str_utf32, int str_utf32_limit );
            22
            23/*
            24function : Convert a UTF-32 string to Modified UTF-8.
            25input : str_utf32, a null terminated string in UTF-32.
            26output : str_mutf8, a null terminated string in Modified UTF-8 (may be NULL to only count).
            27input : str_mutf8_limit, the max length(byte count) 
            28        of str_mutf8 plus one(for 'null'), str_mutf8 must have enough space 
            29        for str_mutf8_limit bytes.
            30return : -1 for errors (NULL str_utf32, negative limit, code point above U+10FFFF); 
            31        else the length(byte count) of str_mutf8, 
            32                maybe larger than (str_mutf8_limit-1) if the space 
            33                of str_mutf8 isn't enough.
            34note : U+0000 ends the input and becomes the terminating 0x00 byte, never 0xc0 0x80.
            35*/

            36int utf32_to_mutf8( const unsigned int *str_utf32, 
            37                unsigned char *str_mutf8, int str_mutf8_limit );
            38
            39


            C代碼
              1/*
              2Convert Modified UTF-8  <==>  UTF-32.
              3*/

              4
              5
              6#include "cvt_mutf8_utf32.h"
              7#include <stdio.h> 
              8
              9
             10/*
             11A U+0001 to U+007F
             120+++ ++++ u &0x80 => 0x00
             13
             14B U+0080 to U+07FF, and null character (U+0000)
             15110+ ++++ u &0xe0 => 0xc0
             1610++ ++++ v &0xc0 => 0x80
             17((u & 0x1f) << 6) + (v & 0x3f)
             18
             19C U+0800 to U+FFFF
             201110 ++++ u &0xf0 => 0xe0
             2110++ ++++ v &0xc0 => 0x80
             2210++ ++++ w &0xc0 => 0x80
             23((u & 0xf) << 12) + ((v & 0x3f) << 6) + (w & 0x3f)
             24
             25D above U+FFFF (U+10000 to U+10FFFF)
             261110 1101 u &0xff => 0xed
             271010 ++++ v &0xf0 => 0xa0
             2810++ ++++ w &0xc0 => 0x80
             291110 1101 x &0xff => 0xed
             301011 ++++ y &0xf0 => 0xb0
             3110++ ++++ z &0xc0 => 0x80
             320x10000+((v&0x0f)<<16)+((w&0x3f)<<10)+((y&0x0f)<<6)+(z&0x3f) 
             33*/

             34
             35int mutf8_to_utf32( const unsigned char *str_mutf8, 
             36                unsigned int *str_utf32, int str_utf32_limit ) {
             37        unsigned int cod, u, v, w, x, y, z;
             38        int len32 = 0;
             39        if ( (NULL == str_mutf8) || (0 > str_utf32_limit) ) {
             40                return (-1);
             41        }

             42
             43#define  __ADD_UTF32_COD_Z__   do {\
             44                if ( (NULL != str_utf32) && (len32 < str_utf32_limit) ) {\
             45                        str_utf32[ len32 ] = cod;\
             46                }
            \
             47                ++len32;\
             48        }
             while ( 0 )
             49
             50        for ( ; ; ) {
             51                u = *str_mutf8++;
             52
             53                if ( 0 == u ) {
             54                        break;
             55                }

             56
             57                if ( 0x00 == (0x80 & u)  ) {
             58                        cod = u;
             59                        __ADD_UTF32_COD_Z__;
             60                        continue;
             61                }

             62
             63                if ( 0xc0 == (0xe0 & u) ) {
             64                        v = *str_mutf8++;
             65                        if ( 0x80 != (0xc0 & v) ) {
             66                                return (-1);
             67                        }

             68                        cod =   ((u&0x1f)<<6| 
             69                                (v&0x3f);
             70                        __ADD_UTF32_COD_Z__;
             71                        continue;
             72                }

             73
             74                if ( 0xe0 == (0xf0 & u) ) {
             75                        v = *str_mutf8++;
             76                        if ( 0x80 != (0xc0 & v) ) {
             77                                return (-1);
             78                        }

             79                        w = *str_mutf8++;
             80                        if ( 0x80 != (0xc0 & w) ) {
             81                                return (-1);
             82                        }

             83                        if (    (0xed == (0xff & u)) && 
             84                                (0xa0 == (0xf0 & v)) && 
             85                                (0x80 == (0xc0 & w)) 
             86                        ) {
             87                                x = *str_mutf8++;
             88                                if ( 0xed != (0xff & x) ) {
             89                                        return (-1);
             90                                }

             91                                y = *str_mutf8++;
             92                                if ( 0xb0 != (0xf0 & y) ) {
             93                                        return (-1);
             94                                }

             95                                z = *str_mutf8++;
             96                                if ( 0x80 != (0xc0 & z) ) {
             97                                        return (-1);
             98                                }

             99                                cod =   0x10000 + (
            100                                        ((v&0x0f)<<16| 
            101                                        ((w&0x3f)<<10| 
            102                                        ((y&0x0f)<<6| 
            103                                        (z&0x3f) );
            104                                __ADD_UTF32_COD_Z__;
            105                                continue;
            106                        }

            107                        cod =   ((u&0xf)<<12| 
            108                                ((v&0x3f)<<6| 
            109                                (w&0x3f);
            110                        __ADD_UTF32_COD_Z__;
            111                        continue;
            112                }

            113
            114                return (-1);
            115        }

            116
            117        if ( NULL == str_utf32 ) {
            118        }

            119        else if ( len32 < str_utf32_limit ) {
            120                str_utf32[ len32 ] = 0;
            121        }

            122        else {
            123                str_utf32[ str_utf32_limit-1 ] = 0;
            124        }

            125
            126        return len32;
            127#undef __ADD_UTF32_COD_Z__
            128}
            129
            /*
             * Convert a null terminated UTF-32 string to Modified UTF-8.
             *
             * str_utf32       : input code points, ended by a 0 element.
             * str_mutf8       : output buffer; may be NULL to only count bytes.
             * str_mutf8_limit : capacity of str_mutf8 in bytes, including the
             *                   terminating 0; must not be negative.
             *
             * Returns -1 when str_utf32 is NULL, str_mutf8_limit is negative, or a
             * code point is above U+10FFFF (bytes produced so far may already have
             * been stored, without a terminator). Otherwise returns the number of
             * bytes the complete conversion yields, terminator excluded; a value
             * >= str_mutf8_limit means the stored output was truncated. Truncation
             * is byte-wise, so it may cut through a multi-byte sequence.
             *
             * If str_mutf8 is not NULL and str_mutf8_limit > 0 the stored output is
             * always 0-terminated. With str_mutf8_limit == 0 nothing is written.
             *
             * U+0000 ends the input and is emitted as the terminating 0x00 byte,
             * never as 0xC0 0x80. Code points above U+FFFF are written as a
             * surrogate pair, each half as a 3-byte sequence (6 bytes in total).
             */
            int utf32_to_mutf8( const unsigned int *str_utf32,
                            unsigned char *str_mutf8, int str_mutf8_limit ) {
                    int len8 = 0;

                    if ( (NULL == str_utf32) || (0 > str_mutf8_limit) ) {
                            return (-1);
                    }

                    for ( ; ; ) {
                            unsigned char seq[ 6 ];
                            int seq_len;
                            int i;
                            unsigned int cod = *str_utf32++;

                            if ( 0 == cod ) {
                                    break;
                            }

                            if ( 0x007f >= cod ) {
                                    seq[ 0 ] = (unsigned char)cod;
                                    seq_len = 1;
                            }
                            else if ( 0x07ff >= cod ) {
                                    seq[ 0 ] = (unsigned char)(0xc0 | ((cod >> 6) & 0x1f));
                                    seq[ 1 ] = (unsigned char)(0x80 | (cod & 0x3f));
                                    seq_len = 2;
                            }
                            else if ( 0xffff >= cod ) {
                                    seq[ 0 ] = (unsigned char)(0xe0 | ((cod >> 12) & 0x0f));
                                    seq[ 1 ] = (unsigned char)(0x80 | ((cod >> 6) & 0x3f));
                                    seq[ 2 ] = (unsigned char)(0x80 | (cod & 0x3f));
                                    seq_len = 3;
                            }
                            else if ( 0x10ffff >= cod ) {
                                    /* 20 payload bits split 4+6 (high surrogate)
                                       and 4+6 (low surrogate). */
                                    cod -= 0x10000;
                                    seq[ 0 ] = 0xed;
                                    seq[ 1 ] = (unsigned char)(0xa0 | ((cod >> 16) & 0x0f));
                                    seq[ 2 ] = (unsigned char)(0x80 | ((cod >> 10) & 0x3f));
                                    seq[ 3 ] = 0xed;
                                    seq[ 4 ] = (unsigned char)(0xb0 | ((cod >> 6) & 0x0f));
                                    seq[ 5 ] = (unsigned char)(0x80 | (cod & 0x3f));
                                    seq_len = 6;
                            }
                            else {
                                    return (-1);
                            }

                            /* Store while there is room, but keep counting so the
                               caller learns the required length. */
                            for ( i = 0; i < seq_len; ++i ) {
                                    if ( (NULL != str_mutf8) && (len8 < str_mutf8_limit) ) {
                                            str_mutf8[ len8 ] = seq[ i ];
                                    }
                                    ++len8;
                            }
                    }

                    /* Terminate only when the buffer has at least one slot; a zero
                       limit must not touch memory in front of the buffer. */
                    if ( (NULL != str_mutf8) && (0 < str_mutf8_limit) ) {
                            if ( len8 < str_mutf8_limit ) {
                                    str_mutf8[ len8 ] = 0;
                            }
                            else {
                                    str_mutf8[ str_mutf8_limit - 1 ] = 0;
                            }
                    }

                    return len8;
            }
            196
            197

            posted on 2014-04-13 19:42 coreBugZJ 閱讀(972) 評論(0)  編輯 收藏 引用 所屬分類: 技術視野

            久久无码国产| 亚洲午夜精品久久久久久人妖| 久久99国产精一区二区三区| 国内精品伊人久久久久777| 久久亚洲天堂| 中文字幕久久亚洲一区| 欧美成a人片免费看久久| 久久综合视频网站| 国产欧美久久久精品影院| 久久久久久久综合狠狠综合| 亚洲人成无码www久久久| 亚洲&#228;v永久无码精品天堂久久 | 欧美午夜精品久久久久久浪潮| 婷婷综合久久狠狠色99h| 精品久久久久久久久中文字幕| 欧美亚洲国产精品久久蜜芽| segui久久国产精品| 久久久久久A亚洲欧洲AV冫 | 久久亚洲AV成人出白浆无码国产| 久久久SS麻豆欧美国产日韩| 久久强奷乱码老熟女网站| 久久99热只有频精品8| 久久久国产精品网站| 久久久久久久国产免费看| 久久亚洲精品无码aⅴ大香| 亚洲精品无码专区久久久| 国产成人无码久久久精品一| 国产免费久久精品99久久| 久久久精品久久久久影院| 久久综合丁香激情久久| 亚洲第一永久AV网站久久精品男人的天堂AV | 久久免费99精品国产自在现线| 久久精品国产亚洲AV香蕉| 国产精品久久久久9999| 亚洲国产精品无码久久久久久曰| 人妻少妇久久中文字幕 | 国内精品伊人久久久久网站| 亚洲欧美一区二区三区久久| 99久久er这里只有精品18| 欧美久久久久久精选9999| 久久国产乱子伦精品免费强|