• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>

            coreBugZJ

            此 blog 已棄。

            Modified UTF-8 與 UTF-32 相互轉換


            自己的實現,經過一定的測試。

            頭文件
             1/*
             2Convert Modified UTF-8  <==>  UTF-32.
             3*/

             4
             5
             6/*
             7function : Convert Modified UTF-8 to UTF-32.
             8input : str_mutf8, a null terminated string in Modified UTF-8.
             9output : str_utf32, a null terminated string in UTF-32 (may be NULL to only count).
            10input : str_utf32_limit, the max length(character count) 
            11        of str_utf32 plus one(for 'null'), str_utf32 must have enough space 
            12        for str_utf32_limit characters.
            13return : -1 for errors; 
            14        else the length(character count) of str_utf32, 
            15                maybe larger than (str_utf32_limit-1) if the space 
            16                of str_utf32 isn't enough.
            17note : 0xc080 is converted to U+0000 and does not end the string;
            18        a 0x00 byte ends the string.
            19*/

            20int mutf8_to_utf32( const unsigned char *str_mutf8, 
            21                unsigned int *str_utf32, int str_utf32_limit );
            22
            23/*
            24function : Convert UTF-32 to Modified UTF-8.
            25input : str_utf32, a null terminated string in UTF-32.
            26output : str_mutf8, a null terminated string in Modified UTF-8 (may be NULL to only count).
            27input : str_mutf8_limit, the max length(byte count) 
            28        of str_mutf8 plus one(for 'null'), str_mutf8 must have enough space 
            29        for str_mutf8_limit bytes.
            30return : -1 for errors; 
            31        else the length(byte count) of str_mutf8, 
            32                maybe larger than (str_mutf8_limit-1) if the space 
            33                of str_mutf8 isn't enough.
            34note : U+0000 ends the string and becomes the terminating 0x00, not 0xc080.
            35*/

            36int utf32_to_mutf8( const unsigned int *str_utf32, 
            37                unsigned char *str_mutf8, int str_mutf8_limit );
            38
            39


            C代碼
              1/*
              2Convert Modified UTF-8  <==>  UTF-32.
              3*/

              4
              5
              6#include "cvt_mutf8_utf32.h"
              7#include <stdio.h> 
              8
              9
             10/*
             11A U+0001 to U+007F
             120+++ ++++ u &0x80 => 0x00
             13
             14B U+0080 to U+07FF, and null character (U+0000)
             15110+ ++++ u &0xe0 => 0xc0
             1610++ ++++ v &0xc0 => 0x80
             17((u & 0x1f) << 6) + (v & 0x3f)
             18
             19C U+0800 to U+FFFF
             201110 ++++ u &0xf0 => 0xe0
             2110++ ++++ v &0xc0 => 0x80
             2210++ ++++ w &0xc0 => 0x80
             23((u & 0xf) << 12) + ((v & 0x3f) << 6) + (w & 0x3f)
             24
             25D above U+FFFF (U+10000 to U+10FFFF)
             261110 1101 u &0xff => 0xed
             271010 ++++ v &0xf0 => 0xa0
             2810++ ++++ w &0xc0 => 0x80
             291110 1101 x &0xff => 0xed
             301011 ++++ y &0xf0 => 0xb0
             3110++ ++++ z &0xc0 => 0x80
             320x10000+((v&0x0f)<<16)+((w&0x3f)<<10)+((y&0x0f)<<6)+(z&0x3f) 
             33*/

             34
             35int mutf8_to_utf32( const unsigned char *str_mutf8, 
             36                unsigned int *str_utf32, int str_utf32_limit ) {
             37        unsigned int cod, u, v, w, x, y, z;
             38        int len32 = 0;
             39        if ( (NULL == str_mutf8) || (0 > str_utf32_limit) ) {
             40                return (-1);
             41        }

             42
             43#define  __ADD_UTF32_COD_Z__   do {\
             44                if ( (NULL != str_utf32) && (len32 < str_utf32_limit) ) {\
             45                        str_utf32[ len32 ] = cod;\
             46                }
            \
             47                ++len32;\
             48        }
             while ( 0 )
             49
             50        for ( ; ; ) {
             51                u = *str_mutf8++;
             52
             53                if ( 0 == u ) {
             54                        break;
             55                }

             56
             57                if ( 0x00 == (0x80 & u)  ) {
             58                        cod = u;
             59                        __ADD_UTF32_COD_Z__;
             60                        continue;
             61                }

             62
             63                if ( 0xc0 == (0xe0 & u) ) {
             64                        v = *str_mutf8++;
             65                        if ( 0x80 != (0xc0 & v) ) {
             66                                return (-1);
             67                        }

             68                        cod =   ((u&0x1f)<<6| 
             69                                (v&0x3f);
             70                        __ADD_UTF32_COD_Z__;
             71                        continue;
             72                }

             73
             74                if ( 0xe0 == (0xf0 & u) ) {
             75                        v = *str_mutf8++;
             76                        if ( 0x80 != (0xc0 & v) ) {
             77                                return (-1);
             78                        }

             79                        w = *str_mutf8++;
             80                        if ( 0x80 != (0xc0 & w) ) {
             81                                return (-1);
             82                        }

             83                        if (    (0xed == (0xff & u)) && 
             84                                (0xa0 == (0xf0 & v)) && 
             85                                (0x80 == (0xc0 & w)) 
             86                        ) {
             87                                x = *str_mutf8++;
             88                                if ( 0xed != (0xff & x) ) {
             89                                        return (-1);
             90                                }

             91                                y = *str_mutf8++;
             92                                if ( 0xb0 != (0xf0 & y) ) {
             93                                        return (-1);
             94                                }

             95                                z = *str_mutf8++;
             96                                if ( 0x80 != (0xc0 & z) ) {
             97                                        return (-1);
             98                                }

             99                                cod =   0x10000 + (
            100                                        ((v&0x0f)<<16| 
            101                                        ((w&0x3f)<<10| 
            102                                        ((y&0x0f)<<6| 
            103                                        (z&0x3f) );
            104                                __ADD_UTF32_COD_Z__;
            105                                continue;
            106                        }

            107                        cod =   ((u&0xf)<<12| 
            108                                ((v&0x3f)<<6| 
            109                                (w&0x3f);
            110                        __ADD_UTF32_COD_Z__;
            111                        continue;
            112                }

            113
            114                return (-1);
            115        }

            116
            117        if ( NULL == str_utf32 ) {
            118        }

            119        else if ( len32 < str_utf32_limit ) {
            120                str_utf32[ len32 ] = 0;
            121        }

            122        else {
            123                str_utf32[ str_utf32_limit-1 ] = 0;
            124        }

            125
            126        return len32;
            127#undef __ADD_UTF32_COD_Z__
            128}
            129
            /*
            Convert a null terminated UTF-32 string to Modified UTF-8.

            str_utf32       : input code points; reading stops at the first U+0000.
            str_mutf8       : output buffer, or NULL to only count bytes.
            str_mutf8_limit : capacity of str_mutf8 in bytes, terminator included.

            return : -1 if str_utf32 is NULL, str_mutf8_limit is negative, or a
                     code point is above U+10FFFF (bytes for earlier code points
                     may already have been stored, without a terminator);
                     otherwise the number of bytes the full conversion needs,
                     which exceeds (str_mutf8_limit-1) when the output was
                     truncated.

            The output is null terminated whenever str_mutf8_limit is at least 1.
            Truncation is byte-wise, so it may cut a multi-byte sequence short.
            Code points above U+FFFF are written as a surrogate pair of two
            three-byte sequences.
            */
            int utf32_to_mutf8( const unsigned int *str_utf32,
                            unsigned char *str_mutf8, int str_mutf8_limit ) {
                    int len8 = 0;

                    if ( (NULL == str_utf32) || (0 > str_mutf8_limit) ) {
                            return (-1);
                    }

                    for ( ; ; ) {
                            unsigned char seq[ 6 ];  /* longest encoding is 6 bytes */
                            int n = 0;
                            int i;
                            unsigned int cod = *str_utf32++;

                            if ( 0 == cod ) {
                                    break;
                            }

                            if ( 0x007f >= cod ) {
                                    seq[ n++ ] = (unsigned char)cod;
                            }
                            else if ( 0x07ff >= cod ) {
                                    seq[ n++ ] = (unsigned char)(0xc0 | ((cod >> 6) & 0x1f));
                                    seq[ n++ ] = (unsigned char)(0x80 | (cod & 0x3f));
                            }
                            else if ( 0xffff >= cod ) {
                                    seq[ n++ ] = (unsigned char)(0xe0 | ((cod >> 12) & 0x0f));
                                    seq[ n++ ] = (unsigned char)(0x80 | ((cod >> 6) & 0x3f));
                                    seq[ n++ ] = (unsigned char)(0x80 | (cod & 0x3f));
                            }
                            else if ( 0x10ffff >= cod ) {
                                    /* 20 bits remain after the subtraction: 4+6 go into
                                       the high surrogate, 4+6 into the low surrogate. */
                                    cod -= 0x10000;
                                    seq[ n++ ] = 0xed;
                                    seq[ n++ ] = (unsigned char)(0xa0 | ((cod >> 16) & 0x0f));
                                    seq[ n++ ] = (unsigned char)(0x80 | ((cod >> 10) & 0x3f));
                                    seq[ n++ ] = 0xed;
                                    seq[ n++ ] = (unsigned char)(0xb0 | ((cod >> 6) & 0x0f));
                                    seq[ n++ ] = (unsigned char)(0x80 | (cod & 0x3f));
                            }
                            else {
                                    /* not a Unicode code point */
                                    return (-1);
                            }

                            /* Keep counting after the buffer is full so the caller
                               learns the required length. */
                            for ( i = 0; i < n; ++i ) {
                                    if ( (NULL != str_mutf8) && (len8 < str_mutf8_limit) ) {
                                            str_mutf8[ len8 ] = seq[ i ];
                                    }
                                    ++len8;
                            }
                    }

                    /* Terminate only when the buffer has at least one slot; with a
                       limit of 0 there is nowhere to put the terminator. */
                    if ( (NULL != str_mutf8) && (0 < str_mutf8_limit) ) {
                            if ( len8 < str_mutf8_limit ) {
                                    str_mutf8[ len8 ] = 0;
                            }
                            else {
                                    str_mutf8[ str_mutf8_limit - 1 ] = 0;
                            }
                    }

                    return len8;
            }
            196
            197

            posted on 2014-04-13 19:42 coreBugZJ 閱讀(974) 評論(0)  編輯 收藏 引用 所屬分類: 技術視野

            狠狠色丁香婷婷综合久久来| 国产精品美女久久久免费| 偷窥少妇久久久久久久久| 99国产精品久久久久久久成人热| 久久亚洲AV成人无码国产| 99久久99久久精品国产| 欧洲成人午夜精品无码区久久| 久久精品国产久精国产思思 | 色综合久久中文字幕无码| 91精品久久久久久无码| 久久大香萑太香蕉av| 精品久久久久久国产| 国产成年无码久久久免费| 亚洲精品国精品久久99热| 99国产精品久久| 99久久精品国产一区二区| 国内精品伊人久久久久网站| 51久久夜色精品国产| 91精品国产91久久久久福利| 色99久久久久高潮综合影院| 久久久精品视频免费观看| 久久精品国产亚洲av麻豆小说 | 欧美一区二区三区久久综合| 久久久久久久女国产乱让韩| 久久免费小视频| 91精品国产91久久久久久| 久久精品国产亚洲av水果派| 2021国内精品久久久久久影院| 青青久久精品国产免费看| 久久国产精品-国产精品| 久久精品亚洲精品国产色婷| 国产亚洲美女精品久久久2020| 亚洲精品乱码久久久久久蜜桃| 婷婷久久五月天| 亚洲中文字幕无码一久久区| 久久精品中文字幕一区| 精品久久久久一区二区三区| 狠狠精品干练久久久无码中文字幕| 久久久久国产一级毛片高清板| 日韩欧美亚洲综合久久影院d3| 久久99精品久久久久久齐齐|