• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>

            Kisser Leon

            這個kisser不太冷
            posts - 100, comments - 102, trackbacks - 0, articles - 0

            ConvertUTF.c

            Posted on 2007-03-08 16:02 kk 閱讀(3314) 評論(0)  編輯 收藏 引用 所屬分類: IT
            UTF8和UTF16和UTF32之間的相互轉化

            /* ================================================================ */
            /*
            File:    ConvertUTF.C
            Author: Mark E. Davis
            Copyright (C) 1994 Taligent, Inc. All rights reserved.

            This code is copyrighted. Under the copyright laws, this code may not
            be copied, in whole or part, without prior written consent of Taligent.

            Taligent grants the right to use or reprint this code as long as this
            ENTIRE copyright notice is reproduced in the code or reproduction.
            The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES,
            EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED
            WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN
            NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
            WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
            INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
            LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
            IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
            BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
            LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
            LIMITATION MAY NOT APPLY TO YOU.

            RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
            government is subject to restrictions as set forth in subparagraph
            (c)(l)(ii) of the Rights in Technical Data and Computer Software
            clause at DFARS 252.227-7013 and FAR 52.227-19.

            This code may be protected by one or more U.S. and International
            Patents.

            TRADEMARKS: Taligent and the Taligent Design Mark are registered
            trademarks of Taligent, Inc.
            */
            /* ================================================================ */

            #include "ConvertUTF.h"

            /* ================================================================ */

            /*
             * Constants for the UTF-16 surrogate-pair arithmetic used by the
             * converters in this file:
             *   halfShift - number of bits carried by the low surrogate; the high
             *               surrogate carries the bits above it.
             *   halfBase  - value subtracted from a code point before it is split
             *               into a pair, and added back when a pair is combined.
             *   halfMask  - mask selecting the low halfShift bits.
             *   kSurrogate{High,Low}{Start,End} - inclusive bounds of the two
             *               surrogate ranges.
             */
            const int halfShift = 10;
            const UCS4 halfBase = 0x0010000UL;
            const UCS4 halfMask = 0x3FFUL;
            const UCS4 kSurrogateHighStart = 0xD800UL;
            const UCS4 kSurrogateHighEnd = 0xDBFFUL;
            const UCS4 kSurrogateLowStart = 0xDC00UL;
            const UCS4 kSurrogateLowEnd = 0xDFFFUL;

            /* ================================================================ */

            ConversionResult?? ?ConvertUCS4toUTF16 (
            ?? ??? ?UCS4** sourceStart, const UCS4* sourceEnd,
            ?? ??? ?UTF16** targetStart, const UTF16* targetEnd) {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UCS4* source = *sourceStart;
            ?? ?register UTF16* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch;
            ?? ??? ?if (target >= targetEnd) {
            ?? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?ch = *source++;
            ?? ??? ?if (ch <= kMaximumUCS2) {
            ?? ??? ??? ?*target++ = ch;
            ?? ??? ?} else if (ch > kMaximumUTF16) {
            ?? ??? ??? ?*target++ = kReplacementCharacter;
            ?? ??? ?} else {
            ?? ??? ??? ?if (target + 1 >= targetEnd) {
            ?? ??? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ??? ?};
            ?? ??? ??? ?ch -= halfBase;
            ?? ??? ??? ?*target++ = (ch >> halfShift) + kSurrogateHighStart;
            ?? ??? ??? ?*target++ = (ch & halfMask) + kSurrogateLowStart;
            ?? ??? ?};
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            /* ================================================================ */

            ConversionResult?? ?ConvertUTF16toUCS4 (
            ?? ??? ?UTF16** sourceStart, UTF16* sourceEnd,
            ?? ??? ?UCS4** targetStart, const UCS4* targetEnd) {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UTF16* source = *sourceStart;
            ?? ?register UCS4* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch;
            ?? ??? ?ch = *source++;
            ?? ??? ?if (ch >= kSurrogateHighStart && ch <= kSurrogateHighEnd && source < sourceEnd) {
            ?? ??? ??? ?register UCS4 ch2 = *source;
            ?? ??? ??? ?if (ch2 >= kSurrogateLowStart && ch2 <= kSurrogateLowEnd) {
            ?? ??? ??? ??? ?ch = ((ch - kSurrogateHighStart) << halfShift)
            ?? ??? ??? ??? ??? ?+ (ch2 - kSurrogateLowStart) + halfBase;
            ?? ??? ??? ??? ?++source;
            ?? ??? ??? ?};
            ?? ??? ?};
            ?? ??? ?if (target >= targetEnd) {
            ?? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?*target++ = ch;
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            /* ================================================================ */

            /*
             * offsetsFromUTF8[n]: value subtracted from the raw sum of an
             * (n + 1)-byte UTF-8 sequence after the decoders have accumulated the
             * bytes with 6-bit shifts.  It removes the lead-byte and
             * continuation-byte marker bits in one step.
             */
            UCS4 offsetsFromUTF8[6] = {
                0x00000000UL, 0x00003080UL, 0x000E2080UL,
                0x03C82080UL, 0xFA082080UL, 0x82082080UL
            };

            /*
             * bytesFromUTF8[b]: number of bytes that follow lead byte b in a UTF-8
             * sequence.  Bytes 0x00-0xBF map to 0 (continuation bytes 0x80-0xBF are
             * therefore treated as one-byte sequences), 0xC0-0xDF to 1, 0xE0-0xEF
             * to 2, 0xF0-0xF7 to 3, 0xF8-0xFB to 4 and 0xFC-0xFF to 5.
             */
            char bytesFromUTF8[256] = {
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5};

            /*
             * firstByteMark[n]: marker bits OR-ed into the lead byte of an n-byte
             * UTF-8 sequence.  The encoders in this file only index it with 1..6;
             * entry 0 is a placeholder.
             */
            UTF8 firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};

            /* ================================================================ */
            /*    This code is similar in effect to making successive calls on the
            mbtowc and wctomb routines in FSS-UTF. However, it is considerably
            different in code:
            * it is adapted to be consistent with UTF16,
            * the interface converts a whole buffer to avoid function-call overhead
            * constants have been gathered.
            * loops & conditionals have been removed as much as possible for
            efficiency, in favor of drop-through switch statements.
            */

            /* ================================================================ */
            ConversionResult?? ?ConvertUTF16toUTF8 (
            ?? ??? ?UTF16** sourceStart, const UTF16* sourceEnd,
            ?? ??? ?UTF8** targetStart, const UTF8* targetEnd)
            {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UTF16* source = *sourceStart;
            ?? ?register UTF8* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch;
            ?? ??? ?register unsigned short bytesToWrite = 0;
            ?? ??? ?register const UCS4 byteMask = 0xBF;
            ?? ??? ?register const UCS4 byteMark = 0x80;
            ?? ??? ?ch = *source++;
            ?? ??? ?if (ch >= kSurrogateHighStart && ch <= kSurrogateHighEnd
            ?? ??? ??? ??? ?&& source < sourceEnd) {
            ?? ??? ??? ?register UCS4 ch2 = *source;
            ?? ??? ??? ?if (ch2 >= kSurrogateLowStart && ch2 <= kSurrogateLowEnd) {
            ?? ??? ??? ??? ?ch = ((ch - kSurrogateHighStart) << halfShift)
            ?? ??? ??? ??? ??? ?+ (ch2 - kSurrogateLowStart) + halfBase;
            ?? ??? ??? ??? ?++source;
            ?? ??? ??? ?};
            ?? ??? ?};
            ?? ??? ?if (ch < 0x80) {?? ??? ??? ??? ?bytesToWrite = 1;
            ?? ??? ?} else if (ch < 0x800) {?? ??? ?bytesToWrite = 2;
            ?? ??? ?} else if (ch < 0x10000) {?? ??? ?bytesToWrite = 3;
            ?? ??? ?} else if (ch < 0x200000) {?? ??? ?bytesToWrite = 4;
            ?? ??? ?} else if (ch < 0x4000000) {?? ?bytesToWrite = 5;
            ?? ??? ?} else if (ch <= kMaximumUCS4){?? ?bytesToWrite = 6;
            ?? ??? ?} else {?? ??? ??? ??? ??? ??? ?bytesToWrite = 2;
            ?? ??? ??? ??? ??? ??? ??? ??? ??? ??? ?ch = kReplacementCharacter;
            ?? ??? ?}; /* I wish there were a smart way to avoid this conditional */
            ?? ??? ?
            ?? ??? ?target += bytesToWrite;
            ?? ??? ?if (target > targetEnd) {
            ?? ??? ??? ?target -= bytesToWrite; result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?switch (bytesToWrite) {?? ?/* note: code falls through cases! */
            ?? ??? ??? ?case 6:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 5:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 4:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 3:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 2:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 1:?? ?*--target =? ch | firstByteMark[bytesToWrite];
            ?? ??? ?};
            ?? ??? ?target += bytesToWrite;
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            /* ================================================================ */

            ConversionResult?? ?ConvertUTF8toUTF16 (
            ?? ??? ?UTF8** sourceStart, const UTF8* sourceEnd,
            ?? ??? ?UTF16** targetStart, const UTF16* targetEnd)
            {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UTF8* source = *sourceStart;
            ?? ?register UTF16* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch = 0;
            ?? ??? ?register unsigned short extraBytesToWrite = bytesFromUTF8[*source];
            ?? ??? ?if (source + extraBytesToWrite > sourceEnd) {
            ?? ??? ??? ?result = kUTFConversionSourceExhausted; break;
            ?? ??? ?};
            ?? ??? ?switch(extraBytesToWrite) {?? ?/* note: code falls through cases! */
            ?? ??? ??? ?case 5:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 4:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 3:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 2:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 1:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 0:?? ?ch += *source++;
            ?? ??? ?};
            ?? ??? ?ch -= offsetsFromUTF8[extraBytesToWrite];

            ?? ??? ?if (target >= targetEnd) {
            ?? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?if (ch <= kMaximumUCS2) {
            ?? ??? ??? ?*target++ = ch;
            ?? ??? ?} else if (ch > kMaximumUTF16) {
            ?? ??? ??? ?*target++ = kReplacementCharacter;
            ?? ??? ?} else {
            ?? ??? ??? ?if (target + 1 >= targetEnd) {
            ?? ??? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ??? ?};
            ?? ??? ??? ?ch -= halfBase;
            ?? ??? ??? ?*target++ = (ch >> halfShift) + kSurrogateHighStart;
            ?? ??? ??? ?*target++ = (ch & halfMask) + kSurrogateLowStart;
            ?? ??? ?};
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            /* ================================================================ */
            ConversionResult?? ?ConvertUCS4toUTF8 (
            ?? ??? ?UCS4** sourceStart, const UCS4* sourceEnd,
            ?? ??? ?UTF8** targetStart, const UTF8* targetEnd)
            {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UCS4* source = *sourceStart;
            ?? ?register UTF8* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch;
            ?? ??? ?register unsigned short bytesToWrite = 0;
            ?? ??? ?register const UCS4 byteMask = 0xBF;
            ?? ??? ?register const UCS4 byteMark = 0x80;
            ?? ??? ?ch = *source++;
            ?? ??? ?if (ch >= kSurrogateHighStart && ch <= kSurrogateHighEnd
            ?? ??? ??? ??? ?&& source < sourceEnd) {
            ?? ??? ??? ?register UCS4 ch2 = *source;
            ?? ??? ??? ?if (ch2 >= kSurrogateLowStart && ch2 <= kSurrogateLowEnd) {
            ?? ??? ??? ??? ?ch = ((ch - kSurrogateHighStart) << halfShift)
            ?? ??? ??? ??? ??? ?+ (ch2 - kSurrogateLowStart) + halfBase;
            ?? ??? ??? ??? ?++source;
            ?? ??? ??? ?};
            ?? ??? ?};
            ?? ??? ?if (ch < 0x80) {?? ??? ??? ??? ?bytesToWrite = 1;
            ?? ??? ?} else if (ch < 0x800) {?? ??? ?bytesToWrite = 2;
            ?? ??? ?} else if (ch < 0x10000) {?? ??? ?bytesToWrite = 3;
            ?? ??? ?} else if (ch < 0x200000) {?? ??? ?bytesToWrite = 4;
            ?? ??? ?} else if (ch < 0x4000000) {?? ?bytesToWrite = 5;
            ?? ??? ?} else if (ch <= kMaximumUCS4){?? ?bytesToWrite = 6;
            ?? ??? ?} else {?? ??? ??? ??? ??? ??? ?bytesToWrite = 2;
            ?? ??? ??? ??? ??? ??? ??? ??? ??? ??? ?ch = kReplacementCharacter;
            ?? ??? ?}; /* I wish there were a smart way to avoid this conditional */
            ?? ??? ?
            ?? ??? ?target += bytesToWrite;
            ?? ??? ?if (target > targetEnd) {
            ?? ??? ??? ?target -= bytesToWrite; result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?switch (bytesToWrite) {?? ?/* note: code falls through cases! */
            ?? ??? ??? ?case 6:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 5:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 4:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 3:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 2:?? ?*--target = (ch | byteMark) & byteMask; ch >>= 6;
            ?? ??? ??? ?case 1:?? ?*--target =? ch | firstByteMark[bytesToWrite];
            ?? ??? ?};
            ?? ??? ?target += bytesToWrite;
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            /* ================================================================ */

            ConversionResult?? ?ConvertUTF8toUCS4 (
            ?? ??? ?UTF8** sourceStart, const UTF8* sourceEnd,
            ?? ??? ?UCS4** targetStart, const UCS4* targetEnd)
            {
            ?? ?ConversionResult result = kUTFConversionOK;
            ?? ?register UTF8* source = *sourceStart;
            ?? ?register UCS4* target = *targetStart;
            ?? ?while (source < sourceEnd) {
            ?? ??? ?register UCS4 ch = 0;
            ?? ??? ?register unsigned short extraBytesToWrite = bytesFromUTF8[*source];
            ?? ??? ?if (source + extraBytesToWrite > sourceEnd) {
            ?? ??? ??? ?result = kUTFConversionSourceExhausted; break;
            ?? ??? ?};
            ?? ??? ?switch(extraBytesToWrite) {?? ?/* note: code falls through cases! */
            ?? ??? ??? ?case 5:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 4:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 3:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 2:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 1:?? ?ch += *source++; ch <<= 6;
            ?? ??? ??? ?case 0:?? ?ch += *source++;
            ?? ??? ?};
            ?? ??? ?ch -= offsetsFromUTF8[extraBytesToWrite];

            ?? ??? ?if (target >= targetEnd) {
            ?? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ?};
            ?? ??? ?if (ch <= kMaximumUCS2) {
            ?? ??? ??? ?*target++ = ch;
            ?? ??? ?} else if (ch > kMaximumUCS4) {
            ?? ??? ??? ?*target++ = kReplacementCharacter;
            ?? ??? ?} else {
            ?? ??? ??? ?if (target + 1 >= targetEnd) {
            ?? ??? ??? ??? ?result = kUTFConversionTargetExhausted; break;
            ?? ??? ??? ?};
            ?? ??? ??? ?ch -= halfBase;
            ?? ??? ??? ?*target++ = (ch >> halfShift) + kSurrogateHighStart;
            ?? ??? ??? ?*target++ = (ch & halfMask) + kSurrogateLowStart;
            ?? ??? ?};
            ?? ?};
            ?? ?*sourceStart = source;
            ?? ?*targetStart = target;
            ?? ?return result;
            };

            久久久噜噜噜久久中文字幕色伊伊 | 国产成年无码久久久免费| 午夜精品久久影院蜜桃| 日本亚洲色大成网站WWW久久| 99久久伊人精品综合观看| 四虎国产精品成人免费久久| 中文字幕无码av激情不卡久久| 久久婷婷人人澡人人爽人人爱| 久久精品国产亚洲AV大全| 久久av免费天堂小草播放| 97久久国产综合精品女不卡| 精品久久人妻av中文字幕| 久久国产精品免费| 日韩久久久久久中文人妻| 久久综合九色综合久99| 2021国产精品久久精品| 亚洲国产二区三区久久| 久久精品国产色蜜蜜麻豆| 蜜桃麻豆www久久| 国产aⅴ激情无码久久| 国产精品美女久久久网AV| 久久天天躁狠狠躁夜夜躁2014| 久久777国产线看观看精品| 尹人香蕉久久99天天拍| 久久九九久精品国产| 国产精品久久久久影视不卡| 久久婷婷色香五月综合激情| 国产亚洲色婷婷久久99精品91| 久久A级毛片免费观看| 久久狠狠爱亚洲综合影院| 久久人人爽人人爽人人片AV麻豆| 99久久超碰中文字幕伊人| 色婷婷综合久久久久中文一区二区| 天堂无码久久综合东京热| 国产—久久香蕉国产线看观看| 成人久久久观看免费毛片| 久久精品国产亚洲av日韩| 欧美亚洲色综久久精品国产| 久久精品国产亚洲AV蜜臀色欲| 亚洲午夜精品久久久久久浪潮| 欧美粉嫩小泬久久久久久久 |