• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>
            posts - 76,  comments - 621,  trackbacks - 0

            一 C++ 中 string與wstring互轉

            方法一:

            string WideToMutilByte(const wstring& _src)
            {
            int nBufSize = WideCharToMultiByte(GetACP(), 0, _src.c_str(),-1, NULL, 0, 0, FALSE);

            char *szBuf = new char[nBufSize];

            WideCharToMultiByte(GetACP(), 0, _src.c_str(),-1, szBuf, nBufSize, 0, FALSE);

            string strRet(szBuf);

            delete []szBuf;
            szBuf = NULL;

            return strRet;
            }

            wstring MutilByteToWide(const string& _src)
            {
            //計算字符串 string 轉成 wchar_t 之后占用的內存字節數
            int nBufSize = MultiByteToWideChar(GetACP(),0,_src.c_str(),-1,NULL,0);

            //為 wsbuf 分配內存 BufSize 個字節
            wchar_t *wsBuf = new wchar_t[nBufSize];

            //轉化為 unicode 的 WideString
            MultiByteToWideChar(GetACP(),0,_src.c_str(),-1,wsBuf,nBufSize);

            wstring wstrRet(wsBuf);

            delete []wsBuf;
            wsBuf = NULL;

            return wstrRet;
            }

             


            轉載:csdn

            這篇文章里,我將給出幾種C++ std::string和std::wstring相互轉換的轉換方法。
             
            第一種方法:調用WideCharToMultiByte()和MultiByteToWideChar(),代碼如下(關于詳細的解釋,可以參考《windows核心編程》):
             

            #include <string>
            #include <windows.h>
            using namespace std;
            //Converting a WChar string to a Ansi string
            // Converts a NUL-terminated wide string to a multi-byte string in the
            // default ANSI code page (CP_ACP).
            //
            // pwszSrc : NUL-terminated wide string; may be NULL.
            // returns : the converted string, or an empty string when pwszSrc is
            //           NULL or the conversion fails.
            std::string WChar2Ansi(LPCWSTR pwszSrc)
            {
                if (NULL == pwszSrc)
                    return std::string();

                // Required size in bytes, including the terminating NUL.
                const int nLen = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, NULL, 0, NULL, NULL);
                if (nLen <= 0)
                    return std::string();

                // Let std::string own the buffer: operator new throws instead of
                // returning NULL, and a raw buffer would leak if anything threw.
                std::string strTemp(static_cast<std::string::size_type>(nLen), '\0');
                const int nWritten = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, &strTemp[0], nLen, NULL, NULL);
                if (nWritten <= 0)
                    return std::string();

                // Strip the terminating NUL written by the API.
                strTemp.resize(static_cast<std::string::size_type>(nWritten - 1));
                return strTemp;
            }

             
            string ws2s(wstring& inputws)
            {
                    return WChar2Ansi(inputws.c_str());
            }

             

             
            //Converting a Ansi string to WChar string


            std::wstring Ansi2WChar(LPCSTR pszSrc, int nLen)
             
            {
                int nSize = MultiByteToWideChar(CP_ACP, 0, (LPCSTR)pszSrc, nLen, 0, 0);
                if(nSize <= 0) return NULL;
             
                     WCHAR *pwszDst = new WCHAR[nSize+1];
                if( NULL == pwszDst) return NULL;
             
                MultiByteToWideChar(CP_ACP, 0,(LPCSTR)pszSrc, nLen, pwszDst, nSize);
                pwszDst[nSize] = 0;
             
                if( pwszDst[0] == 0xFEFF)                    // skip Oxfeff
                    for(int i = 0; i < nSize; i ++)
                                        pwszDst[i] = pwszDst[i+1];
             
                wstring wcharString(pwszDst);
                     delete pwszDst;
             
                return wcharString;
            }

             
            std::wstring s2ws(const string& s)
            {
                 return Ansi2WChar(s.c_str(),s.size());
            }


             
             
            第二種方法:采用 _bstr_t(Visual C++ 編譯器提供的 COM 支持類,聲明于 <comutil.h>)作為過渡:(注,_bstr_t 是 Microsoft Specific 的,所以下面代碼可以在 VS2005 通過,無移植性);


            #include <string>
            #include <comutil.h>
            using namespace std;
            #pragma comment(lib, "comsuppw.lib")
             
            string ws2s(const wstring& ws);
            wstring s2ws(const string& s);
             
            // Wide -> narrow conversion that uses a _bstr_t as the intermediate:
            // the _bstr_t is built from the wide text and then read back through
            // its narrow-character conversion operator.
            string ws2s(const wstring& ws)
            {
                     const _bstr_t bridge(ws.c_str());
                     const char* narrowText = static_cast<const char*>(bridge);
                     return string(narrowText);
            }

             
            // Narrow -> wide conversion that uses a _bstr_t as the intermediate:
            // the _bstr_t is built from the narrow text and then read back through
            // its wide-character conversion operator.
            wstring s2ws(const string& s)
            {
                     const _bstr_t bridge(s.c_str());
                     const wchar_t* wideText = static_cast<const wchar_t*>(bridge);
                     return wstring(wideText);
            }


             
            第三種方法:使用CRT庫的mbstowcs()函數和wcstombs()函數,函數本身平臺無關,但需設定locale(注意:下面代碼中的 locale 名稱 "chs" 是 Windows 專用的,其他平臺需改用該平臺支持的名稱,例如 "zh_CN.UTF-8")。


            #include <string>
            #include <locale.h>
            using namespace std;
            // Converts a wide string to a multi-byte string with the CRT wcstombs().
            //
            // The conversion is attempted under the "chs" locale (a Windows locale
            // name); if that locale cannot be selected, the locale that was active
            // on entry stays in effect. The caller's locale is restored before
            // returning.
            //
            // ws      : wide string to convert; conversion stops at the first L'\0'.
            // returns : the converted bytes, or an empty string if any character
            //           cannot be represented in the locale in effect.
            std::string ws2s(const std::wstring& ws)
            {
                // setlocale() may return NULL, and the buffer it returns can be
                // overwritten by later calls, so take a private copy of the name.
                const char* pszActive = setlocale(LC_ALL, NULL);
                const std::string curLocale = (pszActive != NULL) ? pszActive : "C";

                setlocale(LC_ALL, "chs");

                std::string result;
                const wchar_t* pwszSource = ws.c_str();

                // Ask wcstombs for the exact byte count instead of guessing
                // "2 bytes per character": encodings such as UTF-8 need more, and an
                // undersized buffer silently truncates the output.
                const size_t nNeeded = wcstombs(NULL, pwszSource, 0);
                if (nNeeded != static_cast<size_t>(-1))
                {
                    result.resize(nNeeded + 1);   // +1 for the terminating NUL
                    const size_t nWritten = wcstombs(&result[0], pwszSource, result.size());
                    result.resize(nWritten == static_cast<size_t>(-1) ? 0 : nWritten);
                }

                setlocale(LC_ALL, curLocale.c_str());

                return result;
            }

             
            // Converts a multi-byte string to a wide string with the CRT mbstowcs().
            //
            // The conversion is attempted under the "chs" locale (a Windows locale
            // name); if that locale cannot be selected, the locale that was active
            // on entry stays in effect. The caller's locale is restored before
            // returning (it is no longer forced to "C").
            //
            // s       : multi-byte string to convert; conversion stops at the first '\0'.
            // returns : the converted text, or an empty string if s contains a byte
            //           sequence that is invalid in the locale in effect.
            std::wstring s2ws(const std::string& s)
            {
                // setlocale() may return NULL, and the buffer it returns can be
                // overwritten by later calls, so take a private copy of the name.
                const char* pszActive = setlocale(LC_ALL, NULL);
                const std::string curLocale = (pszActive != NULL) ? pszActive : "C";

                setlocale(LC_ALL, "chs");

                // A multi-byte string never yields more wide characters than it has
                // bytes, so size() + 1 elements (with the terminator) always suffice.
                std::wstring result(s.size() + 1, L'\0');
                const size_t nWritten = mbstowcs(&result[0], s.c_str(), result.size());
                result.resize(nWritten == static_cast<size_t>(-1) ? 0 : nWritten);

                setlocale(LC_ALL, curLocale.c_str());

                return result;
            }


            二 utf8.utf16.utf32的相互轉化

            可以參考 Unicode.org 上提供的 ConvertUTF.c 和 ConvertUTF.h(下載地址:http://www.unicode.org/Public/PROGRAMS/CVTUTF/)

            實現文件ConvertUTF.c:(.h省)
            /**//*
             * Copyright 2001-2004 Unicode, Inc.
             *
             * Disclaimer
             *
             * This source code is provided as is by Unicode, Inc. No claims are
             * made as to fitness for any particular purpose. No warranties of any
             * kind are expressed or implied. The recipient agrees to determine
             * applicability of information provided. If this file has been
             * purchased on magnetic or optical media from Unicode, Inc., the
             * sole remedy for any claim will be exchange of defective media
             * within 90 days of receipt.
             *
             * Limitations on Rights to Redistribute This Code
             *
             * Unicode, Inc. hereby grants the right to freely use the information
             * supplied in this file in the creation of products supporting the
             * Unicode Standard, and to make copies of this file in any form
             * for internal or external distribution as long as this notice
             * remains attached.
             */

            /**//* ---------------------------------------------------------------------

                Conversions between UTF32, UTF-16, and UTF-8. Source code file.
                Author: Mark E. Davis, 1994.
                Rev History: Rick McGowan, fixes & updates May 2001.
                Sept 2001: fixed const & error conditions per
                mods suggested by S. Parent & A. Lillich.
                June 2002: Tim Dodd added detection and handling of incomplete
                source sequences, enhanced error detection, added casts
                to eliminate compiler warnings.
                July 2003: slight mods to back out aggressive FFFE detection.
                Jan 2004: updated switches in from-UTF8 conversions.
                Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.

                See the header file "ConvertUTF.h" for complete documentation.

            ------------------------------------------------------------------------ */


            #include "ConvertUTF.h"
            #ifdef CVTUTF_DEBUG
            #include <stdio.h>
            #endif

            /* Number of payload bits carried by each half of a surrogate pair. */
            static const int halfShift  = 10; /* used for shifting by 10 bits */

            /* Value subtracted from a code point before it is split into a surrogate
             * pair, and added back when a pair is recombined. */
            static const UTF32 halfBase = 0x0010000UL;
            /* Mask selecting the low 10 bits that go into the low surrogate. */
            static const UTF32 halfMask = 0x3FFUL;

            /* Inclusive bounds of the high (leading) and low (trailing) surrogate ranges. */
            #define UNI_SUR_HIGH_START  (UTF32)0xD800
            #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
            #define UNI_SUR_LOW_START   (UTF32)0xDC00
            #define UNI_SUR_LOW_END     (UTF32)0xDFFF
            /* Integer stand-ins for boolean literals, used with the Boolean type.
             * NOTE(review): these redefine keywords if this file is compiled as C++ -
             * presumably it is meant to be built as C; confirm. */
            #define false       0
            #define true        1

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-32 code units in [*sourceStart, sourceEnd) to UTF-16 written
             * to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last unit
             * consumed / produced. The result is conversionOK, targetExhausted (output
             * full; the unconverted unit is left in the source) or sourceIllegal.
             * With strictConversion a surrogate value stops the conversion at that
             * unit; otherwise it is replaced by UNI_REPLACEMENT_CHAR. A value above
             * UNI_MAX_LEGAL_UTF32 is skipped with sourceIllegal recorded (strict) or
             * replaced by UNI_REPLACEMENT_CHAR (lenient); the loop continues either way.
             */
            ConversionResult ConvertUTF32toUTF16 (
                const UTF32** sourceStart, const UTF32* sourceEnd,
                UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
                const UTF32* in = *sourceStart;
                UTF16* out = *targetStart;
                ConversionResult status = conversionOK;

                while (in < sourceEnd) {
                    UTF32 cp;

                    if (out >= targetEnd) {
                        status = targetExhausted;
                        break;
                    }
                    cp = *in;
                    ++in;

                    if (cp > UNI_MAX_BMP && cp <= UNI_MAX_LEGAL_UTF32) {
                        /* Supplementary plane: needs two UTF-16 units. */
                        if (out + 1 >= targetEnd) {
                            --in; /* leave the unit for the next call */
                            status = targetExhausted;
                            break;
                        }
                        cp -= halfBase;
                        out[0] = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
                        out[1] = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
                        out += 2;
                        continue;
                    }

                    if (cp > UNI_MAX_LEGAL_UTF32) {
                        /* Beyond the Unicode range: note it or substitute, then go on. */
                        if (flags == strictConversion) {
                            status = sourceIllegal;
                        } else {
                            *out++ = UNI_REPLACEMENT_CHAR;
                        }
                        continue;
                    }

                    /* From here on cp <= UNI_MAX_BMP. */
                    if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
                        *out++ = (UTF16)cp; /* ordinary BMP character */
                        continue;
                    }

                    /* A surrogate value is not a legal UTF-32 unit. */
                    if (flags == strictConversion) {
                        --in; /* point back at the offending unit */
                        status = sourceIllegal;
                        break;
                    }
                    *out++ = UNI_REPLACEMENT_CHAR;
                }

                *sourceStart = in;
                *targetStart = out;
                return status;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 written
             * to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last unit
             * consumed / produced. Returns:
             *   conversionOK     - all input converted;
             *   sourceExhausted  - input ends with a high surrogate (left unconsumed);
             *   targetExhausted  - output full (the pending character is left unconsumed);
             *   sourceIllegal    - strictConversion only: unpaired surrogate found,
             *                      *sourceStart points at it.
             * Without strictConversion, unpaired surrogates are copied through as-is.
             */
            ConversionResult ConvertUTF16toUTF32 (
                const UTF16** sourceStart, const UTF16* sourceEnd,
                UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF16* source = *sourceStart;
                UTF32* target = *targetStart;
                /* Initialised so the debug trace below never reads indeterminate values. */
                UTF32 ch = 0, ch2 = 0;
                while (source < sourceEnd) {
                    const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
                    ch = *source++;
                    ch2 = 0; /* do not carry a stale second unit over from the previous character */
                    /* If we have a surrogate pair, convert to UTF32 first. */
                    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
                        /* If the 16 bits following the high surrogate are in the source buffer */
                        if (source < sourceEnd) {
                            ch2 = *source;
                            /* If it's a low surrogate, convert to UTF32. */
                            if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                                ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                                    + (ch2 - UNI_SUR_LOW_START) + halfBase;
                                ++source;
                            } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                                --source; /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            }
                        } else { /* We don't have the 16 bits following the high surrogate. */
                            --source; /* return to the high surrogate */
                            result = sourceExhausted;
                            break;
                        }
                    } else if (flags == strictConversion) {
                        /* UTF-16 surrogate values are illegal in UTF-32 */
                        if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                            --source; /* return to the illegal value itself */
                            result = sourceIllegal;
                            break;
                        }
                    }
                    if (target >= targetEnd) {
                        source = oldSource; /* Back up source pointer! */
                        result = targetExhausted; break;
                    }
                    *target++ = ch;
                }
                *sourceStart = source;
                *targetStart = target;
            #ifdef CVTUTF_DEBUG
            if (result == sourceIllegal) {
                /* Cast explicitly so the format matches whatever width UTF32 has. */
                fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04lx,%04lx\n",
                        (unsigned long)ch, (unsigned long)ch2);
                fflush(stderr);
            }
            #endif
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Index into the table below with the first byte of a UTF-8 sequence to
             * get the number of trailing bytes that are supposed to follow it.
             * Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The
             * table is left as-is for anyone who may want to do such conversion, which
             * was allowed in earlier algorithms.
             *
             * Layout (32 entries per row): 0x00-0xBF -> 0, 0xC0-0xDF -> 1,
             * 0xE0-0xEF -> 2, 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
             */
            static const char trailingBytesForUTF8[256] = {
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
            };

            /*
             * Magic values subtracted from a buffer value during UTF8 conversion.
             * This table contains as many values as there might be trailing bytes
             * in a UTF-8 sequence.
             *
             * The decoders index it with the number of trailing bytes and subtract
             * the entry from the sum of the shifted raw bytes they accumulated.
             */
            static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
                         0x03C82080UL, 0xFA082080UL, 0x82082080UL };

            /*
             * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
             * into the first byte, depending on how many bytes follow.  There are
             * as many entries in this table as there are UTF-8 sequence types.
             * (I.e., one byte sequence, two byte etc.). Remember that sequences
             * for *legal* UTF-8 will be 4 or fewer bytes total.
             *
             * The encoders index it with the total number of bytes being written.
             */
            static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

            /**//* --------------------------------------------------------------------- */

            /**//* The interface converts a whole buffer to avoid function-call overhead.
             * Constants have been gathered. Loops & conditionals have been removed as
             * much as possible for efficiency, in favor of drop-through switches.
             * (See "Note A" at the bottom of the file for equivalent code.)
             * If your compiler supports it, the "isLegalUTF8" call can be turned
             * into an inline function.
             */

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 bytes
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last unit
             * consumed / byte produced. Returns conversionOK, sourceExhausted (input
             * ends with a high surrogate), targetExhausted (the next character does
             * not fit; it is left unconsumed) or, with strictConversion only,
             * sourceIllegal (unpaired surrogate; *sourceStart points at it).
             * Without strictConversion an unpaired surrogate is encoded like any
             * other value below 0x10000, i.e. as three bytes.
             */
            ConversionResult ConvertUTF16toUTF8 (
                const UTF16** sourceStart, const UTF16* sourceEnd,
                UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF16* source = *sourceStart;
                UTF8* target = *targetStart;
                while (source < sourceEnd) {
                UTF32 ch;
                unsigned short bytesToWrite = 0;
                /* (ch | byteMark) & byteMask keeps the low six bits and forces the
                 * top two bits to 10, producing a UTF-8 continuation byte. */
                const UTF32 byteMask = 0xBF;
                const UTF32 byteMark = 0x80;
                const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
                ch = *source++;
                /* If we have a surrogate pair, convert to UTF32 first. */
                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
                    /* If the 16 bits following the high surrogate are in the source buffer */
                    if (source < sourceEnd) {
                    UTF32 ch2 = *source;
                    /* If it's a low surrogate, convert to UTF32. */
                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                        ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                        ++source;
                    } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                        --source; /* return to the illegal value itself */
                        result = sourceIllegal;
                        break;
                    }
                    } else { /* We don't have the 16 bits following the high surrogate. */
                    --source; /* return to the high surrogate */
                    result = sourceExhausted;
                    break;
                    }
                } else if (flags == strictConversion) {
                    /* UTF-16 surrogate values are illegal in UTF-32 */
                    if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                    }
                }
                /* Figure out how many bytes the result will require */
                if (ch < (UTF32)0x80) {         bytesToWrite = 1;
                } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
                } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
                } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
                } else {                bytesToWrite = 3;
                                    ch = UNI_REPLACEMENT_CHAR;
                }

                /* Advance to the end of the slot first; the bytes are then stored
                 * back-to-front by the switch below. */
                target += bytesToWrite;
                if (target > targetEnd) {
                    source = oldSource; /* Back up source pointer! */
                    target -= bytesToWrite; result = targetExhausted; break;
                }
                switch (bytesToWrite) { /* note: everything falls through. */
                    case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
                }
                /* The switch left target at the first byte; step past the sequence. */
                target += bytesToWrite;
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /**//*
             * Utility routine to tell whether a sequence of bytes is legal UTF-8.
             * This must be called with the length pre-determined by the first byte.
             * If not calling this from ConvertUTF8to*, then the length can be set by:
             *  length = trailingBytesForUTF8[*source]+1;
             * and the sequence is illegal right away if there aren't that many bytes
             * available.
             * If presented with a length > 4, this returns false.  The Unicode
             * definition of UTF-8 goes up to 4-byte sequences.
             */

            /*
             * Returns true if the `length` bytes starting at `source` form one legal
             * UTF-8 sequence, false otherwise (including any length outside 1..4).
             * The caller must guarantee that `length` bytes are readable.
             *
             * The bytes are examined from the last one back to the second, then the
             * lead byte: the outer switch enters at the case matching `length` and
             * falls through, so shorter sequences simply skip the earlier checks.
             */
            static Boolean isLegalUTF8(const UTF8 *source, int length) {
                UTF8 a;
                const UTF8 *srcptr = source+length;
                switch (length) {
                default: return false;
                /* Everything else falls through when "true" */
                /* 4th and 3rd bytes must lie in 0x80..0xBF. */
                case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
                case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
                /* 2nd byte: upper bound here, lower bound depends on the lead byte. */
                case 2: if ((a = (*--srcptr)) > 0xBF) return false;

                switch (*source) {
                    /* no fall-through in this inner switch */
                    /* For these lead bytes the range of the 2nd byte is narrowed. */
                    case 0xE0: if (a < 0xA0) return false; break;
                    case 0xED: if (a > 0x9F) return false; break;
                    case 0xF0: if (a < 0x90) return false; break;
                    case 0xF4: if (a > 0x8F) return false; break;
                    default:   if (a < 0x80) return false;
                }

                /* Lead bytes 0x80..0xC1 are never valid. */
                case 1: if (*source >= 0x80 && *source < 0xC2) return false;
                }
                /* Lead bytes above 0xF4 are never valid either. */
                if (*source > 0xF4) return false;
                return true;
            }

            /**//* --------------------------------------------------------------------- */

            /**//*
             * Exported function to return whether a UTF-8 sequence is legal or not.
             * This is not used here; it's just exported.
             */
            /*
             * Returns true if the bytes at `source` begin one complete, legal UTF-8
             * sequence that ends at or before `sourceEnd`; false if the range is
             * empty, the sequence is cut off by `sourceEnd`, or it is not legal.
             */
            Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
                int length;
                /* Nothing to inspect: do not read the lead byte of an empty range. */
                if (source >= sourceEnd) {
                    return false;
                }
                length = trailingBytesForUTF8[*source]+1;
                /* Compare distances rather than forming source+length, which could
                 * point past the end of the buffer. */
                if (length > sourceEnd - source) {
                    return false;
                }
                return isLegalUTF8(source, length);
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last byte
             * consumed / unit produced. Returns:
             *   conversionOK     - all input converted;
             *   sourceExhausted  - input ends in the middle of a sequence;
             *   targetExhausted  - output full (the pending sequence is left unconsumed);
             *   sourceIllegal    - malformed sequence (checked in both modes), or, with
             *                      strictConversion, a surrogate / out-of-range value;
             *                      *sourceStart points at the start of the sequence.
             * Without strictConversion, surrogate and out-of-range values are replaced
             * by UNI_REPLACEMENT_CHAR.
             */
            ConversionResult ConvertUTF8toUTF16 (
                const UTF8** sourceStart, const UTF8* sourceEnd,
                UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF8* source = *sourceStart;
                UTF16* target = *targetStart;
                while (source < sourceEnd) {
                    UTF32 ch = 0;
                    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
                    /* Compare against the remaining length instead of computing
                     * source + extraBytesToRead, which may point outside the buffer. */
                    if (extraBytesToRead >= sourceEnd - source) {
                        result = sourceExhausted; break;
                    }
                    /* Do this check whether lenient or strict */
                    if (! isLegalUTF8(source, extraBytesToRead+1)) {
                        result = sourceIllegal;
                        break;
                    }
                    /*
                     * The cases all fall through: each one adds a byte and shifts,
                     * the last one adds the final byte without shifting.
                     */
                    switch (extraBytesToRead) {
                        case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                        case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                        case 3: ch += *source++; ch <<= 6;
                        case 2: ch += *source++; ch <<= 6;
                        case 1: ch += *source++; ch <<= 6;
                        case 0: ch += *source++;
                    }
                    /* Remove the lead/continuation marker bits accumulated above. */
                    ch -= offsetsFromUTF8[extraBytesToRead];

                    if (target >= targetEnd) {
                        source -= (extraBytesToRead+1); /* Back up source pointer! */
                        result = targetExhausted; break;
                    }
                    if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
                        /* UTF-16 surrogate values are illegal in UTF-32 */
                        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                            if (flags == strictConversion) {
                                source -= (extraBytesToRead+1); /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            } else {
                                *target++ = UNI_REPLACEMENT_CHAR;
                            }
                        } else {
                            *target++ = (UTF16)ch; /* normal case */
                        }
                    } else if (ch > UNI_MAX_UTF16) {
                        if (flags == strictConversion) {
                            result = sourceIllegal;
                            source -= (extraBytesToRead+1); /* return to the start */
                            break; /* Bail out; shouldn't continue */
                        } else {
                            *target++ = UNI_REPLACEMENT_CHAR;
                        }
                    } else {
                        /* target is a character in range 0xFFFF - 0x10FFFF. */
                        if (target + 1 >= targetEnd) {
                            source -= (extraBytesToRead+1); /* Back up source pointer! */
                            result = targetExhausted; break;
                        }
                        ch -= halfBase;
                        *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
                        *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
                    }
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-32 code units in [*sourceStart, sourceEnd) to UTF-8 bytes
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last unit
             * consumed / byte produced. With strictConversion a surrogate value stops
             * the conversion with sourceIllegal and *sourceStart pointing at it;
             * otherwise surrogate values are encoded like any other value below
             * 0x10000. A value above UNI_MAX_LEGAL_UTF32 is written as
             * UNI_REPLACEMENT_CHAR and sets the result to sourceIllegal, but the loop
             * keeps going. targetExhausted is returned when the next character does
             * not fit; that character is left unconsumed.
             */
            ConversionResult ConvertUTF32toUTF8 (
                const UTF32** sourceStart, const UTF32* sourceEnd,
                UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF32* source = *sourceStart;
                UTF8* target = *targetStart;
                while (source < sourceEnd) {
                UTF32 ch;
                unsigned short bytesToWrite = 0;
                /* (ch | byteMark) & byteMask keeps the low six bits and forces the
                 * top two bits to 10, producing a UTF-8 continuation byte. */
                const UTF32 byteMask = 0xBF;
                const UTF32 byteMark = 0x80;
                ch = *source++;
                if (flags == strictConversion ) {
                    /* UTF-16 surrogate values are illegal in UTF-32 */
                    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                    --source; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                    }
                }
                /*
                 * Figure out how many bytes the result will require. Turn any
                 * illegally large UTF32 things (> Plane 17) into replacement chars.
                 */
                if (ch < (UTF32)0x80) {         bytesToWrite = 1;
                } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
                } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
                } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
                } else {                bytesToWrite = 3;
                                    ch = UNI_REPLACEMENT_CHAR;
                                    result = sourceIllegal;
                }
               
                /* Advance to the end of the slot first; the bytes are then stored
                 * back-to-front by the switch below. */
                target += bytesToWrite;
                if (target > targetEnd) {
                    --source; /* Back up source pointer! */
                    target -= bytesToWrite; result = targetExhausted; break;
                }
                switch (bytesToWrite) { /* note: everything falls through. */
                    case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                    case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
                }
                /* The switch left target at the first byte; step past the sequence. */
                target += bytesToWrite;
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32 code units
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart point just past the last byte
             * consumed / unit produced. Returns:
             *   conversionOK     - all input converted;
             *   sourceExhausted  - input ends in the middle of a sequence;
             *   targetExhausted  - output full (the pending sequence is left unconsumed);
             *   sourceIllegal    - malformed sequence (checked in both modes), a
             *                      surrogate value under strictConversion, or a value
             *                      above UNI_MAX_LEGAL_UTF32.
             * A surrogate value without strictConversion, and any value above
             * UNI_MAX_LEGAL_UTF32, is written as UNI_REPLACEMENT_CHAR; in the latter
             * case the loop continues after recording sourceIllegal.
             */
            ConversionResult ConvertUTF8toUTF32 (
                const UTF8** sourceStart, const UTF8* sourceEnd,
                UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF8* source = *sourceStart;
                UTF32* target = *targetStart;
                while (source < sourceEnd) {
                    UTF32 ch = 0;
                    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
                    /* Compare against the remaining length instead of computing
                     * source + extraBytesToRead, which may point outside the buffer. */
                    if (extraBytesToRead >= sourceEnd - source) {
                        result = sourceExhausted; break;
                    }
                    /* Do this check whether lenient or strict */
                    if (! isLegalUTF8(source, extraBytesToRead+1)) {
                        result = sourceIllegal;
                        break;
                    }
                    /*
                     * The cases all fall through: each one adds a byte and shifts,
                     * the last one adds the final byte without shifting.
                     */
                    switch (extraBytesToRead) {
                        case 5: ch += *source++; ch <<= 6;
                        case 4: ch += *source++; ch <<= 6;
                        case 3: ch += *source++; ch <<= 6;
                        case 2: ch += *source++; ch <<= 6;
                        case 1: ch += *source++; ch <<= 6;
                        case 0: ch += *source++;
                    }
                    /* Remove the lead/continuation marker bits accumulated above. */
                    ch -= offsetsFromUTF8[extraBytesToRead];

                    if (target >= targetEnd) {
                        source -= (extraBytesToRead+1); /* Back up the source pointer! */
                        result = targetExhausted; break;
                    }
                    if (ch <= UNI_MAX_LEGAL_UTF32) {
                        /*
                         * UTF-16 surrogate values are illegal in UTF-32, and anything
                         * over Plane 17 (> 0x10FFFF) is illegal.
                         */
                        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                            if (flags == strictConversion) {
                                source -= (extraBytesToRead+1); /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            } else {
                                *target++ = UNI_REPLACEMENT_CHAR;
                            }
                        } else {
                            *target++ = ch;
                        }
                    } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
                        result = sourceIllegal;
                        *target++ = UNI_REPLACEMENT_CHAR;
                    }
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* ---------------------------------------------------------------------

                Note A.
                The fall-through switches in UTF-8 reading code save a
                temp variable, some decrements & conditionals.  The switches
                are equivalent to the following loop:
                {
                    int tmpBytesToRead = extraBytesToRead+1;
                    do {
                    ch += *source++;
                    --tmpBytesToRead;
                    if (tmpBytesToRead) ch <<= 6;
                    } while (tmpBytesToRead > 0);
                }
                In UTF-8 writing code, the switches on "bytesToWrite" are
                similarly unrolled loops.

               --------------------------------------------------------------------- */

             

            三 C++ 的字符串與C#的轉化

            1)將System::String 轉化為C++的std::string / std::wstring:
            // convert_system_string.cpp
            // compile with: /clr
            #include <string>
            #include <iostream>
            using namespace std;
            using namespace System;

            // Copies the managed string `s` into the native narrow string `os`.
            // The text is marshalled into a temporary unmanaged ANSI buffer, which
            // is released again once its contents have been assigned to `os`.
            void MarshalString ( String ^ s, string& os ) {
               using namespace Runtime::InteropServices;
               IntPtr ansiBuffer = Marshal::StringToHGlobalAnsi(s);
               os = static_cast<const char*>(ansiBuffer.ToPointer());
               Marshal::FreeHGlobal(ansiBuffer);
            }

            // Copies the managed string `s` into the native wide string `os`.
            // The text is marshalled into a temporary unmanaged wide-character
            // buffer, which is released again once its contents have been assigned
            // to `os`.
            void MarshalString ( String ^ s, wstring& os ) {
               using namespace Runtime::InteropServices;
               IntPtr wideBuffer = Marshal::StringToHGlobalUni(s);
               os = static_cast<const wchar_t*>(wideBuffer.ToPointer());
               Marshal::FreeHGlobal(wideBuffer);
            }

            int main() {
               string a = "test";
               wstring b = L"test2";
               String ^ c = gcnew String("abcd");

               cout << a << endl;
               MarshalString(c, a);
               c = "efgh";
               MarshalString(c, b);
               cout << a << endl;
               wcout << b << endl;
            }


            2)將System::String轉化為char*或wchar_t*
            // convert_string_to_wchar.cpp
            // compile with: /clr
            #include < stdio.h >
            #include < stdlib.h >
            #include < vcclr.h >

            using namespace System;

            int main() {
               String ^str = "Hello";

               // Pin memory so GC can't move it while native function is called
               pin_ptr<const wchar_t> wch = PtrToStringChars(str);
               printf_s("%S\n", wch);

               // Conversion to char* :
               // Can just convert wchar_t* to char* using one of the
               // conversion functions such as:
               // WideCharToMultiByte()
               // wcstombs_s()
               //  etc
               size_t convertedChars = 0;
               size_t  sizeInBytes = ((str->Length + 1) * 2);
               errno_t err = 0;
               char    *ch = (char *)malloc(sizeInBytes);

               err = wcstombs_s(&convertedChars,
                                ch, sizeInBytes,
                                wch, sizeInBytes);
               if (err != 0)
                  printf_s("wcstombs_s  failed!\n");

                printf_s("%s\n", ch);
            }


             

            posted on 2008-08-07 20:18 megax 閱讀(371) 評論(0)  編輯 收藏 引用 所屬分類: 轉貼收集
            久久久久国色AV免费观看| 久久久久99精品成人片欧美 | 亚洲va久久久久| 四虎国产精品成人免费久久| 午夜精品久久影院蜜桃| 狠狠色丁香婷婷久久综合| 国产精品乱码久久久久久软件| 久久综合给合久久狠狠狠97色69| 国产99久久久国产精免费| 亚洲国产天堂久久综合网站| 久久国产精品一区二区| 国内精品久久久久久久涩爱 | 久久精品二区| 久久免费的精品国产V∧| 狠狠色丁香婷综合久久| 精产国品久久一二三产区区别| 色婷婷久久综合中文久久蜜桃av| 2020最新久久久视精品爱| 青青热久久综合网伊人| 欧美va久久久噜噜噜久久| 91久久精品电影| 精品国产福利久久久| 一本久久a久久精品综合香蕉| 中文字幕亚洲综合久久2| 一本色道久久HEZYO无码| 一本久久a久久精品综合香蕉 | 久久国产成人精品麻豆| 久久久久久久久久久久久久| 成人久久久观看免费毛片| 国内精品人妻无码久久久影院| 国产精品99久久久久久宅男 | 蜜桃麻豆www久久| 国产成人精品久久| 人妻精品久久无码专区精东影业| 久久久久亚洲AV综合波多野结衣 | 国产 亚洲 欧美 另类 久久 | 伊人色综合久久天天网| 久久精品成人| 精品久久久久一区二区三区| 欧美日韩中文字幕久久伊人| 久久99国产综合精品免费|