• <ins id="pjuwb"></ins>
    <blockquote id="pjuwb"><pre id="pjuwb"></pre></blockquote>
    <noscript id="pjuwb"></noscript>
          <sup id="pjuwb"><pre id="pjuwb"></pre></sup>
            <dd id="pjuwb"></dd>
            <abbr id="pjuwb"></abbr>

              C++博客 :: 首頁 :: 聯系 ::  :: 管理
              163 Posts :: 4 Stories :: 350 Comments :: 0 Trackbacks

            常用鏈接

            留言簿(48)

            我參與的團隊

            搜索

            •  

            積分與排名

            • 積分 - 398977
            • 排名 - 59

            最新評論

            閱讀排行榜

            評論排行榜

            很久以來,我也想總結一下c++字符串相互轉換問題,還沒等我總結,高手已經推出了,這個我珍藏了!

            聲明:轉自CPPBLOG夢在天涯

            一 C++ 中 string與wstring互轉
            方法一:

            // Converts a wide (UTF-16) string to a multi-byte string in the system
            // ANSI code page (GetACP()).
            //
            // _src: wide string to convert; conversion stops at the first embedded
            //       null character because the input is passed as a C string.
            // Returns the converted string, or an empty string if either
            // WideCharToMultiByte call reports failure (return value <= 0).
            string WideToMutilByte(const wstring& _src)
            {
                // First call: query the required size in bytes, including the
                // terminating null (source length is given as -1).
                const int nBufSize = WideCharToMultiByte(GetACP(), 0, _src.c_str(), -1, NULL, 0, NULL, NULL);
                if (nBufSize <= 0)
                    return string();

                // Let std::string own the scratch buffer so nothing leaks if an
                // allocation throws.
                string buffer(static_cast<size_t>(nBufSize), '\0');

                // Second call: perform the conversion into the buffer.
                if (WideCharToMultiByte(GetACP(), 0, _src.c_str(), -1, &buffer[0], nBufSize, NULL, NULL) <= 0)
                    return string();

                // Drop the terminator (and anything after it) that the API wrote.
                return string(buffer.c_str());
            }

            // Converts a multi-byte string in the system ANSI code page (GetACP())
            // to a wide string.
            //
            // _src: multi-byte string to convert; conversion stops at the first
            //       embedded null because the input is passed as a C string.
            // Returns the converted string, or an empty string if either
            // MultiByteToWideChar call reports failure (return value <= 0).
            wstring MutilByteToWide(const string& _src)
            {
                // First call: query the required size. The value is a count of
                // wchar_t elements (not bytes) and includes the terminating null.
                const int nBufSize = MultiByteToWideChar(GetACP(), 0, _src.c_str(), -1, NULL, 0);
                if (nBufSize <= 0)
                    return wstring();

                // Let std::wstring own the scratch buffer so nothing leaks if an
                // allocation throws.
                wstring buffer(static_cast<size_t>(nBufSize), L'\0');

                // Second call: perform the conversion into the buffer.
                if (MultiByteToWideChar(GetACP(), 0, _src.c_str(), -1, &buffer[0], nBufSize) <= 0)
                    return wstring();

                // Drop the terminator (and anything after it) that the API wrote.
                return wstring(buffer.c_str());
            }

            轉載:csdn
            這篇文章里,我將給出幾種C++ std::string和std::wstring相互轉換的轉換方法。
            第一種方法:調用WideCharToMultiByte()和MultiByteToWideChar(),代碼如下(關于詳細的解釋,可以參考《windows核心編程》):

            #include <string>
            #include <windows.h>
            using namespace std;
            //Converting a WChar string to a Ansi string
            // Converts a null-terminated wide string to an ANSI (CP_ACP) string.
            //
            // pwszSrc: null-terminated wide string.
            // Returns the converted string, or an empty string when either
            // WideCharToMultiByte call reports failure (return value <= 0).
            std::string WChar2Ansi(LPCWSTR pwszSrc)
            {
                // Query the required size in bytes, terminator included.
                const int nLen = WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, NULL, 0, NULL, NULL);
                if (nLen <= 0)
                    return std::string();

                // std::string owns the buffer: operator new throws on failure, so a
                // NULL check is pointless, and RAII avoids a leak on exceptions.
                std::string buffer(static_cast<size_t>(nLen), '\0');

                if (WideCharToMultiByte(CP_ACP, 0, pwszSrc, -1, &buffer[0], nLen, NULL, NULL) <= 0)
                    return std::string();

                // Cut at the terminator written by the API.
                return std::string(buffer.c_str());
            }


            // Converts a std::wstring to an ANSI std::string via WChar2Ansi.
            // Takes the input by const reference so const strings and temporaries
            // are accepted, matching s2ws(const string&).
            string ws2s(const wstring& inputws)
            {
            return WChar2Ansi(inputws.c_str());
            }

            //Converting a Ansi string to WChar string 

            // Converts an ANSI (CP_ACP) character buffer to a wide string.
            //
            // pszSrc: source characters.
            // nLen:   number of bytes of pszSrc to convert, forwarded unchanged to
            //         MultiByteToWideChar.
            // Returns the converted text up to the first null character, with one
            // leading U+FEFF (byte order mark) removed if present. Returns an empty
            // string when MultiByteToWideChar reports failure (return value <= 0).
            std::wstring Ansi2WChar(LPCSTR pszSrc, int nLen)
            {
                // Query the number of wide characters required.
                const int nSize = MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, NULL, 0);
                if (nSize <= 0)
                    return std::wstring();   // never build a wstring from NULL

                // std::wstring owns the buffer and keeps a terminator after the last
                // element, so no manual new[]/delete[] pairing is required.
                std::wstring buffer(static_cast<size_t>(nSize), L'\0');

                if (MultiByteToWideChar(CP_ACP, 0, pszSrc, nLen, &buffer[0], nSize) <= 0)
                    return std::wstring();

                // Skip a leading byte order mark (0xFEFF).
                const wchar_t* pwszText = buffer.c_str();
                if (*pwszText == 0xFEFF)
                    ++pwszText;

                return std::wstring(pwszText);
            }


            // Converts an ANSI std::string to a std::wstring by handing its
            // character data and length to Ansi2WChar.
            std::wstring s2ws(const string& s)
            {
                const char* pszText = s.c_str();
                return Ansi2WChar(pszText, s.size());
            }

            第二種方法:采用ATL封裝_bstr_t的過渡:(注,_bstr_t是Microsoft Specific的,所以下面代碼可以在VS2005通過,無移植性);

            #include <string>
            #include <comutil.h>
            using namespace std;
            #pragma comment(lib, "comsuppw.lib")

            string ws2s(const wstring& ws);
            wstring s2ws(const string& s);

            string ws2s(const wstring& ws)
            {
                     _bstr_t t = ws.c_str();
            char* pchar = (char*)t;
            string result = pchar;
            return result;
            }


            wstring s2ws(const string& s)
            {
                     _bstr_t t = s.c_str();
                     wchar_t* pwchar = (wchar_t*)t;
                     wstring result = pwchar;
            return result;
            }

            第三種方法:使用CRT庫的mbstowcs()函數和wcstombs()函數,平臺無關,需設定locale。

            #include <string>
            #include <locale.h>
            using namespace std;
            // Converts a wide string to a multi-byte string with wcstombs(), using
            // the "chs" locale for the duration of the call.
            //
            // ws: wide string to convert (conversion stops at the first null).
            // Returns the converted string, or an empty string if wcstombs()
            // reports an unconvertible character. The locale that was active on
            // entry is restored before returning.
            string ws2s(const wstring& ws)
            {
                // Remember the current locale; setlocale may return NULL on failure.
                const char* previous = setlocale(LC_ALL, NULL);
                const string curLocale = previous ? previous : "C";

                setlocale(LC_ALL, "chs");

                string result;
                const wchar_t* source = ws.c_str();

                // Ask the runtime for the exact byte count instead of guessing
                // "2 bytes per character", which is too small for some encodings.
                const size_t needed = wcstombs(NULL, source, 0);
                if (needed != static_cast<size_t>(-1))
                {
                    string buffer(needed + 1, '\0');
                    const size_t written = wcstombs(&buffer[0], source, buffer.size());
                    if (written != static_cast<size_t>(-1))
                        result.assign(buffer.c_str());
                }

                setlocale(LC_ALL, curLocale.c_str());

                return result;
            }


            // Converts a multi-byte string to a wide string with mbstowcs(), using
            // the "chs" locale for the duration of the call.
            //
            // s: multi-byte string to convert (conversion stops at the first null).
            // Returns the converted string, or an empty string if mbstowcs()
            // reports an invalid multi-byte sequence. The locale that was active on
            // entry is restored before returning (rather than being forced to "C"),
            // matching the behaviour of ws2s().
            wstring s2ws(const string& s)
            {
                // Remember the current locale; setlocale may return NULL on failure.
                const char* previous = setlocale(LC_ALL, NULL);
                const string curLocale = previous ? previous : "C";

                setlocale(LC_ALL, "chs");

                wstring result;
                const char* source = s.c_str();

                // Query the exact number of wide characters required.
                const size_t needed = mbstowcs(NULL, source, 0);
                if (needed != static_cast<size_t>(-1))
                {
                    wstring buffer(needed + 1, L'\0');
                    const size_t written = mbstowcs(&buffer[0], source, buffer.size());
                    if (written != static_cast<size_t>(-1))
                        result.assign(buffer.c_str());
                }

                setlocale(LC_ALL, curLocale.c_str());

                return result;
            }

            二 utf8.utf16.utf32的相互轉化
            可以參考Unicode.org 上有ConvertUTF.c和ConvertUTF.h (下載地址:http://www.unicode.org/Public/PROGRAMS/CVTUTF/)
            實現文件ConvertUTF.c:(.h省)

            /**//*
            * Copyright 2001-2004 Unicode, Inc.
            *
            * Disclaimer
            *
            * This source code is provided as is by Unicode, Inc. No claims are
            * made as to fitness for any particular purpose. No warranties of any
            * kind are expressed or implied. The recipient agrees to determine
            * applicability of information provided. If this file has been
            * purchased on magnetic or optical media from Unicode, Inc., the
            * sole remedy for any claim will be exchange of defective media
            * within 90 days of receipt.
            *
            * Limitations on Rights to Redistribute This Code
            *
            * Unicode, Inc. hereby grants the right to freely use the information
            * supplied in this file in the creation of products supporting the
            * Unicode Standard, and to make copies of this file in any form
            * for internal or external distribution as long as this notice
            * remains attached.
            */

            /**//* ---------------------------------------------------------------------

                Conversions between UTF32, UTF-16, and UTF-8. Source code file.
                Author: Mark E. Davis, 1994.
                Rev History: Rick McGowan, fixes & updates May 2001.
                Sept 2001: fixed const & error conditions per
                mods suggested by S. Parent & A. Lillich.
                June 2002: Tim Dodd added detection and handling of incomplete
                source sequences, enhanced error detection, added casts
                to eliminate compiler warnings.
                July 2003: slight mods to back out aggressive FFFE detection.
                Jan 2004: updated switches in from-UTF8 conversions.
                Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.

                See the header file "ConvertUTF.h" for complete documentation.

            ------------------------------------------------------------------------ */


            #include "ConvertUTF.h"
            #ifdef CVTUTF_DEBUG
            #include <stdio.h>
            #endif

            /* Shift distance between the high-surrogate payload and the low-surrogate payload. */
            static const int halfShift  = 10; /* used for shifting by 10 bits */

            /* Offset subtracted from a supplementary code point before it is split
             * into surrogates, and added back when a surrogate pair is joined. */
            static const UTF32 halfBase = 0x0010000UL;
            /* Mask selecting the low 10 bits, i.e. the low-surrogate payload. */
            static const UTF32 halfMask = 0x3FFUL;

            /* Inclusive bounds of the UTF-16 high and low surrogate ranges. */
            #define UNI_SUR_HIGH_START  (UTF32)0xD800
            #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
            #define UNI_SUR_LOW_START   (UTF32)0xDC00
            #define UNI_SUR_LOW_END     (UTF32)0xDFFF
            /* C-style boolean constants, used as the return values of isLegalUTF8. */
            #define false       0
            #define true        1

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16 code
             * units written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, targetExhausted (output full; the source
             * pointer is left on the unconverted code point) or sourceIllegal.
             *
             * Surrogate values in the input stop the conversion in strict mode and are
             * replaced by UNI_REPLACEMENT_CHAR otherwise. Values above
             * UNI_MAX_LEGAL_UTF32 are replaced in lenient mode; in strict mode they
             * only set the result to sourceIllegal and the loop keeps going.
             */
            ConversionResult ConvertUTF32toUTF16 (
                const UTF32** sourceStart, const UTF32* sourceEnd,
                UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
                const UTF32* in = *sourceStart;
                UTF16* out = *targetStart;
                ConversionResult status = conversionOK;

                while (in < sourceEnd) {
                    UTF32 cp;
                    if (out >= targetEnd) {
                        status = targetExhausted;
                        break;
                    }
                    cp = *in++;
                    if (cp <= UNI_MAX_BMP) {
                        /* Fits in one UTF-16 unit, unless it is a surrogate value. */
                        if (cp < UNI_SUR_HIGH_START || cp > UNI_SUR_LOW_END) {
                            *out++ = (UTF16)cp; /* normal case */
                        } else if (flags != strictConversion) {
                            *out++ = UNI_REPLACEMENT_CHAR;
                        } else {
                            --in; /* return to the illegal value itself */
                            status = sourceIllegal;
                            break;
                        }
                    } else if (cp > UNI_MAX_LEGAL_UTF32) {
                        if (flags == strictConversion) {
                            status = sourceIllegal;
                        } else {
                            *out++ = UNI_REPLACEMENT_CHAR;
                        }
                    } else {
                        /* Supplementary code point: needs room for two units. */
                        if (out + 1 >= targetEnd) {
                            --in; /* back up the source pointer */
                            status = targetExhausted;
                            break;
                        }
                        cp -= halfBase;
                        out[0] = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
                        out[1] = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
                        out += 2;
                    }
                }

                *sourceStart = in;
                *targetStart = out;
                return status;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 code
             * points written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, sourceExhausted (input ends after a high
             * surrogate), targetExhausted (output full) or sourceIllegal (unpaired
             * surrogate while flags == strictConversion). In lenient mode unpaired
             * surrogates are copied through unchanged.
             */
            ConversionResult ConvertUTF16toUTF32 (
                const UTF16** sourceStart, const UTF16* sourceEnd,
                UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF16* source = *sourceStart;
                UTF32* target = *targetStart;
                /* Initialised so the debug trace below never reads indeterminate values. */
                UTF32 ch = 0, ch2 = 0;
                while (source < sourceEnd) {
                    const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
                    ch = *source++;
                    ch2 = 0; /* no trailing unit examined yet for this character */
                    /* If we have a surrogate pair, convert to UTF32 first. */
                    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
                        /* If the 16 bits following the high surrogate are in the source buffer */
                        if (source < sourceEnd) {
                            ch2 = *source;
                            /* If it's a low surrogate, convert to UTF32. */
                            if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                                ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                                    + (ch2 - UNI_SUR_LOW_START) + halfBase;
                                ++source;
                            } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                                --source; /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            }
                        } else { /* We don't have the 16 bits following the high surrogate. */
                            --source; /* return to the high surrogate */
                            result = sourceExhausted;
                            break;
                        }
                    } else if (flags == strictConversion) {
                        /* UTF-16 surrogate values are illegal in UTF-32 */
                        if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                            --source; /* return to the illegal value itself */
                            result = sourceIllegal;
                            break;
                        }
                    }
                    if (target >= targetEnd) {
                        source = oldSource; /* Back up source pointer! */
                        result = targetExhausted; break;
                    }
                    *target++ = ch;
                }
                *sourceStart = source;
                *targetStart = target;
            #ifdef CVTUTF_DEBUG
                if (result == sourceIllegal) {
                    /* Cast so the arguments match %x whatever integer type UTF32 is. */
                    fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n",
                            (unsigned int)ch, (unsigned int)ch2);
                    fflush(stderr);
                }
            #endif
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
            * Index into the table below with the first byte of a UTF-8 sequence to
            * get the number of trailing bytes that are supposed to follow it.
            * Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes
            * (lead bytes 0xF8-0xFF). Those entries are left as-is for anyone who
            * may want to do such conversion, which was allowed in earlier algorithms.
            * Each row covers 32 lead-byte values: 0x00-0xBF map to 0, 0xC0-0xDF to 1,
            * 0xE0-0xEF to 2, 0xF0-0xF7 to 3, 0xF8-0xFB to 4 and 0xFC-0xFF to 5.
            */
            static const char trailingBytesForUTF8[256] = {
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
            };

            /*
            * Magic values subtracted from a buffer value during UTF8 conversion.
            * This table contains as many values as there might be trailing bytes
            * in a UTF-8 sequence. It is indexed by the number of trailing bytes.
            * The decoders add the raw bytes together with 6-bit shifts, so each
            * entry is the sum of the lead-byte and continuation-byte marker bits
            * at their shifted positions, e.g. 0x3080 == (0xC0 << 6) + 0x80.
            */
            static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
            0x03C82080UL, 0xFA082080UL, 0x82082080UL };

            /*
            * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
            * into the first byte, depending on how many bytes follow.  There are
            * as many entries in this table as there are UTF-8 sequence types.
            * (I.e., one byte sequence, two byte etc.). Remember that sequences
            * for *legal* UTF-8 will be 4 or fewer bytes total.
            * The encoders index this table with the total byte count (bytesToWrite);
            * index 0 is an unused placeholder.
            */
            static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

            /**//* --------------------------------------------------------------------- */

            /**//* The interface converts a whole buffer to avoid function-call overhead.
            * Constants have been gathered. Loops & conditionals have been removed as
            * much as possible for efficiency, in favor of drop-through switches.
            * (See "Note A" at the bottom of the file for equivalent code.)
            * If your compiler supports it, the "isLegalUTF8" call can be turned
            * into an inline function.
            */

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 bytes
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, sourceExhausted (input ends after a high
             * surrogate), targetExhausted (not enough room for the next character; the
             * source pointer is rewound to that character) or sourceIllegal (unpaired
             * surrogate while flags == strictConversion).
             */
            ConversionResult ConvertUTF16toUTF8 (
            const UTF16** sourceStart, const UTF16* sourceEnd,
                UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
            const UTF16* source = *sourceStart;
                UTF8* target = *targetStart;
            while (source < sourceEnd) {
                UTF32 ch;
                unsigned short bytesToWrite = 0;
            /* (ch | byteMark) & byteMask keeps the low 6 bits and sets the 10xxxxxx continuation prefix. */
            const UTF32 byteMask = 0xBF;
            const UTF32 byteMark = 0x80;
            const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
                ch = *source++;
            /* If we have a surrogate pair, convert to UTF32 first. */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer */
            if (source < sourceEnd) {
                    UTF32 ch2 = *source;
            /* If it's a low surrogate, convert to UTF32. */
            if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                        ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
            + (ch2 - UNI_SUR_LOW_START) + halfBase;
            ++source;
                    } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
            --source; /* return to the illegal value itself */
                        result = sourceIllegal;
            break;
                    }
                    } else { /* We don't have the 16 bits following the high surrogate. */
            --source; /* return to the high surrogate */
                    result = sourceExhausted;
            break;
                    }
                } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
            --source; /* return to the illegal value itself */
                    result = sourceIllegal;
            break;
                    }
                }
            /* Figure out how many bytes the result will require */
            if (ch < (UTF32)0x80) {         bytesToWrite = 1;
                } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
                } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
                } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
                } else {                bytesToWrite = 3;
                                    ch = UNI_REPLACEMENT_CHAR;
                }

                /* Advance past the slot first, then fill it from the last byte backwards. */
                target += bytesToWrite;
            if (target > targetEnd) {
                    source = oldSource; /* Back up source pointer! */
                    target -= bytesToWrite; result = targetExhausted; break;
                }
            switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
                }
                /* The switch walked target back to the lead byte; step over the sequence again. */
                target += bytesToWrite;
                }
            *sourceStart = source;
            *targetStart = target;
            return result;
            }

            /**//* --------------------------------------------------------------------- */

            /**//*
            * Utility routine to tell whether a sequence of bytes is legal UTF-8.
            * This must be called with the length pre-determined by the first byte.
            * If not calling this from ConvertUTF8to*, then the length can be set by:
            *  length = trailingBytesForUTF8[*source]+1;
            * and the sequence is illegal right away if there aren't that many bytes
            * available.
            * If presented with a length > 4, this returns false.  The Unicode
            * definition of UTF-8 goes up to 4-byte sequences.
            */

            /*
             * Returns true when the `length` bytes starting at `source` form a legal
             * UTF-8 sequence, false otherwise. Any length outside 1..4 is rejected.
             * Bytes are checked from the last one backwards, so after the outer
             * fall-through reaches case 2 the variable `a` holds the second byte.
             */
            static Boolean isLegalUTF8(const UTF8 *source, int length) {
                UTF8 a;
            const UTF8 *srcptr = source+length;
            switch (length) {
            default: return false;
            /* Everything else falls through when "true" */
            /* Trailing bytes must lie in 0x80..0xBF. */
            case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
            case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
            case 2: if ((a = (*--srcptr)) > 0xBF) return false;

            /* The allowed range of the second byte is narrower for some lead bytes. */
            switch (*source) {
            /* no fall-through in this inner switch */
            case 0xE0: if (a < 0xA0) return false; break;
            case 0xED: if (a > 0x9F) return false; break;
            case 0xF0: if (a < 0x90) return false; break;
            case 0xF4: if (a > 0x8F) return false; break;
            default:   if (a < 0x80) return false;
                }

            /* Lead bytes 0x80..0xC1 are never legal. */
            case 1: if (*source >= 0x80 && *source < 0xC2) return false;
                }
            /* Lead bytes above 0xF4 are never legal. */
            if (*source > 0xF4) return false;
            return true;
            }

            /**//* --------------------------------------------------------------------- */

            /**//*
            * Exported function to return whether a UTF-8 sequence is legal or not.
            * This is not used here; it's just exported.
            */
            /*
             * Returns true when the UTF-8 sequence starting at `source` is legal and
             * lies completely inside [source, sourceEnd); false otherwise, including
             * when the range is empty.
             */
            Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
                int length;
                /* Never read the lead byte from an empty range. */
                if (source >= sourceEnd) {
                    return false;
                }
                length = trailingBytesForUTF8[*source]+1;
                /* Compare against the remaining distance instead of computing
                 * source+length, which could point beyond the end of the buffer. */
                if (length > sourceEnd - source) {
                    return false;
                }
                return isLegalUTF8(source, length);
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, sourceExhausted (the last sequence is
             * truncated), targetExhausted (output full; the source pointer is rewound
             * to the start of the unconverted sequence) or sourceIllegal (malformed
             * sequence, or a surrogate / out-of-range value while
             * flags == strictConversion).
             */
            ConversionResult ConvertUTF8toUTF16 (
                const UTF8** sourceStart, const UTF8* sourceEnd,
                UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF8* source = *sourceStart;
                UTF16* target = *targetStart;
                while (source < sourceEnd) {
                    UTF32 ch = 0;
                    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
                    /* Compare against the remaining distance; forming
                     * source + extraBytesToRead could point beyond the buffer. */
                    if (extraBytesToRead >= sourceEnd - source) {
                        result = sourceExhausted; break;
                    }
                    /* Do this check whether lenient or strict */
                    if (! isLegalUTF8(source, extraBytesToRead+1)) {
                        result = sourceIllegal;
                        break;
                    }
                    /*
                     * The cases all fall through. See "Note A" below.
                     */
                    switch (extraBytesToRead) {
                    case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                    case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                    case 3: ch += *source++; ch <<= 6;
                    case 2: ch += *source++; ch <<= 6;
                    case 1: ch += *source++; ch <<= 6;
                    case 0: ch += *source++;
                    }
                    /* Remove the marker bits that were summed in with the payload. */
                    ch -= offsetsFromUTF8[extraBytesToRead];

                    if (target >= targetEnd) {
                        source -= (extraBytesToRead+1); /* Back up source pointer! */
                        result = targetExhausted; break;
                    }
                    if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
                        /* UTF-16 surrogate values are illegal in UTF-32 */
                        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                            if (flags == strictConversion) {
                                source -= (extraBytesToRead+1); /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            } else {
                                *target++ = UNI_REPLACEMENT_CHAR;
                            }
                        } else {
                            *target++ = (UTF16)ch; /* normal case */
                        }
                    } else if (ch > UNI_MAX_UTF16) {
                        if (flags == strictConversion) {
                            result = sourceIllegal;
                            source -= (extraBytesToRead+1); /* return to the start */
                            break; /* Bail out; shouldn't continue */
                        } else {
                            *target++ = UNI_REPLACEMENT_CHAR;
                        }
                    } else {
                        /* target is a character in range 0xFFFF - 0x10FFFF. */
                        if (target + 1 >= targetEnd) {
                            source -= (extraBytesToRead+1); /* Back up source pointer! */
                            result = targetExhausted; break;
                        }
                        ch -= halfBase;
                        *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
                        *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
                    }
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-32 code points in [*sourceStart, sourceEnd) to UTF-8 bytes
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, targetExhausted (not enough room for the
             * next character; the source pointer is rewound to it) or sourceIllegal.
             * Surrogate values stop the conversion when flags == strictConversion.
             * Values above UNI_MAX_LEGAL_UTF32 are written as UNI_REPLACEMENT_CHAR,
             * set the result to sourceIllegal, and the loop continues.
             */
            ConversionResult ConvertUTF32toUTF8 (
            const UTF32** sourceStart, const UTF32* sourceEnd,
                UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
            const UTF32* source = *sourceStart;
                UTF8* target = *targetStart;
            while (source < sourceEnd) {
                UTF32 ch;
                unsigned short bytesToWrite = 0;
            /* (ch | byteMark) & byteMask keeps the low 6 bits and sets the 10xxxxxx continuation prefix. */
            const UTF32 byteMask = 0xBF;
            const UTF32 byteMark = 0x80;
                ch = *source++;
            if (flags == strictConversion ) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
            --source; /* return to the illegal value itself */
                    result = sourceIllegal;
            break;
                    }
                }
            /*
                 * Figure out how many bytes the result will require. Turn any
                 * illegally large UTF32 things (> Plane 17) into replacement chars.
            */
            if (ch < (UTF32)0x80) {         bytesToWrite = 1;
                } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
                } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
                } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
                } else {                bytesToWrite = 3;
                                    ch = UNI_REPLACEMENT_CHAR;
                                    result = sourceIllegal;
                }

                /* Advance past the slot first, then fill it from the last byte backwards. */
                target += bytesToWrite;
            if (target > targetEnd) {
            --source; /* Back up source pointer! */
                    target -= bytesToWrite; result = targetExhausted; break;
                }
            switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
            case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
                }
                /* The switch walked target back to the lead byte; step over the sequence again. */
                target += bytesToWrite;
                }
            *sourceStart = source;
            *targetStart = target;
            return result;
            }

            /**//* --------------------------------------------------------------------- */

            /*
             * Converts UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-32 code points
             * written to [*targetStart, targetEnd).
             *
             * On return *sourceStart and *targetStart are advanced to the positions
             * reached. Returns conversionOK, sourceExhausted (the last sequence is
             * truncated), targetExhausted (output full; the source pointer is rewound
             * to the start of the unconverted sequence) or sourceIllegal. Surrogate
             * values stop the conversion when flags == strictConversion and are
             * replaced otherwise; values above UNI_MAX_LEGAL_UTF32 are replaced by
             * UNI_REPLACEMENT_CHAR and set the result to sourceIllegal.
             */
            ConversionResult ConvertUTF8toUTF32 (
                const UTF8** sourceStart, const UTF8* sourceEnd,
                UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
                ConversionResult result = conversionOK;
                const UTF8* source = *sourceStart;
                UTF32* target = *targetStart;
                while (source < sourceEnd) {
                    UTF32 ch = 0;
                    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
                    /* Compare against the remaining distance; forming
                     * source + extraBytesToRead could point beyond the buffer. */
                    if (extraBytesToRead >= sourceEnd - source) {
                        result = sourceExhausted; break;
                    }
                    /* Do this check whether lenient or strict */
                    if (! isLegalUTF8(source, extraBytesToRead+1)) {
                        result = sourceIllegal;
                        break;
                    }
                    /*
                     * The cases all fall through. See "Note A" below.
                     */
                    switch (extraBytesToRead) {
                    case 5: ch += *source++; ch <<= 6;
                    case 4: ch += *source++; ch <<= 6;
                    case 3: ch += *source++; ch <<= 6;
                    case 2: ch += *source++; ch <<= 6;
                    case 1: ch += *source++; ch <<= 6;
                    case 0: ch += *source++;
                    }
                    /* Remove the marker bits that were summed in with the payload. */
                    ch -= offsetsFromUTF8[extraBytesToRead];

                    if (target >= targetEnd) {
                        source -= (extraBytesToRead+1); /* Back up the source pointer! */
                        result = targetExhausted; break;
                    }
                    if (ch <= UNI_MAX_LEGAL_UTF32) {
                        /*
                         * UTF-16 surrogate values are illegal in UTF-32, and anything
                         * over Plane 17 (> 0x10FFFF) is illegal.
                         */
                        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                            if (flags == strictConversion) {
                                source -= (extraBytesToRead+1); /* return to the illegal value itself */
                                result = sourceIllegal;
                                break;
                            } else {
                                *target++ = UNI_REPLACEMENT_CHAR;
                            }
                        } else {
                            *target++ = ch;
                        }
                    } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
                        result = sourceIllegal;
                        *target++ = UNI_REPLACEMENT_CHAR;
                    }
                }
                *sourceStart = source;
                *targetStart = target;
                return result;
            }

            /**//* ---------------------------------------------------------------------

                Note A.
                The fall-through switches in UTF-8 reading code save a
                temp variable, some decrements & conditionals.  The switches
                are equivalent to the following loop:
                {
                    int tmpBytesToRead = extraBytesToRead+1;
                    do {
                    ch += *source++;
                    --tmpBytesToRead;
                    if (tmpBytesToRead) ch <<= 6;
                    } while (tmpBytesToRead > 0);
                }
                In UTF-8 writing code, the switches on "bytesToWrite" are
                similarly unrolled loops.

               --------------------------------------------------------------------- */

            三 C++ 的字符串與C#的轉化
            1)將System::String 轉化為C++的string:

            // convert_system_string.cpp
            // compile with: /clr
            #include <string>
            #include <iostream>
            using namespace std;
            using namespace System;

            // Copies the managed string `s` into the native std::string `os`.
            // An ANSI copy is allocated with Marshal::StringToHGlobalAnsi, assigned
            // to `os`, and then released with Marshal::FreeHGlobal.
            void MarshalString ( String ^ s, string& os ) {
               using namespace Runtime::InteropServices;
               IntPtr nativeCopy = Marshal::StringToHGlobalAnsi(s);
               os = static_cast<const char*>(nativeCopy.ToPointer());
               Marshal::FreeHGlobal(nativeCopy);
            }

            // Copies the managed string `s` into the native std::wstring `os`.
            // A wide-character copy is allocated with Marshal::StringToHGlobalUni,
            // assigned to `os`, and then released with Marshal::FreeHGlobal.
            void MarshalString ( String ^ s, wstring& os ) {
               using namespace Runtime::InteropServices;
               IntPtr nativeCopy = Marshal::StringToHGlobalUni(s);
               os = static_cast<const wchar_t*>(nativeCopy.ToPointer());
               Marshal::FreeHGlobal(nativeCopy);
            }

            int main() {
            string a = "test";
               wstring b = L"test2";
               String ^ c = gcnew String("abcd");

               cout << a << endl;
               MarshalString(c, a);
               c = "efgh";
               MarshalString(c, b);
               cout << a << endl;
               wcout << b << endl;
            }

            2)將System::String轉化為char*或wchar_t*

            // convert_string_to_wchar.cpp
            // compile with: /clr
            #include < stdio.h >
            #include < stdlib.h >
            #include < vcclr.h >

            using namespace System;

            int main() {
               String ^str = "Hello";

            // Pin memory so GC can't move it while native function is called
               pin_ptr<const wchar_t> wch = PtrToStringChars(str);
               printf_s("%S\n", wch);

            // Conversion to char* :
            // Can just convert wchar_t* to char* using one of the
            // conversion functions such as:
            // WideCharToMultiByte()
            // wcstombs_s()
            // etc
               size_t convertedChars = 0;
               size_t  sizeInBytes = ((str->Length + 1) * 2);
               errno_t err = 0;
            char *ch = (char *)malloc(sizeInBytes);

               err = wcstombs_s(&convertedChars,
                                ch, sizeInBytes,
                                wch, sizeInBytes);
            if (err != 0)
                  printf_s("wcstombs_s  failed!\n");

                printf_s("%s\n", ch);
            }

            posted on 2008-01-19 08:48 sdfasdf 閱讀(1890) 評論(2)  編輯 收藏 引用

            Feedback

            # re: C++中字符串的相互轉換總結(珍藏) 2012-03-26 16:35 re: C++中字符串的相互轉換總結(珍藏)
            re: C++中字符串的相互轉換總結(珍藏)  回復  更多評論
              

            # re: C++中字符串的相互轉換總結(珍藏)[未登錄] 2012-05-24 16:19 3
            00100  回復  更多評論
              


            只有注冊用戶登錄后才能發表評論。
            網站導航: 博客園   IT新聞   BlogJava   博問   Chat2DB   管理


            久久久久久精品免费看SSS| 久久久久亚洲精品无码网址| 久久青青草原精品影院| 无夜精品久久久久久| 久久精品国产精品青草app| 狠狠色狠狠色综合久久| 亚洲日韩欧美一区久久久久我| 久久久久久免费一区二区三区| 久久不射电影网| 久久人搡人人玩人妻精品首页| 久久这里只有精品首页| 婷婷久久精品国产| 亚洲国产精品无码久久| 久久午夜夜伦鲁鲁片免费无码影视| 亚洲国产精品一区二区久久hs| 久久免费精品视频| 一本大道久久东京热无码AV| 国产精品久久国产精品99盘 | 久久午夜夜伦鲁鲁片免费无码影视| 日日躁夜夜躁狠狠久久AV| 久久午夜无码鲁丝片秋霞| 久久久精品午夜免费不卡| 亚洲精品国产第一综合99久久| 无码任你躁久久久久久| 久久精品国产亚洲av日韩| 国产产无码乱码精品久久鸭| 国产成人综合久久综合| 久久久久久久精品妇女99| 久久综合狠狠综合久久激情 | 久久国产成人午夜AV影院| 欧美激情精品久久久久久| 97久久精品无码一区二区天美 | 91久久精一区二区三区大全| 久久亚洲视频| 国产成人无码精品久久久久免费| 国产精品成人无码久久久久久| 一本久久知道综合久久| 久久午夜综合久久| 国产亚洲精午夜久久久久久| 精品久久久久久成人AV| 麻豆一区二区99久久久久|