1、简介
这几天涉及到一个小项目:Windows 和 Linux 通过 TCP 进行通信。但是两者的宽字符类型是有区别的,所以 Windows 上的数据发送到 Linux 后,如果不经过转换,会出现乱码的情况。Windows 的 WCHAR 类型实际上是 wchar_t,但是它只占用 2 个字节(即 UTF-16),而 Linux 上的 wchar_t 则占用 4 个字节(即 UTF-32)。下面的函数提供了它们之间的相互转换!
2、程序代码
#ifdef WINDOWS
#include <windows.h>
#include <stdio.h>
#include <ctype.h>
#else
#include <wchar.h>
#include <string.h>
#endif

#ifdef WIN32

/*
 * When WIN32 is defined the conversion macros are pass-throughs: the wide
 * string is handed over unchanged as the UTF-16 string and vice versa.
 */
#define USES_WCSUTF16_CONV
#define WCS2UTF16(wcs) (wcs)
#define UTF162WCS(utf16) (utf16)

#else /* !WIN32 */

#include <alloca.h>

/*
 * wchar_t -> UTF-16 (char16_t).
 *
 * Converts exactly `len` elements of `wcs` into UTF-16 code units stored in
 * `utf16`. No terminator is appended, so `len` must include the source
 * terminator if the output is meant to be NUL-terminated. Values in
 * U+10000..U+10FFFF become a surrogate pair (two units); values outside the
 * Unicode range (negative or above U+10FFFF) become U+FFFD instead of a
 * corrupt surrogate. The caller must provide room for up to two units per
 * input element.
 *
 * Returns `utf16` (the start of the output buffer).
 */
unsigned short *_wcsu16_wcs2utf16(wchar_t *wcs, unsigned short *utf16, int len)
{
    unsigned short *ret = utf16;
    wchar_t *end;

    if (len <= 0) {
        return ret;
    }

    end = wcs + len;
    while (wcs < end) {
        /* Going through unsigned makes a negative wchar_t look "too large". */
        unsigned long cp = (unsigned long)*wcs++;

        if (cp > 0x10FFFFUL) {
            *utf16++ = 0xFFFD; /* replacement character */
        } else if (cp > 0xFFFFUL) {
            cp -= 0x10000UL;
            *utf16++ = (unsigned short)(0xD800UL | (cp >> 10));
            *utf16++ = (unsigned short)(0xDC00UL | (cp & 0x3FFUL));
        } else {
            *utf16++ = (unsigned short)cp;
        }
    }
    return ret;
}

/*
 * Number of UTF-16 code units needed to encode the NUL-terminated wchar_t
 * string `wcs`, not counting the terminator. Uses the same rules as
 * _wcsu16_wcs2utf16: two units for U+10000..U+10FFFF, one unit otherwise.
 */
int _wcsu16_utf16len(wchar_t *wcs)
{
    int units = 0;

    for (; *wcs != 0; ++wcs) {
        unsigned long cp = (unsigned long)*wcs;
        units += (cp > 0xFFFFUL && cp <= 0x10FFFFUL) ? 2 : 1;
    }
    return units;
}

/*
 * Number of code units in the NUL-terminated UTF-16 string `utf16`, not
 * counting the terminator. This is an upper bound on the number of wchar_t
 * elements the string converts to.
 */
int _wcsu16_wcslen(unsigned short *utf16)
{
    const unsigned short *p = utf16;

    while (*p != 0) {
        ++p;
    }
    return (int)(p - utf16);
}

/*
 * UTF-16 (char16_t) -> wchar_t.
 *
 * Converts `len` code units of `utf16` into `wcs`. `len` is expected to
 * include the terminator: the last element written is always forced to 0.
 * `wcs` must have room for `len` elements. A high surrogate is combined with
 * the following unit only when that unit lies inside the input and is a low
 * surrogate; an unpaired surrogate is copied through unchanged. With
 * `len <= 0` nothing is read or written.
 *
 * Returns `wcs` (the start of the output buffer).
 */
wchar_t *_wcsu16_utf162wcs(unsigned short *utf16, wchar_t *wcs, int len)
{
    wchar_t *ret = wcs;
    unsigned short *end;

    if (len <= 0) {
        return ret; /* never touch wcs[-1] */
    }

    if (sizeof(unsigned short) == sizeof(wchar_t)) {
        /* wchar_t is already 16 bits wide: plain copy of `len` units. */
        memcpy(wcs, utf16, (size_t)len * sizeof(wchar_t));
        wcs[len - 1] = 0;
        return ret;
    }

    end = utf16 + len;
    while (utf16 < end) {
        unsigned long cp = *utf16++;

        if (cp >= 0xD800UL && cp < 0xDC00UL &&
            utf16 < end && *utf16 >= 0xDC00 && *utf16 <= 0xDFFF) {
            unsigned long low = *utf16++;
            cp = 0x10000UL + ((cp & 0x3FFUL) << 10) + (low & 0x3FFUL);
        }
        *wcs++ = (wchar_t)cp;
    }
    *(wcs - 1) = 0;
    return ret;
}

/*
 * Declares the scratch variables used by WCS2UTF16 / UTF162WCS. Write
 * `USES_WCSUTF16_CONV;` once in every function that uses those macros.
 */
#define USES_WCSUTF16_CONV \
    int _len = 0; (void)_len; \
    wchar_t *_wcs = NULL; (void)_wcs; \
    unsigned short *_utf16 = NULL; (void)_utf16; \
    void *_wcsu16_buf = NULL; (void)_wcsu16_buf

/*
 * Converts a NUL-terminated wchar_t string to a NUL-terminated UTF-16 string
 * in memory obtained from alloca(); yields NULL for a NULL argument. The
 * result must not be freed and must not be used after the calling function
 * returns. The buffer is sized in UTF-16 units, while the converter is given
 * the number of wchar_t elements (wcslen + 1); the two differ whenever the
 * string contains characters above U+FFFF. alloca() is called in a plain
 * assignment rather than inside the argument list of the conversion call.
 */
#define WCS2UTF16(wcs) \
    (((_wcs = (wcs)) == NULL) ? NULL : ( \
        _len = _wcsu16_utf16len(_wcs) + 1, \
        _wcsu16_buf = alloca((size_t)_len * sizeof(unsigned short)), \
        _wcsu16_wcs2utf16(_wcs, (unsigned short *)_wcsu16_buf, \
                          (int)wcslen(_wcs) + 1)))

/*
 * Converts a NUL-terminated UTF-16 string to a NUL-terminated wchar_t string
 * in memory obtained from alloca(); yields NULL for a NULL argument. The
 * result must not be freed and must not be used after the calling function
 * returns.
 */
#define UTF162WCS(utf16) \
    (((_utf16 = (utf16)) == NULL) ? NULL : ( \
        _len = _wcsu16_wcslen(_utf16) + 1, \
        _wcsu16_buf = alloca((size_t)_len * sizeof(wchar_t)), \
        _wcsu16_utf162wcs(_utf16, (wchar_t *)_wcsu16_buf, _len)))

#endif /* WIN32 */

/*
 * 3. Additional notes
 * On Linux, the 16-bit wchar_t type used on Windows can be represented with
 * unsigned short, or directly with the C++ char16_t type.
 *
 * ---------------------
 * Author: CMbug
 * Source: CSDN
 * Original article: https://blog.csdn.net/CMbug/article/details/45719209
 * Copyright notice: this is an original article by the blogger; please
 * include a link to the article when reposting.
 */