-
Notifications
You must be signed in to change notification settings - Fork 2
/
ChineseConvert.cpp
177 lines (143 loc) · 3.91 KB
/
ChineseConvert.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
// ChineseConvert.cpp: implementation of the CChineseConvert class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "ChineseConvert.h"
#include <windows.h>
#include <time.h> // for time
//debug and log
// Destination stream for Log2File. Logging is a no-op while this is NULL.
FILE* g_fp = NULL;

// Writes one log entry to g_fp: a "[YYYYMMDD-HH:MM:SS]" local-time prefix
// followed by the printf-style formatted message, then flushes the stream.
//
// format_str  printf-style format string; the variadic arguments must match it.
//             A NULL format string is ignored (nothing is written).
//
// Does nothing when g_fp is NULL. No newline is appended; callers include it
// in format_str when they want one.
void Log2File(const char* format_str, ...)
{
    if (g_fp == NULL || format_str == NULL)
    {
        return;
    }

    time_t now = time(NULL);
    const struct tm* ptime = localtime(&now);
    if (ptime != NULL)
    {
        fprintf(g_fp, "[%04d%02d%02d-%02d:%02d:%02d]",
                (1900 + ptime->tm_year), (1 + ptime->tm_mon), ptime->tm_mday,
                ptime->tm_hour, ptime->tm_min, ptime->tm_sec);
    }
    else
    {
        // localtime() may return NULL when the time cannot be converted;
        // still emit the message rather than dereferencing a null pointer.
        fputs("[time unavailable]", g_fp);
    }

    va_list p_list;
    va_start(p_list, format_str);
    vfprintf(g_fp, format_str, p_list);
    va_end(p_list);

    // Flush after every entry so the log is up to date if the process dies.
    fflush(g_fp);
}
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor. The body is intentionally empty: it performs no
// initialisation of its own.
CChineseConvert::CChineseConvert()
{
    // Nothing to do.
}
// Destructor. The body is intentionally empty: it releases nothing itself.
CChineseConvert::~CChineseConvert()
{
    // Nothing to do.
}
// Converts a single hexadecimal digit character to its numeric value.
//
// ch  the character to convert.
//
// Returns 0-9 for '0'-'9', 10-15 for 'a'-'f' or 'A'-'F', and -1 for any
// other character.
char CChineseConvert::Char2Int(char ch)
{
    char value = -1;
    if ('0' <= ch && ch <= '9')
    {
        value = static_cast<char>(ch - '0');
    }
    else if ('a' <= ch && ch <= 'f')
    {
        value = static_cast<char>(ch - 'a' + 10);
    }
    else if ('A' <= ch && ch <= 'F')
    {
        value = static_cast<char>(ch - 'A' + 10);
    }
    return value;
}
// Packs two hexadecimal digit characters into one byte.
//
// str  points to at least two characters; str[0] supplies the high nibble
//      and str[1] the low nibble (e.g. "B0" -> 0xB0, binary 10110000).
//
// Returns the combined byte. The digits are not validated here: each is
// passed through Char2Int and the results are combined as-is.
char CChineseConvert::Str2Bin(char *str)
{
    const char highNibble = Char2Int(str[0]); // e.g. 'B' -> 11 (00001011)
    const char lowNibble = Char2Int(str[1]);  // e.g. '0' -> 0  (00000000)

    // Shift the first digit into the upper four bits and merge in the second.
    char packed = (highNibble << 4) | lowNibble;
    return packed;
}
// Decodes a URL-encoded (percent-encoded) string.
//
// str  the encoded input.
//
// Returns the decoded bytes:
//   - "%XY", where X and Y are both hexadecimal digits, becomes the single
//     byte 0xXY;
//   - '+' becomes a space;
//   - every other character is copied unchanged.
//
// A '%' that is not followed by two hexadecimal digits (including a '%' in
// the last two positions of the input) is copied through literally instead
// of being decoded. This avoids reading past the end of the string and
// avoids emitting garbage bytes for malformed escapes.
string CChineseConvert::UrlDecode(const string& str)
{
    string output;
    const size_t len = str.length();
    output.reserve(len); // decoded text is never longer than the input

    size_t i = 0;
    while (i < len)
    {
        const char ch = str[i];

        // len - i > 2 means both str[i + 1] and str[i + 2] are in range
        // (i < len here, so the subtraction cannot wrap).
        if (ch == '%' && len - i > 2)
        {
            // Char2Int yields -1 for non-hex characters; viewed as unsigned
            // char that is > 15 regardless of whether plain char is signed.
            const unsigned char hi = static_cast<unsigned char>(Char2Int(str[i + 1]));
            const unsigned char lo = static_cast<unsigned char>(Char2Int(str[i + 2]));
            if (hi <= 15 && lo <= 15)
            {
                output += static_cast<char>((hi << 4) | lo);
                i += 3;
                continue;
            }
        }

        output += (ch == '+') ? ' ' : ch;
        ++i;
    }
    return output;
}
// Converts a NUL-terminated UTF-8 string to the system ANSI code page
// (CP_ACP) by going through UTF-16.
//
// pStrUTF8  NUL-terminated UTF-8 input. NULL is treated as an empty string.
//
// Returns a NUL-terminated buffer allocated with new[]; the caller owns it
// and must release it with delete[]. If a conversion step fails, the result
// is an empty string.
//
// NOTE(review): the target is CP_ACP, which is GB2312/GBK only on systems
// whose ANSI code page is 936 - confirm whether code page 936 should be
// requested explicitly.
char* CChineseConvert::UTF8ToGB2312(const char* pStrUTF8)
{
    if (pStrUTF8 == NULL)
    {
        pStrUTF8 = "";
    }

    // Step 1: UTF-8 -> wide characters. With cbMultiByte == -1 the reported
    // length includes the terminating NUL; 0 means the conversion failed.
    int nWideLen = MultiByteToWideChar(CP_UTF8, 0, pStrUTF8, -1, NULL, 0);
    if (nWideLen < 0)
    {
        nWideLen = 0;
    }
    wchar_t* pWStr = new wchar_t[nWideLen + 1];
    // Zero the whole buffer (size in bytes, not elements) so that it is
    // always terminated, even if the conversion below fails.
    memset(pWStr, 0, (nWideLen + 1) * sizeof(wchar_t));
    if (nWideLen > 0)
    {
        MultiByteToWideChar(CP_UTF8, 0, pStrUTF8, -1, pWStr, nWideLen);
    }

    // Step 2: wide characters -> ANSI code page.
    int nAnsiLen = WideCharToMultiByte(CP_ACP, 0, pWStr, -1, NULL, 0, NULL, NULL);
    if (nAnsiLen < 0)
    {
        nAnsiLen = 0;
    }
    char* pStr = new char[nAnsiLen + 1];
    memset(pStr, 0, nAnsiLen + 1);
    if (nAnsiLen > 0)
    {
        WideCharToMultiByte(CP_ACP, 0, pWStr, -1, pStr, nAnsiLen, NULL, NULL);
    }

    delete[] pWStr;

    // Log the narrow input and output; a wchar_t* must not be passed to a
    // narrow "%s" conversion.
    Log2File("[UTF8ToGB2312]%s -> %s\n", pStrUTF8, pStr);
    return pStr;
}
// Input: URL-encoded UTF-8 text. Output: the decoded text converted by
// UTF8ToGB2312.
//
// instr  the URL-encoded UTF-8 string; it is only read, never modified.
//
// Returns the converted text as a string. The original, the URL-decoded and
// the converted forms are written to the log.
string CChineseConvert::Url_Utf8ToGB2312(string& instr)
{
    string input = UrlDecode(instr);

    // UTF8ToGB2312 returns a buffer allocated with new[]; copy it into a
    // string and free it here so that it is not leaked on every call.
    char* pConverted = UTF8ToGB2312(input.c_str());
    string output;
    if (pConverted != NULL)
    {
        output = pConverted;
        delete[] pConverted;
    }

    Log2File("[Url_Utf8ToGB2312]%s -> %s -> %s\n", instr.c_str(), input.c_str(), output.c_str());
    return output;
}
// Checks whether a NUL-terminated byte string is structured as multi-byte
// UTF-8.
//
// pStr  NUL-terminated input; must not be NULL (it is passed to strlen).
//
// Returns true only if the string contains at least one non-ASCII byte and
// every multi-byte sequence is well formed: a lead byte announcing 2 to 6
// bytes followed by the right number of 10xxxxxx continuation bytes.
// A string made up solely of ASCII bytes (including the empty string) is
// reported as NOT UTF-8.
bool CChineseConvert::IsUTF8String(const char* pStr)
{
    const int total = strlen(pStr);
    int pending = 0;         // continuation bytes still expected
    bool sawHighBit = false; // set once any byte >= 0x80 has been seen

    for (int pos = 0; pos < total; ++pos)
    {
        const unsigned char byte = static_cast<unsigned char>(pStr[pos]);

        if ((byte & 0x80) == 0)
        {
            // Plain ASCII byte: only valid between sequences.
            if (pending != 0)
            {
                return false;
            }
            continue;
        }

        sawHighBit = true;

        if (pending > 0)
        {
            // Every byte after the lead byte must look like 10xx xxxx.
            if ((byte & 0xc0) != 0x80)
            {
                return false;
            }
            --pending;
            continue;
        }

        // Lead byte: the number of leading 1 bits is the sequence length.
        int leadingOnes = 0;
        for (unsigned char mask = 0x80; mask != 0 && (byte & mask) != 0; mask >>= 1)
        {
            ++leadingOnes;
        }
        // A lead byte must be at least 110x xxxx; at most 6 bytes are allowed.
        if (leadingOnes < 2 || leadingOnes > 6)
        {
            return false;
        }
        pending = leadingOnes - 1; // the lead byte itself is already consumed
    }

    // All-ASCII input is not treated as UTF-8, and a truncated trailing
    // sequence is invalid.
    return sawHighBit && pending == 0;
}