/* unicodeutf8.c */
/* (web code-viewer label, not part of the program: "Font size:") */
#include <stdio.h>
#include <string.h>
/**
 * Decode UTF-8 bytes into 16-bit Unicode (UTF-16) code units.
 *
 * Decoding starts at byte[index] and continues until one of:
 *   - the end of the input is reached,
 *   - a multi-byte sequence is cut off by the end of the input,
 *   - a lead byte is not a valid 1- to 4-byte UTF-8 lead (this includes
 *     the obsolete 5- and 6-byte forms),
 *   - the decoded value is above U+10FFFF.
 * In the last three cases everything decoded so far is kept.
 *
 * Code points up to U+FFFF are stored as one code unit; code points from
 * U+10000 to U+10FFFF are stored as a surrogate pair (two code units)
 * instead of being truncated to their low 16 bits.  Only the low six bits
 * of each continuation byte are used; continuation bytes are not validated
 * any further.
 *
 * @param byte   input array holding one UTF-8 byte per element
 * @param index  position of the first element to decode
 * @param count  end of the input (exclusive upper bound for index)
 * @param out    destination buffer; every code unit is written as 2 bytes
 *               in the host's byte order.  At most 2 * (count - index)
 *               bytes are written; no terminator is appended.
 * @return number of bytes written to out (always a multiple of 2)
 */
int utf82unicode(unsigned int byte[], int index, int count, char *out)
{
    int len = 0;

    while (index < count) {
        unsigned int lead = byte[index];
        unsigned long cp;       /* code point being assembled */
        unsigned short unit;    /* 16-bit unit copied to out */
        int extra;              /* continuation bytes after the lead byte */
        int k;

        if ((lead & 0x80) == 0x00) {            /* 0xxxxxxx */
            cp = lead & 0x7F;
            extra = 0;
        } else if ((lead & 0xE0) == 0xC0) {     /* 110xxxxx */
            cp = lead & 0x1F;
            extra = 1;
        } else if ((lead & 0xF0) == 0xE0) {     /* 1110xxxx */
            cp = lead & 0x0F;
            extra = 2;
        } else if ((lead & 0xF8) == 0xF0) {     /* 11110xxx */
            cp = lead & 0x07;
            extra = 3;
        } else {
            /* Stray continuation byte or a 5/6-byte lead: not valid UTF-8. */
            return len;
        }

        /* The whole sequence must lie inside the input. */
        if (index + extra >= count) {
            return len;
        }

        for (k = 1; k <= extra; ++k) {
            cp = (cp << 6) | (byte[index + k] & 0x3F);
        }
        index += extra + 1;

        if (cp > 0x10FFFFUL) {
            /* Not a Unicode code point; cannot be expressed in UTF-16. */
            return len;
        }

        if (cp >= 0x10000UL) {
            /* Supplementary plane: emit the high surrogate first. */
            cp -= 0x10000UL;
            unit = (unsigned short)(0xD800UL | (cp >> 10));
            memcpy(out + len, &unit, 2);
            len += 2;
            unit = (unsigned short)(0xDC00UL | (cp & 0x3FFUL));
        } else {
            unit = (unsigned short)cp;
        }
        memcpy(out + len, &unit, 2);
        len += 2;
    }
    return len;
}
/**
 * Convert one hexadecimal digit character to its numeric value.
 *
 * @param in   character to convert ('0'-'9', 'A'-'F' or 'a'-'f')
 * @param out  receives the value 0..15 on success; left untouched otherwise
 * @return 1 if in is a hexadecimal digit, 0 if it is not
 */
int char2digist(char in, int *out)
{
    int digit;

    if (in >= '0' && in <= '9') {
        digit = in - '0';
    } else if (in >= 'A' && in <= 'F') {
        digit = (in - 'A') + 10;
    } else if (in >= 'a' && in <= 'f') {
        digit = (in - 'a') + 10;
    } else {
        /* Not a hex digit: report failure without writing to *out. */
        return 0;
    }

    *out = digit;
    return 1;
}
/**
 * Parse the hexadecimal text at ch[index] into one byte value.
 *
 * When at least two characters remain (index + 1 < count) both are parsed
 * as a two-digit hex number.  When only one character remains it is parsed
 * as a single hex digit.
 *
 * @param ch     text containing hexadecimal digits
 * @param index  position of the first digit to parse
 * @param count  number of usable characters in ch
 * @param byte   receives the parsed value on success; untouched on failure
 * @return 1 on success; 0 if a character is not a hex digit, if index is
 *         outside [0, count), or if ch or byte is NULL
 */
int widechar2hexbyte(char* ch, int index, int count, unsigned int *byte)
{
    int hi, lo;

    /* Never read outside the range the caller declared. */
    if (ch == NULL || byte == NULL || index < 0 || index >= count) {
        return 0;
    }

    if (index + 1 < count) {
        /* Two characters available: high nibble followed by low nibble. */
        if (char2digist(ch[index], &hi) && char2digist(ch[index + 1], &lo)) {
            *byte = ((unsigned int)hi << 4) | (unsigned int)lo;
            return 1;
        }
        return 0;
    }

    /* Only one character left: treat it as a single low nibble. */
    if (char2digist(ch[index], &lo)) {
        *byte = (unsigned int)lo;
        return 1;
    }
    return 0;
}
/**
 * Convert a string of hexadecimal digits that spells out UTF-8 bytes
 * (e.g. "E4B8AD") into 16-bit Unicode code units.
 *
 * The text is consumed two characters at a time through
 * widechar2hexbyte(), and the resulting bytes are decoded by
 * utf82unicode().  At most 200 bytes (400 hex digits) are converted; any
 * further input is ignored.
 *
 * The previous version began with memset(out, 0, sizeof(out)), which only
 * cleared sizeof(char *) bytes and could overrun a smaller buffer.  The
 * size of out is not known here, so nothing is pre-cleared and no
 * terminator is written: use the returned length.
 *
 * @param src  NUL-terminated hexadecimal text
 * @param out  destination buffer for the code units written by
 *             utf82unicode(); must be large enough for its output
 * @return number of bytes written to out (as reported by utf82unicode()),
 *         or 1 if src or out is NULL or src holds a non-hex character
 */
int utf8unicode(char *src , char *out)
{
    enum { MAX_BYTES = 200 };
    unsigned int bytes[MAX_BYTES];
    int nbytes = 0;
    int len;
    int i;

    if (src == NULL || out == NULL) {
        return 1;
    }

    len = (int)strlen(src);
    for (i = 0; i < len && nbytes < MAX_BYTES; i += 2) {
        if (!widechar2hexbyte(src, i, len, &bytes[nbytes])) {
            return 1;
        }
        ++nbytes;
    }

    return utf82unicode(bytes, 0, nbytes, out);
}
/*---------------------------------------------------*/
/* 16-bit Unicode code unit. */
typedef unsigned short uchar2;

/**
 * Encode one 16-bit code unit as UTF-8.
 *
 * Values below 0x80 take one byte, values below 0x800 take two bytes and
 * all other 16-bit values take three bytes.  A 16-bit argument can never
 * need a fourth byte, so the former 4-byte branch and the constant
 * buffer-size checks (which could never trigger) have been removed.
 *
 * @param wchar  code unit to encode
 * @param utf8   destination buffer with room for at least 3 bytes;
 *               no terminator is written
 * @return number of bytes stored in utf8 (1, 2 or 3), or -1 if utf8 is NULL
 */
int Uni2UTF(uchar2 wchar, char *utf8)
{
    int len = 0;

    if (utf8 == NULL) {
        return -1;
    }

    if (wchar < 0x80) {
        /* 0xxxxxxx */
        utf8[len++] = (char)wchar;
    } else if (wchar < 0x800) {
        /* 110xxxxx 10xxxxxx */
        utf8[len++] = (char)(0xC0 | (wchar >> 6));
        utf8[len++] = (char)(0x80 | (wchar & 0x3F));
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        utf8[len++] = (char)(0xE0 | ((wchar >> 12) & 0x0F));
        utf8[len++] = (char)(0x80 | ((wchar >> 6) & 0x3F));
        utf8[len++] = (char)(0x80 | (wchar & 0x3F));
    }
    return len;
}
int unicodeutf8(char *unic, int uniLen, char *utf8)
{
int i, len ;
char *pUtf8 = utf8;
uchar2 *pUni =(uchar2 *)unic;
for (i =0; i< uniLen ; i=i+2 )
{
if( (len=Uni2UTF( *pUni, pUtf8 ) )<0) return -1;
pUtf8 = pUtf8+len;
pUni ++;
}
return (pUtf8 - utf8);//返回utf8長度
}
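/*
 * Keyboard shortcuts (web code-viewer help text, not part of the program):
 *   Copy code             Ctrl + C
 *   Search code           Ctrl + F
 *   Full-screen mode      F11
 *   Switch theme          Ctrl + Shift + D
 *   Show shortcuts        ?
 *   Increase font size    Ctrl + =
 *   Decrease font size    Ctrl + -
 */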