uni_utf8.h

UTF-8 和 Unicode 互轉的 C 代碼

?? H

字號:

/*
 * u2utf8 -- encode one Unicode code point as UTF-8, packed into an integer.
 *
 * uni: the code point to encode.
 *
 * Returns the UTF-8 byte sequence packed into an unsigned int, lead byte
 * in the most significant used byte and the last trail byte in the low
 * 8 bits.  For example 0x41 -> 0x41, 0xE9 -> 0xC3A9, 0x6C34 -> 0xE6B0B4.
 *
 * Values in the surrogate range (0xD800-0xDFFF) get no special treatment
 * and are encoded as ordinary three-byte sequences.
 */
unsigned int u2utf8(unsigned short uni)
{
	/*
	 * Do all arithmetic in unsigned int: the argument would otherwise be
	 * promoted to (signed) int, and shifting a lead byte such as 0xF0
	 * left by 24 overflows a 32-bit signed int.
	 */
	unsigned int cp = uni;

	if (cp < 0x80u) {
		/* One byte: plain ASCII. */
		return cp;
	}
	if (cp < 0x800u) {
		/* Two bytes: 110xxxxx 10xxxxxx. */
		return ((0xC0u | (cp >> 6)) << 8)
			| (0x80u | (cp & 0x3Fu));
	}
	if (cp < 0x10000u) {
		/* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
		return ((0xE0u | (cp >> 12)) << 16)
			| ((0x80u | ((cp >> 6) & 0x3Fu)) << 8)
			| (0x80u | (cp & 0x3Fu));
	}
	if (cp < 0x110000u) {
		/*
		 * Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
		 * Only reachable where unsigned short is wider than 16 bits.
		 */
		return ((0xF0u | (cp >> 18)) << 24)
			| ((0x80u | ((cp >> 12) & 0x3Fu)) << 16)
			| ((0x80u | ((cp >> 6) & 0x3Fu)) << 8)
			| (0x80u | (cp & 0x3Fu));
	}

	/* Beyond U+10FFFF there is no UTF-8 form; hand the value back as is. */
	return cp;
}

/*
 * utf82u -- decode the next character of a UTF-8 string.
 *
 * str:   points at the next character to decode.  The string must be
 *        NUL-terminated: the function reads ahead without a length.
 * chPtr: receives the decoded value.
 *
 * Returns the number of bytes of str that were consumed (always >= 1).
 *
 * In addition to UTF-8, HTML 4.0 numeric character references are
 * recognised, in decimal form ("&#197;") or hexadecimal form
 * ("&#x6C34;").  The whole reference, including the closing ';', must
 * fit in 8 bytes and contain at least one digit; otherwise the '&' is
 * decoded as an ordinary character.
 *
 * Malformed UTF-8 is never rejected.  A NUL byte, a naked trail byte
 * (0x80-0xBF), a lead byte that is not followed by the trail bytes it
 * needs, and any lead byte >= 0xF0 (four-byte and longer sequences are
 * not decoded) each represent themselves and consume one byte.
 */
int utf82u(char *str, int *chPtr)
{
	/* Read bytes as unsigned so values >= 0x80 never turn negative. */
	const unsigned char *s = (const unsigned char *) str;
	int byte = s[0];

	if (byte == '&' && s[1] == '#') {
		int hex = (s[2] == 'x' || s[2] == 'X');
		int base = hex ? 16 : 10;
		int first = hex ? 3 : 2;	/* index of the first digit */
		int n = 0;
		int i;

		for (i = first; i < 8; i++) {
			int d = s[i];

			if (d >= '0' && d <= '9') {
				d -= '0';
			} else if (hex && d >= 'A' && d <= 'F') {
				d = d - 'A' + 10;
			} else if (hex && d >= 'a' && d <= 'f') {
				d = d - 'a' + 10;
			} else {
				break;
			}
			n = n * base + d;
		}

		/*
		 * Accept only "digits then ';'" inside the 8-byte window.
		 * i == 8 means the window ran out before a terminator was seen.
		 */
		if (i > first && i < 8 && s[i] == ';') {
			*chPtr = n;
			return i + 1;
		}
	}

	/*
	 * Anything that was not a complete numeric reference, including a
	 * plain '&', falls through and is decoded as UTF-8.
	 */
	if (byte < 0xC0) {
		/* ASCII, NUL, or a naked trail byte: represents itself. */
		*chPtr = byte;
		return 1;
	}

	if (byte < 0xE0) {
		if ((s[1] & 0xC0) == 0x80) {
			/* Two-byte lead byte followed by a trail byte. */
			*chPtr = ((byte & 0x1F) << 6) | (s[1] & 0x3F);
			return 2;
		}
		/* Lead byte without its trail byte represents itself. */
		*chPtr = byte;
		return 1;
	}

	if (byte < 0xF0) {
		/* s[2] is only examined when s[1] is a trail byte, i.e. not NUL. */
		if ((s[1] & 0xC0) == 0x80 && (s[2] & 0xC0) == 0x80) {
			/* Three-byte lead byte followed by two trail bytes. */
			*chPtr = ((byte & 0x0F) << 12)
				| ((s[1] & 0x3F) << 6)
				| (s[2] & 0x3F);
			return 3;
		}
		/* Lead byte without both trail bytes represents itself. */
		*chPtr = byte;
		return 1;
	}

	/* Lead bytes of four-byte and longer sequences are not decoded. */
	*chPtr = byte;
	return 1;
}

?? 文件大小 5 K

?? 上傳用戶 liuhong22008

?? 所屬分類多國語言處理

??? 相關標簽

#unicode #utf #代碼

?? 快捷鍵說明

復制代碼 Ctrl + C

搜索代碼 Ctrl + F

全屏模式 F11

切換主題 Ctrl + Shift + D

顯示快捷鍵 ?

增大字號 Ctrl + =

減小字號 Ctrl + -

亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? uni_utf8.h

?? 快捷鍵說明