?? unicode.php
字號:
/******************************************************************************
*
* Function:     unicode_array_to_UTF8
*
* Description:  Converts an array of unicode character numbers to a string
*               encoded by UTF-8.
*               Values from 0x00 to 0x7FFFFFFF are encoded using the 1 to 6
*               byte forms. Values outside that range are silently skipped.
*               Note - no check is made for the surrogate range 0xD800-0xDFFF
*                      or for values above 0x10FFFF; such values are encoded
*                      with the 3 to 6 byte forms like any other number.
*
* Parameters:   unicode_array - the array containing unicode character numbers
*
* Returns:      output - the UTF-8 encoded string representing the data
*
******************************************************************************/

function unicode_array_to_UTF8( $unicode_array )
{
    // Create a string to receive the UTF-8 output
    $output = "";

    // Cycle through each Unicode character number
    foreach( $unicode_array as $unicode_char )
    {
        // Integer copy used for all bit arithmetic, so that no float ever
        // reaches chr() or the bitwise operators
        $code = (int) $unicode_char;

        // Check which range the current unicode character lies in, and pick
        // the lead byte marker and the number of continuation bytes
        if ( ( $unicode_char >= 0x00 ) && ( $unicode_char <= 0x7F ) )
        {
            // 1 Byte UTF-8 Unicode (7-Bit ASCII) Character
            // Output is equal to input for 7-bit ASCII
            $output .= chr( $code );
            continue;
        }
        else if ( ( $unicode_char >= 0x80 ) && ( $unicode_char <= 0x7FF ) )
        {
            // 2 Byte UTF-8 Unicode : 110xxxxx 10xxxxxx
            $lead = 0xC0;
            $trail_count = 1;
        }
        else if ( ( $unicode_char >= 0x800 ) && ( $unicode_char <= 0xFFFF ) )
        {
            // 3 Byte UTF-8 Unicode : 1110xxxx 10xxxxxx 10xxxxxx
            $lead = 0xE0;
            $trail_count = 2;
        }
        else if ( ( $unicode_char >= 0x10000 ) && ( $unicode_char <= 0x1FFFFF ) )
        {
            // 4 Byte UTF-8 Unicode : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $lead = 0xF0;
            $trail_count = 3;
        }
        else if ( ( $unicode_char >= 0x200000 ) && ( $unicode_char <= 0x3FFFFFF ) )
        {
            // 5 Byte UTF-8 Unicode : 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            $lead = 0xF8;
            $trail_count = 4;
        }
        else if ( ( $unicode_char >= 0x4000000 ) && ( $unicode_char <= 0x7FFFFFFF ) )
        {
            // 6 Byte UTF-8 Unicode : 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            $lead = 0xFC;
            $trail_count = 5;
        }
        else
        {
            // Invalid Code - do nothing
            continue;
        }

        // Lead byte: marker bits plus the most significant payload bits
        $output .= chr( $lead | ( $code >> ( 6 * $trail_count ) ) );

        // Continuation bytes: 10xxxxxx, six payload bits each, high bits first
        for ( $shift = 6 * ( $trail_count - 1 ); $shift >= 0; $shift -= 6 )
        {
            $output .= chr( 0x80 | ( ( $code >> $shift ) & 0x3F ) );
        }
    }

    // Return resulting UTF-8 String
    return $output;
}

/******************************************************************************
* End of Function:     unicode_array_to_UTF8
******************************************************************************/


/******************************************************************************
*
* Function:     unicode_array_to_UTF16
*
* Description:  Converts an array of unicode character numbers to a string
*               encoded by UTF-16.
*               Numbers in the surrogate range 0xD800-0xDFFF and numbers above
*               0x10FFFF cannot be encoded and are silently skipped.
*
* Parameters:   unicode_array - the array containing unicode character numbers
*               MSB_first - True will cause processing as Big Endian UTF-16 (Motorola, MSB first)
*                           False will cause processing as Little Endian UTF-16 (Intel, LSB first)
*
* Returns:      output - the UTF-16 encoded string representing the data
*
******************************************************************************/

function unicode_array_to_UTF16( $unicode_array, $MSB_first )
{
    // pack() format for one unsigned 16 bit code unit:
    // "n" is big endian (MSB first), "v" is little endian (LSB first)
    $unit_format = $MSB_first ? "n" : "v";

    // Create a string to receive the UTF-16 output
    $output = "";

    // Cycle through each Unicode character number
    foreach( $unicode_array as $unicode_char )
    {
        // Check which range the current unicode character lies in
        if ( ( ( $unicode_char >= 0x0000 ) && ( $unicode_char <= 0xD7FF ) ) ||
             ( ( $unicode_char >= 0xE000 ) && ( $unicode_char <= 0xFFFF ) ) )
        {
            // Normal 16 Bit Character (Not a Surrogate Pair)
            $output .= pack( $unit_format, (int) $unicode_char );
        }
        else if ( ( $unicode_char >= 0x10000 ) && ( $unicode_char <= 0x10FFFF ) )
        {
            // Surrogate Pair required

            // Calculate Surrogates from the 20 bit offset above 0x10000:
            // the top 10 bits go in the high surrogate, the low 10 bits in the low one
            $offset = (int) $unicode_char - 0x10000;
            $High_Surrogate = 0xD800 | ( $offset >> 10 );
            $Low_Surrogate  = 0xDC00 | ( $offset & 0x3FF );

            // The high surrogate always comes first; only the byte order
            // inside each 16 bit unit depends on the endianness
            $output .= pack( $unit_format, $High_Surrogate );
            $output .= pack( $unit_format, $Low_Surrogate );
        }
        else
        {
            // Invalid UTF-16 codepoint
            // Unicode value should never be between 0xD800 and 0xDFFF
            // Do not output this point - there is no way to encode it in UTF-16
        }
    }

    // Return resulting UTF-16 String
    return $output;
}

/******************************************************************************
* End of Function:     unicode_array_to_UTF16
******************************************************************************/


/******************************************************************************
*
* Function:     xml_UTF8_clean
*
* Description:  XML has specific requirements about the characters that are
*               allowed, and characters that must be escaped.
*               This function ensures that all characters in the given string
*               are valid, and that characters such as Quotes, Greater than,
*               Less than and Ampersand are properly escaped. Newlines and Tabs
*               are also escaped.
*               Note - Do not use this on constructed XML which includes tags,
*                      as it will escape the tags. 
It is designed to be used
*                      on the tag and attribute names, attribute values, and text.
*
* Parameters:   utf8_text - a string containing the UTF-8 data
*
* Returns:      output - the UTF-8 string with characters that XML does not
*                        allow removed, HTML special characters escaped, and
*                        TAB, LF and CR replaced by numeric character references
*
******************************************************************************/

function xml_UTF8_clean( $UTF8_text )
{
    // Ensure that the Unicode UTF8 encoding is valid.
    $UTF8_text = UTF8_fix( $UTF8_text );

    // XML only allows characters in the following unicode ranges
    // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    // Hence we need to delete any characters that dont fit this

    // Convert the UTF-8 string to an array of unicode character numbers
    $unicode_array = UTF8_to_unicode_array( $UTF8_text );

    // Create a new array to receive the valid unicode character numbers
    $new_unicode_array = array( );

    // Cycle through the unicode character numbers
    foreach( $unicode_array as $unichar )
    {
        // Check if the unicode character number is valid for XML
        if ( ( $unichar == 0x09 ) ||
             ( $unichar == 0x0A ) ||
             ( $unichar == 0x0D ) ||
             ( ( $unichar >= 0x20 ) && ( $unichar <= 0xD7FF ) ) ||
             ( ( $unichar >= 0xE000 ) && ( $unichar <= 0xFFFD ) ) ||
             ( ( $unichar >= 0x10000 ) && ( $unichar <= 0x10FFFF ) ) )
        {
            // Unicode character is valid for XML - add it to the valid characters array
            $new_unicode_array[] = $unichar;
        }
    }

    // Convert the array of valid unicode character numbers back to UTF-8 encoded text
    $UTF8_text = unicode_array_to_UTF8( $new_unicode_array );

    // Escape any special HTML characters present
    // The charset is named explicitly so the result does not depend on the
    // default_charset configuration setting
    $UTF8_text = htmlspecialchars( $UTF8_text, ENT_QUOTES, 'UTF-8' );

    // Escape CR, LF and TAB characters, so that they are kept and not treated as expendable white space
    // The replacement values must be the literal text of the numeric character
    // references - if they are decoded into the raw characters this step does nothing
    $trans = array( "\x09" => "&#x09;", "\x0A" => "&#x0A;", "\x0D" => "&#x0D;" );
    $UTF8_text = strtr( $UTF8_text, $trans );

    // Return the resulting XML valid string
    return $UTF8_text;
}

/******************************************************************************
* End of Function:     xml_UTF8_clean
******************************************************************************/


/******************************************************************************
*
* Function:     xml_UTF16_clean
*
* Description:  XML has specific requirements about the characters that are
*               allowed, and characters that must be escaped.
*               This function ensures that all characters in the given string
*               are valid, and that characters such as Quotes, Greater than,
*               Less than and Ampersand are properly escaped. Newlines and Tabs
*               are also escaped.
*               Note - Do not use this on constructed XML which includes tags,
*                      as it will escape the tags. It is designed to be used
*                      on the tag and attribute names, attribute values, and text.
*
* Parameters:   utf16_text - a string containing the UTF-16 data
*               MSB_first - True will cause processing as Big Endian UTF-16 (Motorola, MSB first)
*                           False will cause processing as Little Endian UTF-16 (Intel, LSB first)
*
* Returns:      output - the array containing the unicode character numbers
*
******************************************************************************/

function xml_UTF16_clean( $UTF16_text, $MSB_first )
{
    // Ensure that the Unicode UTF16 encoding is valid.
$UTF16_text = UTF16_fix( $UTF16_text, $MSB_first ); // XML only allows characters in the following unicode ranges // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // Hence we need to delete any characters that dont fit this // Convert the UTF-16 string to an array of unicode character numbers $unicode_array = UTF16_to_unicode_array( $UTF16_text, $MSB_first ); // Create a new array to receive the valid unicode character numbers $new_unicode_array = array( ); // Cycle through the unicode character numbers foreach( $unicode_array as $unichar ) { // Check if the unicode character number is valid for XML if ( ( $unichar == 0x09 ) || ( $unichar == 0x0A ) || ( $unichar == 0x0D ) || ( ( $unichar >= 0x20 ) && ( $unichar <= 0xD7FF ) ) || ( ( $unichar >= 0xE000 ) && ( $unichar <= 0xFFFD ) ) || ( ( $unichar >= 0x10000 ) && ( $unichar <= 0x10FFFF ) ) ) { // Unicode character is valid for XML - add it to the valid characters array $new_unicode_array[] = $unichar; }
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -