?? unicode coding.txt

?? Windows環境下的Unicode編程
?? TXT
?? 第 1 頁 / 共 2 頁
字號:
12 下一頁
Windows環境下的Unicode編程

　　國際上規定了兩種二進制文字內碼表示方法。
　　ASCII是8位的，表示漢字需要兩個字節。以前大部分應用都使用這種編碼，不過有的時候會有亂碼出現。UNICODE是16位的，比較好。

　　ASCII是American Standard Code for Information Interchange的縮寫，用來制訂計算機中每個符號對應的代碼，這也叫做計算機的內碼(code)。每個ASCII碼以1個字節(Byte)儲存，從數字0到127代表不同的常用符號，例如大寫A的ASCII碼是65，小寫a則是97。由于ASCII只用到一個字節中的低七個位，最高位并不使用，所以后來又將最高的一個位也編入這套內碼中，成為八個位的延伸ASCII(Extended ASCII)碼，這套內碼加上了許多外文和表格等特殊符號，成為目前常用的內碼。

　　Unicode是Unified Code的縮寫，也就是統一碼。具體可參考：http://www.unicode.org/unicode/standard/translations/s-chinese.html 


　　ASCII碼：

　　ASCII碼是目前計算機中用得最廣泛的字符集及其編碼，是由美國國家標準局(ANSI)制定的ASCII碼（American Standard Code for Information Interchange，美國標準信息交換碼），它已被國際標準化組織（ISO）定為國際標準，稱為ISO 646標準。適用于所有拉丁文字字母，ASCII碼有7位碼和8位碼兩種形式。 

　　因為1位二進制數可以表示 $2^1=2$ 種狀態：0、1；而2位二進制數可以表示 $2^2=4$ 種狀態：00、01、10、11；依次類推，7位二進制數可以表示 $2^7=128$ 種狀態，每種狀態都唯一地編為一個7位的二進制碼，對應一個字符（或控制碼），這些碼可以排列成一個十進制序號0～127。所以，7位ASCII碼是用七位二進制數進行編碼的，可以表示128個字符。 
　　第0～32號及第127號(共34個)是控制字符或通訊專用字符，如控制符：LF（換行）、CR（回車）、FF（換頁）、DEL（刪除）、BEL（振鈴）等；通訊專用字符：SOH（文頭）、EOT（文尾）、ACK（確認）等；第33～126號(共94個)是字符，其中第48～57號為0～9十個阿拉伯數字；65～90號為26個大寫英文字母，97～122號為26個小寫英文字母，其余為一些標點符號、運算符號等。 

　　注意：在計算機的存儲單元中，一個ASCII碼值占一個字節(8個二進制位)，其最高位(b7)用作奇偶校驗位。所謂奇偶校驗，是指在代碼傳送過程中用來檢驗是否出現錯誤的一種方法，一般分奇校驗和偶校驗兩種。奇校驗規定：正確的代碼一個字節中1的個數必須是奇數，若非奇數，則在最高位b7添1；偶校驗規定：正確的代碼一個字節中1的個數必須是偶數，若非偶數，則在最高位b7添1。 


　　Unicode碼：

　　Unicode碼也是一種國際標準編碼，采用二個字節編碼，與ANSI碼不兼容。目前，在網絡、Windows系統和很多大型軟件中得到應用。


　　手機通信中普遍使用Unicode碼。例如手機短消息的發送是以PDU串的形式發送出去的，中文字符以Unicode碼來表示，所以在發送中文短消息之前必須首先將中文字符轉換為Unicode碼。下面的函數將實現這個功能。

　　這個函數主要應用到VB自帶的一個格式轉換函數ChrW（）：它把Unicode碼值轉換為對應的字符，因此下面的函數是把以十六進制表示的Unicode碼串還原為中文字符。　 

　　' Decodes a string of hexadecimal digits into text: rmsg is read in groups of
　　' four characters, each group is parsed as a hexadecimal number ("&H" prefix)
　　' and the character with that code (ChrW) is appended to the result.
　　'
　　' rmsg    : hexadecimal text, four digits per character (e.g. "4F60").
　　' returns : the decoded characters, concatenated in order.
　　'
　　' The string literals below use plain ASCII double quotes; typographic quotes
　　' are not string delimiters in VB and do not compile.
　　Public Function chg(rmsg As String) As String
　　Dim tep As String
　　Dim temp As String
　　Dim i As Integer
　　Dim b As Integer
　　tep = rmsg
　　i = Len(tep)
　　' "/" is floating-point division; the result is rounded when stored in the
　　' Integer b, so b can be rounded up when the length is not a multiple of 4.
　　b = i / 4
　　If i = b * 4 Then
　　 ' NOTE(review): when the length is an exact multiple of 4 the last group is
　　 ' dropped here. Presumably the input carries a trailing group that is not
　　 ' text - confirm against the caller before changing this.
　　 b = b - 1
　　 tep = Left(tep, b * 4)
　　Else
　　 tep = Left(tep, b * 4)
　　End If
　　chg = ""
　　For i = 1 To b
　　 ' Take the i-th group of four digits and make it a VB hexadecimal literal.
　　 temp = "&H" & Mid(tep, (i - 1) * 4 + 1, 4)
　　 chg = chg & ChrW(CInt(Val(temp)))
　　Next i
　　End Function　


　　另外一種：

　　在日常編程中，常常需要把中文轉換為unicode碼：

// Builds two escaped representations of the text held in sCode.Text, one entry
// per UTF-16 code unit:
//   sb - "\uXXXX" escapes (four lowercase hexadecimal digits)
//   sa - HTML numeric character references "&#NNNNN;" (decimal). These are not
//        UTF-8 bytes, even though the result is shown in the txtUTF8 control.
// The "\u" form is also copied to the clipboard.
//
// Every character is escaped, ASCII included: Encoding.Unicode yields two bytes
// per code unit, so a byte-count check cannot tell Chinese text from ASCII.
// Reading the code unit directly also avoids encoding each half of a surrogate
// pair on its own, so characters outside the BMP keep their real code units.
StringBuilder sb = new StringBuilder(); // "\uXXXX" escapes
StringBuilder sa = new StringBuilder(); // "&#NNNNN;" references
string text = sCode.Text;
for (int i = 0; i < text.Length; i++)
{
    int codeUnit = text[i];
    sa.Append("&#" + codeUnit + ";");
    sb.Append("\\u" + codeUnit.ToString("x4")); // zero-padded to four hex digits
}
Clipboard.SetDataObject(sb.ToString());
txtUTF8.Text = sa.ToString();
txtUnicode.Text = sb.ToString();




　　如何讓自己的開發環境支持UNICODE？

　　1.安裝Visual Studio時，在選擇VC++時需要加入unicode選項，保證相關的庫文件可以拷貝到system32下。
　　2.VC++中：工程/設置/C/C++/預處理程序定義(Preprocessor definitions)/去除_MBCS，加_UNICODE,UNICODE 
              工程/設置/link/output/入口點符號(Entry-point symbol)添加wWinMainCRTStartup；反之為MBCS（ANSI）編譯。 




　　Windows環境下Unicode編程總結

　　一、讓VC6支持Unicode

　　通過使用unicode編譯，軟件可以適應多種情況，如何在自己的工程中添加這兩種編譯方式呢？下面是一個簡單的步驟：
    
　　1、選擇“Build->Configurations”菜單
　　2、點擊“Add”按鈕，添加“Unicode Debug”配置，“Copy settings from”選擇“Win32 Debug”
　　3、添加“Unicode Release”配置，“Copy settings from”選擇“Win32 Release”，然后點擊“OK”
　　4、選擇“Project->Setting”菜單
　　5、切換到“General ”TAB頁(可選）
　　6、修改“Win32 Unicode Debug”的Intermediate Files和Output Files為DebugU (可選）  
　　7、修改“Win32 Unicode Release”的Intermediate Files和Output Files為ReleaseU (可選）
　　8、切換到“C++ ” Tab頁
　　9、從下拉列表框中選擇“Preprocessor”
　　10、為“Win32 Unicode Debug”和“Win32 Unicode Release”分別添加"_UNICODE,UNICODE " variables
　　11、切換到“link ” Tab頁， 從下拉列表框中選擇“output” ，設置Entry為wWinMainCRTStartup（如果為console程序則不需要）

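　　12、在需要隨編譯方式切換的字符變量處用TCHAR定義（定義了UNICODE/_UNICODE時TCHAR等同于WCHAR，否則等同于char）；如果固定需要寬字符則用WCHAR，固定需要ANSI則依然用char定義
　　13、把所有的字符串常量用_T()（或TEXT()）宏包起來，比如 TCHAR* szText = _T("我的Text"); 若變量固定為WCHAR類型，則直接加L前綴，比如 WCHAR* szText = L"我的Text";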


　　二、Unicode與UTF8的相互轉換

///////////////////////////////////////////////////////////////////////////////
// Convert a NUL-terminated UTF-8 string to a newly allocated WCHAR string.
//
// ptszInput  : UTF-8 input; must not be NULL.
// pwszOutput : receives the converted string on success and NULL on every
//              failure after argument validation; must not be NULL.
//
// Returns S_OK on success, E_INVALIDARG for NULL arguments, E_OUTOFMEMORY if
// the allocation yields NULL, or the HRESULT form of GetLastError() when the
// conversion fails.
//
// Caller must release the memory of *pwszOutput by calling "delete[]".
//
// The conversion always goes through MultiByteToWideChar: UTF-8 -> UTF-16 does
// not depend on whether the project is built with UNICODE defined.
///////////////////////////////////////////////////////////////////////////////
HRESULT UTF8ToWChar( char * ptszInput, WCHAR ** pwszOutput )
{
    if( NULL == ptszInput || NULL == pwszOutput )
    {
        return( E_INVALIDARG );
    }

    // Never leave the caller with an indeterminate pointer on a failure path.
    *pwszOutput = NULL;

    //
    // Get output buffer size (in WCHARs, terminating NUL included because the
    // input length is passed as -1)
    //
    const int cchOutput = MultiByteToWideChar( CP_UTF8, 0, ptszInput, -1, NULL, 0 );
    if( 0 == cchOutput )
    {
        // Read the error code first: HRESULT_FROM_WIN32 may be a macro that
        // evaluates its argument more than once.
        const DWORD dwError = GetLastError();
        return( HRESULT_FROM_WIN32( dwError ) );
    }

    WCHAR * pwszBuffer = new WCHAR[ cchOutput ];
    if( NULL == pwszBuffer )
    {
        // Only reachable with a non-throwing operator new.
        return( E_OUTOFMEMORY );
    }

    if( 0 == MultiByteToWideChar( CP_UTF8, 0, ptszInput, -1, pwszBuffer, cchOutput ) )
    {
        // Capture the error before freeing, then release the buffer so the
        // failure path does not leak.
        const DWORD dwError = GetLastError();
        delete [] pwszBuffer;
        return( HRESULT_FROM_WIN32( dwError ) );
    }

    *pwszOutput = pwszBuffer;
    return( S_OK );
}

///////////////////////////////////////////////////////////////////////////////
// Convert a NUL-terminated WCHAR string to a newly allocated UTF-8 string.
//
// pwszInput  : wide-character input; must not be NULL.
// ptszOutput : receives the converted string on success and NULL on every
//              failure after argument validation; must not be NULL.
//
// Returns S_OK on success, E_INVALIDARG for NULL arguments, E_OUTOFMEMORY if
// the allocation yields NULL, or the HRESULT form of GetLastError() when the
// conversion fails.
//
// Caller must release the memory of *ptszOutput by calling "delete[]".
//
// The conversion always goes through WideCharToMultiByte: copying wide
// characters into a char buffer is not a UTF-8 conversion, whatever the
// UNICODE build setting is.
///////////////////////////////////////////////////////////////////////////////
HRESULT WCharToUTF8(WCHAR * pwszInput , char ** ptszOutput)
{
    if( NULL == pwszInput || NULL == ptszOutput )
    {
        return( E_INVALIDARG );
    }

    // Never leave the caller with an indeterminate pointer on a failure path.
    *ptszOutput = NULL;

    //
    // Get output buffer size (in bytes, terminating NUL included because the
    // input length is passed as -1)
    //
    const int cbOutput = WideCharToMultiByte( CP_UTF8, 0, pwszInput, -1, NULL, 0, NULL, NULL );
    if( 0 == cbOutput )
    {
        // Read the error code first: HRESULT_FROM_WIN32 may be a macro that
        // evaluates its argument more than once.
        const DWORD dwError = GetLastError();
        return( HRESULT_FROM_WIN32( dwError ) );
    }

    char * pszBuffer = new char[ cbOutput ];
    if( NULL == pszBuffer )
    {
        // Only reachable with a non-throwing operator new.
        return( E_OUTOFMEMORY );
    }

    if( 0 == WideCharToMultiByte( CP_UTF8, 0, pwszInput, -1, pszBuffer, cbOutput, NULL, NULL ) )
    {
        // Capture the error before freeing, then release the buffer so the
        // failure path does not leak.
        const DWORD dwError = GetLastError();
        delete [] pszBuffer;
        return( HRESULT_FROM_WIN32( dwError ) );
    }

    *ptszOutput = pszBuffer;
    return( S_OK );
}



　　三、UTF8與ANSI的相互轉換

=====================================================
///////////////////////////////////////////////////////////////////////////////
// Convert a UTF8 string to ANSI string. 
// Caller must release the memory of pANSIOutput by calling delete[] pANSIOutput.
///////////////////////////////////////////////////////////////////////////////
BOOL   UTF8toANSI( char * pUtf8Input , char ** pANSIOutput )
{
if( NULL == pUtf8Input && NULL == pANSIOutput ) return FALSE ;

//convert to unicdoe firstly
    int nchOutput = MultiByteToWideChar( CP_UTF8, 0, pUtf8Input, -1, NULL, 0 );
    if( 0 == nchOutput )  
return FALSE ;

    WCHAR  * pWcharsz = new WCHAR[ nchOutput ];
ASSERT( pWcharsz != NULL ) ;
if( NULL == pWcharsz ) 
return FALSE ;
memset( pWcharsz , 0 , nchOutput * sizeof(WCHAR) ) ;
  
nchOutput = MultiByteToWideChar( CP_UTF8, 0, pUtf8Input, -1, pWcharsz, nchOutput )  ;
if( nchOutput <= 0 ) 
{
delete [] pWcharsz ;
return FALSE ;
}
//convert ANSI 
nchOutput = WideCharToMultiByte( CP_ACP, 0, pWcharsz, -1, NULL, 0 ,NULL ,NULL );
if( nchOutput <= 0 )
{
delete [] pWcharsz ;
return FALSE ;
}

* pANSIOutput  =  new char[nchOutput] ;
ASSERT( * pANSIOutput != NULL ) ;
12 下一頁
?? 文件大小 10 K
?? 上傳用戶 yangjiuhe
?? 所屬分類文件格式
??? 相關標簽

#Windows #Unicode #環境 #編程
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? unicode coding.txt

?? 快捷鍵說明