// utf8.h
// (code-viewer page residue: "font size" control label)
#ifndef __DNC_UTF8_H__
#define __DNC_UTF8_H__
//##############################################################################
//The dnc Library
//Copyright (c) 2005 Dreamsoft 趙純華
//Last update: 2005-1-10
//UTF8轉換函數
//##############################################################################
//
#ifndef __DNC_DEFINE_H__
#include "define.h"
#endif
#ifndef __DNC_ARRAY_H__
#include "array.h"
#endif
#include <string>
namespace dnc{
// Buffer-based conversions between the ANSI, UTF-8 and UNICODE (wide) forms.
// Every function takes a source buffer with its element count and a
// destination buffer with its element count, and returns a size_t.
// NOTE(review): the implementations are not in this header; the return value
// is presumably the number of elements produced in destData — confirm.
DNC_DECLARE size_t ANSIToUTF8(castr srcData,size_t srcCount,ustr destData,size_t destCount);
DNC_DECLARE size_t UTF8ToANSI(custr srcData,size_t srcCount, astr destData,size_t destCount);
DNC_DECLARE size_t ANSIToUNICODE(castr srcData,size_t srcCount, wstr destData,size_t destCount) ;
DNC_DECLARE size_t UNICODEToANSI(cwstr srcData,size_t srcCount,astr destData,size_t destCount) ;
DNC_DECLARE size_t UNICODEToUTF8(cwstr srcData,size_t srcCount,ustr destData,size_t destCount) ;
DNC_DECLARE size_t UTF8ToUNICODE(custr srcData,size_t srcCount, wstr destData,size_t destCount) ;
inline size_t ANSIToUTF8(const Array<achar> &src,Array<uchar> &dest){
return ANSIToUTF8(src.data(),src.size(),dest.data(),dest.size());
}
inline size_t UTF8ToANSI(const Array<uchar> &src,Array<achar> &dest){
return UTF8ToANSI(src.data(),src.size(),dest.data(),dest.size());
}
inline size_t UNICODEToUTF8(const Array<wchar> &src,Array<uchar> &dest){
return UNICODEToUTF8(src.data(),src.size(),dest.data(),dest.size());
}
inline size_t UTF8ToUNICODE(const Array<uchar> &src,Array<wchar> &dest){
return UTF8ToUNICODE(src.data(),src.size(),dest.data(),dest.size());
}
// std::string overloads: take the source text by const reference and return
// a std::string (presumably the converted text).
// NOTE(review): dnc_reg() is a project macro defined elsewhere; its expansion
// is not visible from this header.
DNC_DECLARE std::string UTF8ToANSI(const std::string &srcData) dnc_reg();
DNC_DECLARE std::string ANSIToUTF8(const std::string &srcData) dnc_reg();
//Convert one xchar to its UTF-8 encoding.
//parameters:
// ch   any character
// utf8 buffer of at least 6 bytes that receives the encoded form of ch
//return:
// number of bytes ch occupies once converted to UTF-8
DNC_DECLARE unsigned int XCharToUTF8(xchar ch,astr utf8);
//Convert one UTF-8 encoded character to an xchar.
//parameters:
// utf8 holds one character in UTF-8 form
// ch   receives the result of converting utf8
//return:
// number of bytes occupied by the character that utf8 represents
DNC_DECLARE unsigned int UTF8ToXChar(custr utf8,xchar &ch);
//UTF-8 string operations.
//By default this function is implemented with the standard library strcmp.
//System parameters can also be set so that the comparison is done in a more
//human-friendly way, for example by logical character value or by Chinese
//pinyin order.
//count defaults to (unsigned int)-1.
DNC_DECLARE int utf8_strcmp(castr str1,castr str2,unsigned int count= (unsigned int)-1);
//Outputs:
// size    logical length of str, i.e. the number of characters
// rawSize physical length of str, i.e. the length of the 0-terminated buffer
//count defaults to (unsigned int)-1.
DNC_DECLARE void utf8_strlen(castr str,unsigned int &size,unsigned int &rawSize,unsigned int count= (unsigned int)-1);
// Returns an xchar for the UTF-8 data at str. The iterators in this header
// call it to obtain the value of a character whose first byte is above 127.
DNC_DECLARE xchar utf8_value(custr str);
// Lookup tables defined elsewhere in the library.
// gUTFBytes is indexed by a byte value; the iterators in this header use
// gUTFBytes[b]+1 as the number of bytes occupied by the character whose
// first byte is b.
// NOTE(review): gFirstByteMark and gUTFOffsets are not used in this header;
// presumably they are the first-byte marker and decoding-offset tables of the
// encoder/decoder — confirm against the implementation file.
extern DNC_DECLARE cuchar gUTFBytes[256];
extern DNC_DECLARE cuchar gFirstByteMark[7];
extern DNC_DECLARE const unsigned long gUTFOffsets[6];
/////////////////////////////////////////
//utf8字符跌代器
class utf8_const_iterator{
public:
typedef xchar value_type;
typedef value_type reference;
typedef utf8_const_iterator MyType;
public:
utf8_const_iterator(castr it):m_it((ustr)it){
//如果it剛好在一個utf8字符的中間,則向前邊推算出這個字符
for(;*m_it >= 0x80 && *m_it < 0xE0 && (ustr)it - m_it < 6;--m_it);
}
reference operator*() const{
return *m_it <= 127 ? *m_it : utf8_value(m_it);
}
operator castr () const{
return (castr)m_it;
}
int get_charSize() const{
return gUTFBytes[*m_it]+1;
}
long operator - (const utf8_const_iterator &other) const{
//long size;
//for(utf8_const_iterator it=*this;it!=other;it++)
//return (size_t)(m_it-it.m_it);
return 0;
}
MyType operator + (int offset) const{
if(offset < 0)
return operator-(-offset);
const uchar *str = m_it;
for(int i=0;i<offset;i++)
str += gUTFBytes[*str]+1;
return MyType((castr)str);
}
MyType operator - (int offset) const{
if(offset < 0)
return operator+(-offset);
const uchar *str = m_it;
for(int i=0;i<offset;i++)
for(--str;*str >= 0x80 && *str < 0xE0;--str);
return MyType((castr)str);
}
MyType& operator ++(){
m_it += gUTFBytes[*m_it]+1;
return *this;
}
MyType operator ++(int){
MyType tmp = *this;
m_it += gUTFBytes[*m_it]+1;
return tmp;
}
MyType& operator --(){
//如果是在1000 0000(0x80)和1100 0000(0xE0)之間的值就忽略掉
for(--m_it;*m_it >= 0x80 && *m_it < 0xE0;--m_it);
return *this;
}
MyType operator --(int){
MyType tmp = *this;
for(--m_it;*m_it >= 0x80 && *m_it < 0xE0;--m_it);
return tmp;
}
bool operator == (const MyType &right){
return m_it==right.m_it;
}
bool operator != (const MyType &right){
return !(*this == right);
}
bool operator<(const MyType &right) const{
return m_it<right.m_it;
}
bool operator>(const MyType& right) const{
return (right < *this);
}
bool operator<=(const MyType& right) const{
return (!(right < *this));
}
bool operator>=(const MyType& right) const{
return (!(*this < right));
}
protected:
ustr m_it;
};
// Mutable counterpart of utf8_const_iterator. It re-declares the increment
// and decrement operators so that they return utf8_iterator instead of the
// base type; everything else is inherited.
class utf8_iterator : public utf8_const_iterator{
public:
    typedef utf8_iterator MyType;
public:
    // Builds an iterator at `it`; positioning is done by the base constructor.
    utf8_iterator(castr it):utf8_const_iterator(it){
    }
    // Pre-increment: advance by the byte size of the current character.
    MyType& operator ++(){
        m_it += gUTFBytes[*m_it]+1;
        return *this;
    }
    // Post-increment: advance, return the old position.
    MyType operator ++(int){
        MyType previous = *this;
        m_it += gUTFBytes[*m_it]+1;
        return previous;
    }
    // Pre-decrement: step back to the first byte of the previous character.
    // Only continuation bytes (10xxxxxx, 0x80..0xBF) are skipped; 0xC0..0xDF
    // start a two-byte character and must stop the scan.
    MyType& operator --(){
        for(--m_it;(*m_it & 0xC0) == 0x80;--m_it);
        return *this;
    }
    // Post-decrement: as above, but returns the old position.
    MyType operator --(int){
        MyType previous = *this;
        for(--m_it;(*m_it & 0xC0) == 0x80;--m_it);
        return previous;
    }
};
////////////////////////////////////
// Reverse iterator adaptor.
//
// Holds a Base iterator and swaps the direction of ++ and --. Dereferencing
// reads the element the Base iterator currently points at (there is no
// one-element offset as in std::reverse_iterator).
template<class Base>
class utf8_reverse_bidirectional_iterator{
public:
    typedef typename Base::value_type value_type;
    typedef typename Base::reference reference;
    typedef utf8_reverse_bidirectional_iterator MyType;
public:
    utf8_reverse_bidirectional_iterator(const Base &it):m_it(it){}
    // The wrapped forward iterator.
    Base base() const{
        return (m_it);
    }
    // Element at the wrapped position.
    reference operator*() const{
        return *m_it;
    }
    // Incrementing moves the wrapped iterator backwards.
    MyType& operator ++(){
        --m_it;
        return *this;
    }
    MyType operator ++(int){
        MyType previous = *this;
        --m_it;
        return previous;
    }
    // Decrementing moves the wrapped iterator forwards.
    MyType& operator --(){
        ++m_it;
        return *this;
    }
    MyType operator --(int){
        MyType previous = *this;
        ++m_it;
        return previous;
    }
    // Equality is const-qualified so that const reverse iterators can be
    // compared, matching the relational operators below.
    bool operator == (const MyType &right) const{
        return m_it==right.m_it;
    }
    bool operator != (const MyType &right) const{
        return !(*this == right);
    }
    // NOTE(review): the relational operators order by the wrapped iterator's
    // position without reversing it, so an iterator that is "earlier" in
    // reverse traversal compares greater. Left unchanged because callers may
    // rely on it — confirm the intended ordering.
    bool operator<(const MyType &right) const{
        return m_it<right.m_it;
    }
    bool operator>(const MyType& right) const{
        return (right < *this);
    }
    bool operator<=(const MyType& right) const{
        return (!(right < *this));
    }
    bool operator>=(const MyType& right) const{
        return (!(*this < right));
    }
private:
    Base m_it;   // wrapped forward iterator
};
// Reverse-traversal aliases: the adaptor applied to the mutable and to the
// read-only UTF-8 iterator.
typedef utf8_reverse_bidirectional_iterator<utf8_iterator> utf8_reverse_iterator;
typedef utf8_reverse_bidirectional_iterator<utf8_const_iterator> utf8_const_reverse_iterator;
}
#endif //__DNC_UTF8_H__
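// ---------------------------------------------------------------------------
// Code-viewer page residue (not part of utf8.h): keyboard shortcut help
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Switch theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------------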