// Listing of: IconvGNUTransService.cpp
// (code-viewer page header; "Font size:" control -- not part of the source file)
/* * Copyright 2002,2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Log: IconvGNUTransService.cpp,v $ * Revision 1.15 2004/09/08 13:56:45 peiyongz * Apache License Version 2.0 * * Revision 1.14 2004/07/23 15:29:09 amassari * transcode was badly terminating the converted string (jira#1206) * * Revision 1.13 2004/07/23 14:35:03 amassari * A global mutex was not cleaned up * * Revision 1.12 2004/02/25 14:53:24 peiyongz * Bug#27209: Xerces 2.5.0 does not build with option -t IconvGNU because of syntax errors! * * Revision 1.11 2003/12/24 15:24:15 cargilld * More updates to memory management so that the static memory manager. * * Revision 1.10 2003/08/19 14:01:41 neilg * fix for bug 22537 * * Revision 1.9 2003/05/17 16:32:18 knoaman * Memory manager implementation : transcoder update. * * Revision 1.8 2003/05/16 21:37:00 knoaman * Memory manager implementation: Modify constructors to pass in the memory manager. * * Revision 1.7 2003/05/15 18:47:05 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.6 2003/04/07 16:52:13 peiyongz * Bug# 18672: IconvGNUTranscoder can't be build when namespaces is on. * Patch from Bacek@yandex-team.ru (Vasily Tchekalkin) * * Revision 1.5 2003/03/09 17:03:25 peiyongz * PanicHandler * * Revision 1.4 2002/12/31 18:42:54 tng * [Bug 15608] IconvLCPTranscoder::transcode() is wrong at wcstombs() usage. * * Revision 1.3 2002/11/04 15:14:34 tng * C++ Namespace Support. 
* * Revision 1.2 2002/09/27 13:33:43 tng * [Bug 12547] Xerces C++ 2.1 fails to build on Linux 64 bits arch with -tlinux. Patch from Guillaume Morin. * * Revision 1.1 2002/08/19 19:38:18 tng * [Bug 11771] Linux specific IconvGNU transcoder. Patch from Vasily Tchekalkin. * */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <ctype.h>#include <locale.h>#include <iconv.h>#include <errno.h>#include <endian.h>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include <xercesc/util/XMLUni.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/TranscodingException.hpp>#include "IconvGNUTransService.hpp"#if !defined(APP_NO_THREADS)#include <xercesc/util/Mutexes.hpp>#include <xercesc/util/XMLRegisterCleanup.hpp>#endif /* !APP_NO_THREADS */XERCES_CPP_NAMESPACE_BEGIN#if !defined(APP_NO_THREADS)// Iconv() access syncronization pointstatic XMLMutex *gIconvMutex = NULL;static XMLRegisterCleanup IconvGNUMutexCleanup;# define ICONV_LOCK XMLMutexLock lockConverter(gIconvMutex);#else /* APP_NO_THREADS */# define ICONV_LOCK#endif /* !APP_NO_THREADS */// ---------------------------------------------------------------------------// Description of encoding schemas, supported by iconv()// ---------------------------------------------------------------------------typedef struct __IconvGNUEncoding { const char* fSchema; // schema name size_t fUChSize; // size of the character unsigned int fUBO; // byte order, relative to the host} IconvGNUEncoding;static const IconvGNUEncoding gIconvGNUEncodings[] = { { "UCS-2LE", 2, LITTLE_ENDIAN }, { "ucs-2-internal", 2, LITTLE_ENDIAN }, { NULL, 0, 0 }};//--------------------------------------------------// Macro-definitions to translate "native unicode"// characters <-> XMLCh with different host byte order// and encoding schemas.# if BYTE_ORDER == LITTLE_ENDIAN# define IXMLCh2WC16(x,w) \ *(w) 
= ((*(x)) >> 8) & 0xFF; \ *((w)+1) = (*(x)) & 0xFF# define IWC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1))# define XMLCh2WC16(x,w) \ *(w) = (*(x)) & 0xFF; \ *((w)+1) = ((*(x)) >> 8) & 0xFF# define WC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w))# define IXMLCh2WC32(x,w) \ *(w) = ((*(x)) >> 24) & 0xFF; \ *((w)+1) = ((*(x)) >> 16) & 0xFF; \ *((w)+2) = ((*(x)) >> 8) & 0xFF; \ *((w)+3) = (*(x)) & 0xFF# define IWC322XMLCh(w,x) \ *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ ((*((w)+2)) << 8) | (*((w)+3))# define XMLCh2WC32(x,w) \ *((w)+3) = ((*(x)) >> 24) & 0xFF; \ *((w)+2) = ((*(x)) >> 16) & 0xFF; \ *((w)+1) = ((*(x)) >> 8) & 0xFF; \ *(w) = (*(x)) & 0xFF# define WC322XMLCh(w,x) \ *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ ((*((w)+1)) << 8) | (*(w))# else /* BYTE_ORDER != LITTLE_ENDIAN */# define XMLCh2WC16(x,w) \ *(w) = ((*(x)) >> 8) & 0xFF; \ *((w)+1) = (*(x)) & 0xFF# define WC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1))# define IXMLCh2WC16(x,w) \ *(w) = (*(x)) & 0xFF; \ *((w)+1) = ((*(x)) >> 8) & 0xFF# define IWC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w))# define XMLCh2WC32(x,w) \ *(w) = ((*(x)) >> 24) & 0xFF; \ *((w)+1) = ((*(x)) >> 16) & 0xFF; \ *((w)+2) = ((*(x)) >> 8) & 0xFF; \ *((w)+3) = (*(x)) & 0xFF# define WC322XMLCh(w,x) \ *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ ((*((w)+2)) << 8) | (*((w)+3))# define IXMLCh2WC32(x,w) \ *((w)+3) = ((*(x)) >> 24) & 0xFF; \ *((w)+2) = ((*(x)) >> 16) & 0xFF; \ *((w)+1) = ((*(x)) >> 8) & 0xFF; \ *(w) = (*(x)) & 0xFF# define IWC322XMLCh(w,x) \ *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ ((*((w)+1)) << 8) | (*(w))# endif /* BYTE_ORDER == LITTLE_ENDIAN */#include <wchar.h>#include <string.h>#include <stdlib.h>#include <stdio.h>// ---------------------------------------------------------------------------// Local, const data// ---------------------------------------------------------------------------static const unsigned int gTempBuffArraySize = 4096;static const XMLCh gMyServiceId[] ={ chLatin_I, 
chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull};// ---------------------------------------------------------------------------// Local methods// ---------------------------------------------------------------------------static unsigned int getWideCharLength(const XMLCh* const src){ if (!src) return 0; unsigned int len = 0; const XMLCh* pTmp = src; while (*pTmp++) len++; return len;}//----------------------------------------------------------------------------// There is implementation of the libiconv for FreeBSD (available through the// ports collection). The following is a wrapper around the iconv().//----------------------------------------------------------------------------IconvGNUWrapper::IconvGNUWrapper () : fUChSize(0), fUBO(LITTLE_ENDIAN), fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1){}IconvGNUWrapper::IconvGNUWrapper ( iconv_t cd_from, iconv_t cd_to, size_t uchsize, unsigned int ubo ) : fUChSize(uchsize), fUBO(ubo), fCDTo(cd_to), fCDFrom(cd_from){ if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) { XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService); }}IconvGNUWrapper::~IconvGNUWrapper(){}// Convert "native unicode" character into XMLChvoid IconvGNUWrapper::mbcToXMLCh (const char *mbc, XMLCh *toRet) const{ if (fUBO == LITTLE_ENDIAN) { if (fUChSize == sizeof(XMLCh)) *toRet = *((XMLCh*) mbc); else if (fUChSize == 2) { WC162XMLCh( mbc, toRet ); } else { WC322XMLCh( mbc, toRet ); } } else { if (fUChSize == 2) { IWC162XMLCh( mbc, toRet ); } else { IWC322XMLCh( mbc, toRet ); } }}// Convert XMLCh into "native unicode" charactervoid IconvGNUWrapper::xmlChToMbc (XMLCh xch, char *mbc) const{ if (fUBO == LITTLE_ENDIAN) { if (fUChSize == sizeof(XMLCh)) { memcpy (mbc, &xch, fUChSize); return; } if (fUChSize == 2) { XMLCh2WC16( &xch, mbc ); } else { XMLCh2WC32( &xch, mbc ); } } else { if (fUChSize == 2) { IXMLCh2WC16( &xch, mbc ); } else { IXMLCh2WC32( &xch, mbc ); } }}// Return uppercase equivalent for XMLChXMLCh IconvGNUWrapper::toUpper (const XMLCh ch) 
const{ if (ch <= 0x7F) return toupper(ch); char wcbuf[fUChSize * 2]; xmlChToMbc (ch, wcbuf); char tmpArr[4]; char* ptr = wcbuf; size_t len = fUChSize; char *pTmpArr = tmpArr; size_t bLen = 2; ICONV_LOCK; if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) return 0; tmpArr[1] = toupper (*((unsigned char *)tmpArr)); *tmpArr = tmpArr[1]; len = 1; pTmpArr = wcbuf; bLen = fUChSize; ptr = tmpArr; if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) return 0; mbcToXMLCh (wcbuf, (XMLCh*) &ch); return ch;}// Return lowercase equivalent for XMLChXMLCh IconvGNUWrapper::toLower (const XMLCh ch) const{ if (ch <= 0x7F) return tolower(ch); char wcbuf[fUChSize * 2]; xmlChToMbc (ch, wcbuf); char tmpArr[4]; char* ptr = wcbuf; size_t len = fUChSize; char *pTmpArr = tmpArr; size_t bLen = 2; ICONV_LOCK; if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) return 0; tmpArr[1] = tolower (*((unsigned char*)tmpArr)); *tmpArr = tmpArr[1]; len = 1; pTmpArr = wcbuf; bLen = fUChSize; ptr = tmpArr; if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) return 0; mbcToXMLCh (wcbuf, (XMLCh*) &ch); return ch;}// Check if passed characters belongs to the :space: classbool IconvGNUWrapper::isSpace(const XMLCh toCheck) const{ if (toCheck <= 0x7F) return isspace(toCheck); char wcbuf[fUChSize * 2]; char tmpArr[4]; xmlChToMbc (toCheck, wcbuf); char* ptr = wcbuf; size_t len = fUChSize; char *pTmpArr = tmpArr; size_t bLen = 2; { ICONV_LOCK; if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) return 0; } return isspace(*tmpArr);}// Fill array of XMLCh characters with data, supplyed in the array// of "native unicode" characters.XMLCh* IconvGNUWrapper::mbsToXML( const char* mbs_str , size_t mbs_cnt , XMLCh* xml_str , size_t xml_cnt) const{ if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0) return NULL; size_t cnt = (mbs_cnt < xml_cnt) ? 
mbs_cnt : xml_cnt; if (fUBO == LITTLE_ENDIAN) { if (fUChSize == sizeof(XMLCh)) { // null-transformation memcpy (xml_str, mbs_str, fUChSize * cnt); return xml_str; } if (fUChSize == 2) for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { WC162XMLCh( mbs_str, xml_str + i); } else for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { WC322XMLCh( mbs_str, xml_str + i ); } } else { if (fUChSize == 2) for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { IWC162XMLCh( mbs_str, xml_str + i ); } else for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { IWC322XMLCh( mbs_str, xml_str + i ); } } return xml_str;}// Fill array of "native unicode" characters with data, supplyed// in the array of XMLCh characters.char* IconvGNUWrapper::xmlToMbs( const XMLCh* xml_str , size_t xml_cnt , char* mbs_str , size_t mbs_cnt) const{ if (mbs_str == NULL || mbs_cnt == 0 || xml_str == NULL || xml_cnt == 0) return NULL; size_t cnt = (mbs_cnt < xml_cnt) ? mbs_cnt : xml_cnt; char *toReturn = mbs_str; if (fUBO == LITTLE_ENDIAN) { if (fUChSize == sizeof(XMLCh)) { // null-transformation memcpy (mbs_str, xml_str, fUChSize * cnt); return toReturn; } if (fUChSize == 2) for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { XMLCh2WC16( xml_str, mbs_str ); } else for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { XMLCh2WC32( xml_str, mbs_str ); } } else { if (fUChSize == 2) for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { IXMLCh2WC16( xml_str, mbs_str ); } else for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { IXMLCh2WC32( xml_str, mbs_str ); }
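// ---- Code-viewer page residue (not part of the source file) ----
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -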