?? icutransservice.cpp
字號:
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: ICUTransService.cpp,v 1.16 2004/09/08 13:56:44 peiyongz Exp $ */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/util/Janitor.hpp>#include <xercesc/util/TranscodingException.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/XMLUniDefs.hpp>#include "ICUTransService.hpp"#include <string.h>#include <unicode/uloc.h>#include <unicode/uchar.h>#include <unicode/ucnv.h>#include <unicode/ucnv_err.h>#include <unicode/ustring.h>#include <unicode/udata.h>#if (U_ICU_VERSION_MAJOR_NUM >= 2) #include <unicode/uclean.h>#endif#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)// Forward reference the symbol which points to the ICU converter data.#if (U_ICU_VERSION_MAJOR_NUM < 2)extern "C" const uint8_t U_IMPORT icudata_dat[];#endif#endifXERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// Local, const data// ---------------------------------------------------------------------------static const XMLCh gMyServiceId[] ={ chLatin_I, chLatin_C, chLatin_U, chNull};static const XMLCh gS390Id[] ={ chLatin_S, chDigit_3, chDigit_9, chDigit_0, chNull};static const XMLCh gs390Id[] ={ chLatin_s, chDigit_3, chDigit_9, chDigit_0, chNull};static const XMLCh gswaplfnlId[] ={ chComma, chLatin_s, chLatin_w, chLatin_a, chLatin_p, chLatin_l, chLatin_f, chLatin_n, chLatin_l, chNull};// ---------------------------------------------------------------------------// Local functions// ---------------------------------------------------------------------------//// When XMLCh and ICU's UChar are not the same size, we have to do a temp// conversion of all strings. These local helper methods make that easier.//static UChar* convertToUChar( const XMLCh* const toConvert , const unsigned int srcLen = 0 , MemoryManager* const manager = 0){ const unsigned int actualLen = srcLen ? srcLen : XMLString::stringLen(toConvert); UChar* tmpBuf = (manager) ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar)) : new UChar[actualLen + 1]; const XMLCh* srcPtr = toConvert; UChar* outPtr = tmpBuf; while (*srcPtr) *outPtr++ = UChar(*srcPtr++); *outPtr = 0; return tmpBuf;}static XMLCh* convertToXMLCh( const UChar* const toConvert, MemoryManager* const manager = 0){ const unsigned int srcLen = u_strlen(toConvert); XMLCh* retBuf = (manager) ? (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh)) : new XMLCh[srcLen + 1]; XMLCh* outPtr = retBuf; const UChar* srcPtr = toConvert; while (*srcPtr) *outPtr++ = XMLCh(*srcPtr++); *outPtr = 0; return retBuf;}// ---------------------------------------------------------------------------// ICUTransService: Constructors and Destructor// ---------------------------------------------------------------------------ICUTransService::ICUTransService(){#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)#if (U_ICU_VERSION_MAJOR_NUM < 2) // Starting with ICU 2.0, ICU itself includes a static reference to the data // entrypoint symbol. // // ICU 1.8 (and previous) did not include a static reference, but would // dynamically load the data dll when it was first needed, however this dynamic // loading proved unreliable in some of the odd environments that Xerces needed // to run in. Hence, the static reference. // Pass the location of the converter data to ICU. By doing so, we are // forcing the load of ICU converter data DLL, after the Xerces-C DLL is // loaded. This implies that Xerces-C, now has to explicitly link with the // ICU converter dll. However, the advantage is that we no longer depend // on the code which does demand dynamic loading of DLL's. The demand // loading is highly system dependent and was a constant source of support // calls. UErrorCode uerr = U_ZERO_ERROR; udata_setCommonData((void *) icudata_dat, &uerr);#endif#endif}ICUTransService::~ICUTransService(){ /* * commented out the following clean up code * in case users use ICU outside of the parser * if we clean up here, users' code may crash * #if (U_ICU_VERSION_MAJOR_NUM >= 2) // release all lasily allocated data u_cleanup(); #endif */}// ---------------------------------------------------------------------------// ICUTransService: The virtual transcoding service API// ---------------------------------------------------------------------------int ICUTransService::compareIString(const XMLCh* const comp1 , const XMLCh* const comp2){ const XMLCh* psz1 = comp1; const XMLCh* psz2 = comp2; unsigned int curCount = 0; while (true) { // // If an inequality, then return the difference. Note that the XMLCh // might be bigger physically than UChar, but it won't hold anything // larger than 0xFFFF, so our cast here will work for both possible // sizes of XMLCh. // if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2))) return int(*psz1) - int(*psz2); // If either has ended, then they both ended, so equal if (!*psz1 || !*psz2) break; // Move upwards for the next round psz1++; psz2++; } return 0;}int ICUTransService::compareNIString(const XMLCh* const comp1 , const XMLCh* const comp2 , const unsigned int maxChars){ const XMLCh* psz1 = comp1; const XMLCh* psz2 = comp2; unsigned int curCount = 0; while (true) { // // If an inequality, then return the difference. Note that the XMLCh // might be bigger physically than UChar, but it won't hold anything // larger than 0xFFFF, so our cast here will work for both possible // sizes of XMLCh. // if (u_toupper(UChar(*psz1)) != u_toupper(UChar(*psz2))) return int(*psz1) - int(*psz2); // If either ended, then both ended, so equal if (!*psz1 || !*psz2) break; // Move upwards to next chars psz1++; psz2++; // // Bump the count of chars done. If it equals the count then we // are equal for the requested count, so break out and return // equal. // curCount++; if (maxChars == curCount) break; } return 0;}const XMLCh* ICUTransService::getId() const{ return gMyServiceId;}bool ICUTransService::isSpace(const XMLCh toCheck) const{ // // <TBD> // For now, we short circuit some of the control chars because ICU // is not correctly reporting them as space. Later, when they change // this, we can get rid of this special case. // if ((toCheck == 0x09) || (toCheck == 0x0A) || (toCheck == 0x0D)) { return true; } return (u_isspace(UChar(toCheck)) != 0);}XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder(){ // // Try to create a default converter. If it fails, return a null // pointer which will basically cause the system to give up because // we really can't do anything without one. // UErrorCode uerr = U_ZERO_ERROR; UConverter* converter = ucnv_open(NULL, &uerr); if (!converter) return 0; // That went ok, so create an ICU LCP transcoder wrapper and return it return new ICULCPTranscoder(converter);}bool ICUTransService::supportsSrcOfs() const{ // This implementation supports source offset information return true;}void ICUTransService::upperCase(XMLCh* const toUpperCase) const{ XMLCh* outPtr = toUpperCase; while (*outPtr) { *outPtr = XMLCh(u_toupper(UChar(*outPtr))); outPtr++; }}void ICUTransService::lowerCase(XMLCh* const toLowerCase) const{ XMLCh* outPtr = toLowerCase; while (*outPtr) { *outPtr = XMLCh(u_tolower(UChar(*outPtr))); outPtr++; }}// ---------------------------------------------------------------------------// ICUTransService: The protected virtual transcoding service API// ---------------------------------------------------------------------------XMLTranscoder* ICUTransService::makeNewXMLTranscoder(const XMLCh* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize , MemoryManager* const manager){ // // For encodings that end with "s390" we need to strip off the "s390" // from the encoding name and add ",swaplfnl" to the encoding name // that we pass into ICU on the ucnv_openU. // XMLCh* encodingNameToUse = (XMLCh*) encodingName; XMLCh* workBuffer = 0; if ( (XMLString::endsWith(encodingNameToUse, gs390Id)) || (XMLString::endsWith(encodingNameToUse, gS390Id)) ) { int workBufferSize = (XMLString::stringLen(encodingNameToUse) + XMLString::stringLen(gswaplfnlId) - XMLString::stringLen(gS390Id) + 1); workBuffer = (XMLCh*) manager->allocate(workBufferSize * sizeof(XMLCh)); int moveSize = XMLString::stringLen(encodingNameToUse) - XMLString::stringLen(gS390Id); XMLString::moveChars(workBuffer, encodingNameToUse, moveSize); XMLString::moveChars((workBuffer + moveSize), gswaplfnlId, XMLString::stringLen(gswaplfnlId)); encodingNameToUse = workBuffer; } // // If UChar and XMLCh are not the same size, then we have premassage the // encoding name into a UChar type string. // const UChar* actualName; UChar* tmpName = 0; if (sizeof(UChar) == sizeof(XMLCh)) { actualName = (const UChar*)encodingNameToUse; } else { tmpName = convertToUChar(encodingNameToUse, 0, manager); actualName = tmpName; } ArrayJanitor<UChar> janTmp(tmpName, manager); ArrayJanitor<XMLCh> janTmp1(workBuffer, manager); UErrorCode uerr = U_ZERO_ERROR; UConverter* converter = ucnv_openU(actualName, &uerr); if (!converter) { resValue = XMLTransService::UnsupportedEncoding; return 0; } return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager);}// ---------------------------------------------------------------------------// ICUTranscoder: Constructors and Destructor// ---------------------------------------------------------------------------ICUTranscoder::ICUTranscoder(const XMLCh* const encodingName , UConverter* const toAdopt , const unsigned int blockSize , MemoryManager* const manager) : XMLTranscoder(encodingName, blockSize, manager) , fConverter(toAdopt) , fFixed(false) , fSrcOffsets(0){ // If there is a block size, then allocate our source offset array if (blockSize) fSrcOffsets = (XMLUInt32*) manager->allocate ( blockSize * sizeof(XMLUInt32) );//new XMLUInt32[blockSize]; // Remember if its a fixed size encoding fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));}ICUTranscoder::~ICUTranscoder(){ getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets; // If there is a converter, ask ICU to clean it up if (fConverter) { // <TBD> Does this actually delete the structure??? ucnv_close(fConverter); fConverter = 0; }}// ---------------------------------------------------------------------------// ICUTranscoder: The virtual transcoder API// ---------------------------------------------------------------------------unsigned intICUTranscoder::transcodeFrom(const XMLByte* const srcData , const unsigned int srcCount , XMLCh* const toFill , const unsigned int maxChars , unsigned int& bytesEaten , unsigned char* const charSizes){ // If debugging, insure the block size is legal #if defined(XERCES_DEBUG) checkBlockSize(maxChars); #endif // Set up pointers to the start and end of the source buffer const XMLByte* startSrc = srcData; const XMLByte* endSrc = srcData + srcCount; // // And now do the target buffer. This works differently according to // whether XMLCh and UChar are the same size or not. // UChar* startTarget; if (sizeof(XMLCh) == sizeof(UChar)) startTarget = (UChar*)toFill; else startTarget = (UChar*) getMemoryManager()->allocate ( maxChars * sizeof(UChar) );//new UChar[maxChars]; UChar* orgTarget = startTarget; // // Transoode the buffer. Buffer overflow errors are normal, occuring // when the raw input buffer holds more characters than will fit in // the Unicode output buffer. // UErrorCode err = U_ZERO_ERROR; ucnv_toUnicode ( fConverter , &startTarget , startTarget + maxChars , (const char**)&startSrc , (const char*)endSrc , (fFixed ? 0 : (int32_t*)fSrcOffsets)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -