?? avc_mbyte.c
字號:
psDBCSInfo->nDBCSBufSize = nMaxOutputLen+2; psDBCSInfo->pszDBCSBuf = (unsigned char *)CPLRealloc(psDBCSInfo->pszDBCSBuf, psDBCSInfo->nDBCSBufSize* sizeof(unsigned char)); } /* Do the conversion according to current code page */ switch (psDBCSInfo->nDBCSCodePage) { case AVC_DBCS_JAPANESE: pszOutBuf = (char*)_AVCArcDBCS2JapaneseShiftJIS(psDBCSInfo, (const unsigned char *)pszLine, nMaxOutputLen); break; default: /* We should never get here anyways, but just in case return pszLine */ pszOutBuf = pszLine; } return pszOutBuf;}/*===================================================================== *===================================================================== * Functions Specific to Japanese encoding (CodePage 932). * * For now we assume that we can receive only Katakana, Shift-JIS, or EUC * encoding as input. Coverages use EUC encoding in most cases, except * for Katakana characters that are prefixed with a 0x8e byte. * * Most of the Japanese conversion functions are based on information and * algorithms found at: * http://www.mars.dti.ne.jp/~torao/program/appendix/japanese-en.html *===================================================================== *====================================================================*//********************************************************************** * _AVCDetectJapaneseEncoding() * * Scan a line of text to try to establish the type of japanese encoding * * Returns the encoding number (AVC_CODE_JAP_*), or AVC_CODE_UNKNOWN if no * specific encoding was detected. 
**********************************************************************/#define IS_JAP_SHIFTJIS_1(c) ((c) >= 0x81 && (c) <= 0x9f)#define IS_JAP_SHIFTJIS_2(c) (((c) >= 0x40 && (c) <= 0x7e) || \ ((c) >= 0x80 && (c) <= 0xA0) )#define IS_JAP_EUC_1(c) ((c) >= 0xF0 && (c) <= 0xFE)#define IS_JAP_EUC_2(c) ((c) >= 0xFD && (c) <= 0xFE)#define IS_JAP_KANA(c) ((c) >= 0xA1 && (c) <= 0xDF)static int _AVCDetectJapaneseEncoding(const unsigned char *pszLine){ int nEncoding = AVC_CODE_UNKNOWN; for( ; nEncoding == AVC_CODE_UNKNOWN && pszLine && *pszLine; pszLine++) { if (IS_ASCII(*pszLine)) continue; else if (IS_JAP_SHIFTJIS_1(*pszLine)) { nEncoding = AVC_CODE_JAP_SHIFTJIS; break; } else if (IS_JAP_KANA(*pszLine) && *(pszLine+1) && (IS_ASCII(*(pszLine+1)) || (*(pszLine+1)>=0x80 && *(pszLine+1)<=0xA0) ) ) { nEncoding = AVC_CODE_JAP_SHIFTJIS; /* SHIFT-JIS + Kana */ break; } else if (IS_JAP_EUC_1(*pszLine)) { nEncoding = AVC_CODE_JAP_EUC; break; } if (*(++pszLine) == '\0') break; if (IS_JAP_SHIFTJIS_2(*pszLine)) { nEncoding = AVC_CODE_JAP_SHIFTJIS; break; } else if (IS_JAP_EUC_2(*pszLine)) { nEncoding = AVC_CODE_JAP_EUC; break; } } return nEncoding;}/********************************************************************** * _AVCJapanese2ArcDBCS() * * Try to detect type of Japanese encoding if not done yet, and convert * string from Japanese to proper coverage DBCS encoding. **********************************************************************/static const char *_AVCJapanese2ArcDBCS(AVCDBCSInfo *psDBCSInfo, const unsigned char *pszLine, int nMaxOutputLen){ unsigned char *pszOut; int iDst; pszOut = psDBCSInfo->pszDBCSBuf; if (psDBCSInfo->nDBCSEncoding == AVC_CODE_UNKNOWN) { /* Type of encoding (Shift-JIS or EUC) not known yet... try to * detect it now. 
*/ psDBCSInfo->nDBCSEncoding = _AVCDetectJapaneseEncoding(pszLine);/* if (psDBCSInfo->nDBCSEncoding == AVC_CODE_JAP_SHIFTJIS) { printf("Found Japanese Shift-JIS encoding\n"); } else if (psDBCSInfo->nDBCSEncoding == AVC_CODE_JAP_EUC) { printf("Found Japanese EUC encoding\n"); }*/ } for(iDst=0; *pszLine && iDst < nMaxOutputLen; pszLine++) { if (IS_ASCII(*pszLine)) { /* No transformation required for ASCII */ pszOut[iDst++] = *pszLine; } else if ( psDBCSInfo->nDBCSEncoding==AVC_CODE_JAP_EUC && *(pszLine+1) ) { /* This must be a pair of EUC chars and both should be in * the range 0xA1-0xFE */ pszOut[iDst++] = *(pszLine++); pszOut[iDst++] = *pszLine; } else if ( IS_JAP_KANA(*pszLine) ) { /* Katakana char. prefix it with 0x8e */ pszOut[iDst++] = 0x8e; pszOut[iDst++] = *pszLine; } else if ( *(pszLine+1) ) { /* This must be a pair of Shift-JIS chars... convert them to EUC * * If we haven't been able to establish the encoding for sure * yet, then it is possible that a pair of EUC chars could be * treated as shift-JIS here... but there is not much we can do * about that unless we scan the whole E00 input before we * start the conversion. */ unsigned char leader, trailer; leader = *(pszLine++); trailer = *pszLine; if(leader <= 0x9F) leader -= 0x71; else leader -= 0xB1; leader = (leader << 1) + 1; if(trailer > 0x7F) trailer --; if(trailer >= 0x9E) { trailer -= 0x7D; leader ++; } else { trailer -= 0x1F; } pszOut[iDst++] = leader | 0x80; pszOut[iDst++] = trailer | 0x80; } else { /* We should never get here unless a double-byte pair was * truncated... but just in case... */ pszOut[iDst++] = *pszLine; } } pszOut[iDst] = '\0'; return psDBCSInfo->pszDBCSBuf;}/********************************************************************** * _AVCArcDBCS2JapaneseShiftJIS() * * Convert string from coverage DBCS (EUC) to Japanese Shift-JIS. * * We know that binary coverages use a custom EUC encoding for japanese * which is EUC + all Katakana chars are prefixed with 0x8e. 
So this * function just does a simple conversion. **********************************************************************/static const char *_AVCArcDBCS2JapaneseShiftJIS(AVCDBCSInfo *psDBCSInfo, const unsigned char *pszLine, int nMaxOutputLen){ unsigned char *pszOut; int iDst; pszOut = psDBCSInfo->pszDBCSBuf; for(iDst=0; *pszLine && iDst < nMaxOutputLen; pszLine++) { if (IS_ASCII(*pszLine)) { /* No transformation required for ASCII */ pszOut[iDst++] = *pszLine; } else if (*pszLine == 0x8e && *(pszLine+1)) { pszLine++; /* Flush the 0x8e */ pszOut[iDst++] = *pszLine; } else if (*(pszLine+1)) { /* This is a pair of EUC chars... convert them to Shift-JIS */ unsigned char leader, trailer; leader = *(pszLine++) & 0x7F; trailer = *pszLine & 0x7F; if((leader & 0x01) != 0) trailer += 0x1F; else trailer += 0x7D; if(trailer >= 0x7F) trailer ++; leader = ((leader - 0x21) >> 1) + 0x81; if(leader > 0x9F) leader += 0x40; pszOut[iDst++] = leader; pszOut[iDst++] = trailer; } else { /* We should never get here unless a double-byte pair was * truncated... but just in case... */ pszOut[iDst++] = *pszLine; } } pszOut[iDst] = '\0'; return psDBCSInfo->pszDBCSBuf;}
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -