?? avc_mbyte.c
字號:
/* $Id: avc_mbyte.c,v 1.3 2005/06/03 03:49:59 daniel Exp $ * * Name: avc_mbyte.c * Project: Arc/Info vector coverage (AVC) E00->BIN conversion library * Language: ANSI C * Purpose: Functions to handle multibyte character conversions. * Author: Daniel Morissette, dmorissette@dmsolutions.ca * ********************************************************************** * Copyright (c) 1999-2005, Daniel Morissette * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
********************************************************************** * * $Log: avc_mbyte.c,v $ * Revision 1.3 2005/06/03 03:49:59 daniel * Update email address, website url, and copyright dates * * Revision 1.2 2000/09/22 19:45:21 daniel * Switch to MIT-style license * * Revision 1.1 2000/05/29 15:31:03 daniel * Initial revision - Japanese support * **********************************************************************/#include "avc.h"#ifdef _WIN32# include <mbctype.h>#endifstatic int _AVCDetectJapaneseEncoding(const unsigned char *pszLine);static const char *_AVCJapanese2ArcDBCS(AVCDBCSInfo *psDBCSInfo, const unsigned char *pszLine, int nMaxOutputLen);static const char *_AVCArcDBCS2JapaneseShiftJIS(AVCDBCSInfo *psDBCSInfo, const unsigned char *pszLine, int nMaxOutputLen);/*===================================================================== * Functions to handle multibyte char conversions *====================================================================*/#define IS_ASCII(c) ((c) < 0x80)/********************************************************************** * AVCAllocDBCSInfo() * * Alloc and init a new AVCDBCSInfo structure. **********************************************************************/AVCDBCSInfo *AVCAllocDBCSInfo(){ AVCDBCSInfo *psInfo; psInfo = (AVCDBCSInfo*)CPLCalloc(1, sizeof(AVCDBCSInfo)); psInfo->nDBCSCodePage = AVCGetDBCSCodePage(); psInfo->nDBCSEncoding = AVC_CODE_UNKNOWN; psInfo->pszDBCSBuf = NULL; psInfo->nDBCSBufSize = 0; return psInfo;}/********************************************************************** * AVCFreeDBCSInfo() * * Release all memory associated with a AVCDBCSInfo structure. **********************************************************************/void AVCFreeDBCSInfo(AVCDBCSInfo *psInfo){ if (psInfo) { CPLFree(psInfo->pszDBCSBuf); CPLFree(psInfo); }}/********************************************************************** * AVCGetDBCSCodePage() * * Fetch current multibyte codepage on the system. 
* Returns a valid codepage number, or 0 if the codepage is single byte or * unsupported. **********************************************************************/int AVCGetDBCSCodePage(){#ifdef _WIN32 int nCP; nCP = _getmbcp(); /* Check if that's a supported codepage */ if (nCP == AVC_DBCS_JAPANESE) return nCP;#endif return 0;}/********************************************************************** * AVCE00DetectEncoding() * * Try to detect the encoding used in the current file by examining lines * of input. * * Returns TRUE once the encoding is established, or FALSE if more lines * of input are required to establish the encoding. **********************************************************************/GBool AVCE00DetectEncoding(AVCDBCSInfo *psDBCSInfo, const char *pszLine){ if (psDBCSInfo == NULL || psDBCSInfo->nDBCSCodePage == 0 || psDBCSInfo->nDBCSEncoding != AVC_CODE_UNKNOWN) { /* Either single byte codepage, or encoding has already been detected */ return TRUE; } switch (psDBCSInfo->nDBCSCodePage) { case AVC_DBCS_JAPANESE: psDBCSInfo->nDBCSEncoding = _AVCDetectJapaneseEncoding((const unsigned char *)pszLine); break; default: psDBCSInfo->nDBCSEncoding = AVC_CODE_UNKNOWN; return TRUE; /* Codepage not supported... no need to scan more lines*/ } if (psDBCSInfo->nDBCSEncoding != AVC_CODE_UNKNOWN) return TRUE; /* We detected the encoding! */ return FALSE;}/********************************************************************** * AVCE00Convert2ArcDBCS() * * If encoding is still unknown, try to detect the encoding used in the * current file, and then convert the string to an encoding validfor output * to a coverage. * * Returns a reference to a const buffer that should not be freed by the * caller. It can be either the original string buffer or a ref. to an * internal buffer. 
**********************************************************************/
const char *AVCE00Convert2ArcDBCS(AVCDBCSInfo *psDBCSInfo,
                                  const char *pszLine,
                                  int nMaxOutputLen)
{
    const unsigned char *pabyScan;
    int                  nNeededSize;

    /* No conversion state, single byte codepage, or no input:
     * hand the caller's pointer straight back.
     */
    if (psDBCSInfo == NULL)
        return pszLine;
    if (psDBCSInfo->nDBCSCodePage == 0 || pszLine == NULL)
        return pszLine;

    /* Walk the string until the terminator or the first non-ASCII byte.
     * Reaching the terminator means the string is pure ASCII and needs
     * no conversion.
     */
    pabyScan = (const unsigned char *)pszLine;
    while (*pabyScan != '\0' && IS_ASCII(*pabyScan))
        pabyScan++;

    if (*pabyScan == '\0')
        return pszLine;

    /* Grow the internal buffer when it is missing or too small.
     * Two extra bytes are reserved so that the conversion code does not
     * have to check whether the second byte of a pair would overflow.
     */
    nNeededSize = nMaxOutputLen + 2;
    if (psDBCSInfo->pszDBCSBuf == NULL ||
        psDBCSInfo->nDBCSBufSize < nNeededSize)
    {
        psDBCSInfo->nDBCSBufSize = nNeededSize;
        psDBCSInfo->pszDBCSBuf =
            (unsigned char *)CPLRealloc(psDBCSInfo->pszDBCSBuf,
                                        psDBCSInfo->nDBCSBufSize *
                                            sizeof(unsigned char));
    }

    /* Dispatch on the current code page. */
    if (psDBCSInfo->nDBCSCodePage == AVC_DBCS_JAPANESE)
    {
        return (const char *)_AVCJapanese2ArcDBCS(
                                   psDBCSInfo,
                                   (const unsigned char *)pszLine,
                                   nMaxOutputLen);
    }

    /* We should never get here anyways, but just in case return pszLine */
    return pszLine;
}

/**********************************************************************
 * AVCE00ConvertFromArcDBCS()
 *
 * Convert DBCS encoding in binary coverage files to E00 encoding.
 *
 * Returns a reference to a const buffer that should not be freed by the
 * caller. It can be either the original string buffer or a ref. to an
 * internal buffer.
**********************************************************************/const char *AVCE00ConvertFromArcDBCS(AVCDBCSInfo *psDBCSInfo, const char *pszLine, int nMaxOutputLen){ const char *pszOutBuf = NULL; unsigned char *pszTmp; GBool bAllAscii; if (psDBCSInfo == NULL || psDBCSInfo->nDBCSCodePage == 0 || pszLine == NULL) { /* Single byte codepage... nothing to do */ return pszLine; } /* If string is all ASCII then there is nothing to do... */ pszTmp = (unsigned char *)pszLine; for(bAllAscii = TRUE ; bAllAscii && pszTmp && *pszTmp; pszTmp++) { if ( !IS_ASCII(*pszTmp) ) bAllAscii = FALSE; } if (bAllAscii) return pszLine; /* Make sure output buffer is large enough. * We add 2 chars to buffer size to simplify processing... no need to * check if second byte of a pair would overflow buffer. */ if (psDBCSInfo->pszDBCSBuf == NULL || psDBCSInfo->nDBCSBufSize < nMaxOutputLen+2) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -