?? encoding.c.svn-base
字號(hào):
/** * xmlCleanupCharEncodingHandlers: * * Cleanup the memory allocated for the char encoding support, it * unregisters all the encoding handlers and the aliases. */voidxmlCleanupCharEncodingHandlers(void) { xmlCleanupEncodingAliases(); if (handlers == NULL) return; for (;nbCharEncodingHandler > 0;) { nbCharEncodingHandler--; if (handlers[nbCharEncodingHandler] != NULL) { if (handlers[nbCharEncodingHandler]->name != NULL) xmlFree(handlers[nbCharEncodingHandler]->name); xmlFree(handlers[nbCharEncodingHandler]); } } xmlFree(handlers); handlers = NULL; nbCharEncodingHandler = 0; xmlDefaultCharEncodingHandler = NULL;}/** * xmlRegisterCharEncodingHandler: * @handler: the xmlCharEncodingHandlerPtr handler block * * Register the char encoding handler, surprising, isn't it ? */voidxmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { if (handlers == NULL) xmlInitCharEncodingHandlers(); if (handler == NULL) { xmlGenericError(xmlGenericErrorContext, "xmlRegisterCharEncodingHandler: NULL handler !\n"); return; } if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { xmlGenericError(xmlGenericErrorContext, "xmlRegisterCharEncodingHandler: Too many handler registered\n"); xmlGenericError(xmlGenericErrorContext, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__); return; } handlers[nbCharEncodingHandler++] = handler;}/** * xmlGetCharEncodingHandler: * @enc: an xmlCharEncoding value. * * Search in the registered set the handler able to read/write that encoding. * * Returns the handler or NULL if not found */xmlCharEncodingHandlerPtrxmlGetCharEncodingHandler(xmlCharEncoding enc) { xmlCharEncodingHandlerPtr handler; if (handlers == NULL) xmlInitCharEncodingHandlers(); switch (enc) { case XML_CHAR_ENCODING_ERROR: return(NULL); case XML_CHAR_ENCODING_NONE: return(NULL); case XML_CHAR_ENCODING_UTF8: return(NULL); case XML_CHAR_ENCODING_UTF16LE: return(xmlUTF16LEHandler); case XML_CHAR_ENCODING_UTF16BE: return(xmlUTF16BEHandler); case XML_CHAR_ENCODING_EBCDIC: handler = xmlFindCharEncodingHandler("EBCDIC"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("ebcdic"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_UCS4BE: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS-4"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS4"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_UCS4LE: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS-4"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS4"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_UCS4_2143: break; case XML_CHAR_ENCODING_UCS4_3412: break; case XML_CHAR_ENCODING_UCS2: handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS-2"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("UCS2"); if (handler != NULL) return(handler); break; /* * We used to keep ISO Latin encodings native in the * generated data. This led to so many problems that * this has been removed. One can still change this * back by registering no-ops encoders for those */ case XML_CHAR_ENCODING_8859_1: handler = xmlFindCharEncodingHandler("ISO-8859-1"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_2: handler = xmlFindCharEncodingHandler("ISO-8859-2"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_3: handler = xmlFindCharEncodingHandler("ISO-8859-3"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_4: handler = xmlFindCharEncodingHandler("ISO-8859-4"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_5: handler = xmlFindCharEncodingHandler("ISO-8859-5"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_6: handler = xmlFindCharEncodingHandler("ISO-8859-6"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_7: handler = xmlFindCharEncodingHandler("ISO-8859-7"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_8: handler = xmlFindCharEncodingHandler("ISO-8859-8"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_8859_9: handler = xmlFindCharEncodingHandler("ISO-8859-9"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_2022_JP: handler = xmlFindCharEncodingHandler("ISO-2022-JP"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_SHIFT_JIS: handler = xmlFindCharEncodingHandler("SHIFT-JIS"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("SHIFT_JIS"); if (handler != NULL) return(handler); handler = xmlFindCharEncodingHandler("Shift_JIS"); if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_EUC_JP: handler = xmlFindCharEncodingHandler("EUC-JP"); if (handler != NULL) return(handler); break; default: break; } #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "No handler found for encoding %d\n", enc);#endif return(NULL);}/** * xmlFindCharEncodingHandler: * @name: a string describing the char encoding. * * Search in the registered set the handler able to read/write that encoding. * * Returns the handler or NULL if not found */xmlCharEncodingHandlerPtrxmlFindCharEncodingHandler(const char *name) { const char *nalias; const char *norig; xmlCharEncoding alias;#ifdef LIBXML_ICONV_ENABLED xmlCharEncodingHandlerPtr enc; iconv_t icv_in, icv_out;#endif /* LIBXML_ICONV_ENABLED */ char upper[100]; int i; if (handlers == NULL) xmlInitCharEncodingHandlers(); if (name == NULL) return(xmlDefaultCharEncodingHandler); if (name[0] == 0) return(xmlDefaultCharEncodingHandler); /* * Do the alias resolution */ norig = name; nalias = xmlGetEncodingAlias(name); if (nalias != NULL) name = nalias; /* * Check first for directly registered encoding names */ for (i = 0;i < 99;i++) { upper[i] = toupper(name[i]); if (upper[i] == 0) break; } upper[i] = 0; for (i = 0;i < nbCharEncodingHandler; i++) if (!strcmp(upper, handlers[i]->name)) {#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "Found registered handler for encoding %s\n", name);#endif return(handlers[i]); }#ifdef LIBXML_ICONV_ENABLED /* check whether iconv can handle this */ icv_in = iconv_open("UTF-8", name); icv_out = iconv_open(name, "UTF-8"); if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { enc = (xmlCharEncodingHandlerPtr) xmlMalloc(sizeof(xmlCharEncodingHandler)); if (enc == NULL) { iconv_close(icv_in); iconv_close(icv_out); return(NULL); } enc->name = xmlMemStrdup(name); enc->input = NULL; enc->output = NULL; enc->iconv_in = icv_in; enc->iconv_out = icv_out;#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "Found iconv handler for encoding %s\n", name);#endif return enc; } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { xmlGenericError(xmlGenericErrorContext, "iconv : problems with filters for '%s'\n", name); }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "No handler found for encoding %s\n", name);#endif /* * Fallback using the canonical names */ alias = xmlParseCharEncoding(norig); if (alias != XML_CHAR_ENCODING_ERROR) { const char* canon; canon = xmlGetCharEncodingName(alias); if ((canon != NULL) && (strcmp(name, canon))) { return(xmlFindCharEncodingHandler(canon)); } } /* If "none of the above", give up */ return(NULL);}/************************************************************************ * * * ICONV based generic conversion functions * * * ************************************************************************/#ifdef LIBXML_ICONV_ENABLED/** * xmlIconvWrapper: * @cd: iconv converter data structure * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out * @in: a pointer to an array of ISO Latin 1 chars * @inlen: the length of @in * * Returns 0 if success, or * -1 by lack of space, or * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or * -3 if there the last byte can't form a single output char. * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. * The value of @outlen after return is the number of ocetes consumed. */static intxmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, const unsigned char *in, int *inlen) { size_t icv_inlen = *inlen, icv_outlen = *outlen; const char *icv_in = (const char *) in; char *icv_out = (char *) out; int ret; ret = iconv(cd, (char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); if (in != NULL) { *inlen -= icv_inlen; *outlen -= icv_outlen; } else { *inlen = 0; *outlen = 0; } if ((icv_inlen != 0) || (ret == -1)) {#ifdef EILSEQ if (errno == EILSEQ) { return -2; } else#endif#ifdef E2BIG if (errno == E2BIG) { return -1; } else#endif#ifdef EINVAL if (errno == EINVAL) { return -3; } else#endif { return -3; } } return 0;}#endif /* LIBXML_ICONV_ENABLED *//************************************************************************ * * * The real API used by libxml for on-the-fly conversion * * * ************************************************************************//** * xmlCharEncFirstLine: * @handler: char enconding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input * * Front-end for the encoding handler input function, but handle only * the very first line, i.e. limit itself to 45 chars. * * Returns the number of byte written if success, or * -1 general error * -2 if the transcoding fails (for *in is not valid utf8 string or * the result of transformation can't fit into the encoding we want), or */intxmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in) { int ret = -2; int written; int toconv; if (handler == NULL) return(-1); if (out == NULL) return(-1); if (in == NULL) return(-1); written = out->size - out->use; toconv = in->use; if (toconv * 2 >= written) { xmlBufferGrow(out, toconv); written = out->size - out->use - 1; } /* * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 * 45 chars should be sufficient to reach the end of the encoding * declaration without going too far inside the document content. */ written = 45; if (handler->input != NULL) { ret = handler->input(&out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; out->content[out->use] = 0; }#ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_in != NULL) { ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], &written, in->content, &toconv); xmlBufferShrink(in, toconv); out->use += written; out->content[out->use] = 0; if (ret == -1) ret = -3; }#endif /* LIBXML_ICONV_ENABLED */#ifdef DEBUG_ENCODING switch (ret) { case 0: xmlGenericError(xmlGenericErrorContext, "converted %d bytes to %d bytes of input\n", toconv, written); break; case -1: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use); break; case -2: xmlGenericError(xmlGenericErrorContext, "input conversion failed due to input error\n"); break; case -3: xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", toconv, written, in->use); break; default: xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); }#endif /* DEBUG_ENCODING */ /* * Ignore when input buffer is not on a boundary */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; return(ret);}/** * xmlCharEncInFunc: * @handler: char encoding transformation data structure * @out: an xmlBuffer for the output. * @in: an xmlBuffer for the input * * Generic front-end for the encoding handler input function *
?? 快捷鍵說(shuō)明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -