?? encoding.c.svn-base
字號:
*outlen = out - outstart; *inlenb = processed - inb; return(-2); } } /* assertion: c is a single UTF-4 value */ if (out >= outend) break; if (c < 0x80) { *out++= c; bits= -6; } else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) { if (out >= outend) break; *out++= ((c >> bits) & 0x3F) | 0x80; } processed = (const unsigned char*) in; } *outlen = out - outstart; *inlenb = processed - inb; return(0);}#ifdef LIBXML_OUTPUT_ENABLED/** * UTF8ToUTF16LE: * @outb: a pointer to an array of bytes to store the result * @outlen: the length of @outb * @in: a pointer to an array of UTF-8 chars * @inlen: the length of @in * * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE * block of chars out. * * Returns the number of bytes written, or -1 if lack of space, or -2 * if the transcoding failed. */static intUTF8ToUTF16LE(unsigned char* outb, int *outlen, const unsigned char* in, int *inlen){ unsigned short* out = (unsigned short*) outb; const unsigned char* processed = in; const unsigned char *const instart = in; unsigned short* outstart= out; unsigned short* outend; const unsigned char* inend= in+*inlen; unsigned int c, d; int trailing; unsigned char *tmp; unsigned short tmp1, tmp2; /* UTF16LE encoding has no BOM */ if (in == NULL) { *outlen = 0; *inlen = 0; return(0); } outend = out + (*outlen / 2); while (in < inend) { d= *in++; if (d < 0x80) { c= d; trailing= 0; } else if (d < 0xC0) { /* trailing byte in leading position */ *outlen = (out - outstart) * 2; *inlen = processed - instart; return(-2); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } else { /* no chance for this in UTF-16 */ *outlen = (out - outstart) * 2; *inlen = processed - instart; return(-2); } if (inend - in < trailing) { break; } for ( ; trailing; trailing--) { if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; c <<= 6; c |= d & 0x3F; } /* assertion: c is a single UTF-4 value */ if (c < 0x10000) { if (out >= outend) break; if (xmlLittleEndian) { *out++ = c; } else { tmp = (unsigned char *) out; *tmp = c ; *(tmp + 1) = c >> 8 ; out++; } } else if (c < 0x110000) { if (out+1 >= outend) break; c -= 0x10000; if (xmlLittleEndian) { *out++ = 0xD800 | (c >> 10); *out++ = 0xDC00 | (c & 0x03FF); } else { tmp1 = 0xD800 | (c >> 10); tmp = (unsigned char *) out; *tmp = (unsigned char) tmp1; *(tmp + 1) = tmp1 >> 8; out++; tmp2 = 0xDC00 | (c & 0x03FF); tmp = (unsigned char *) out; *tmp = (unsigned char) tmp2; *(tmp + 1) = tmp2 >> 8; out++; } } else break; processed = in; } *outlen = (out - outstart) * 2; *inlen = processed - instart; return(0);}/** * UTF8ToUTF16: * @outb: a pointer to an array of bytes to store the result * @outlen: the length of @outb * @in: a pointer to an array of UTF-8 chars * @inlen: the length of @in * * Take a block of UTF-8 chars in and try to convert it to an UTF-16 * block of chars out. * * Returns the number of bytes written, or -1 if lack of space, or -2 * if the transcoding failed. */static intUTF8ToUTF16(unsigned char* outb, int *outlen, const unsigned char* in, int *inlen){ if (in == NULL) { /* * initialization, add the Byte Order Mark for UTF-16LE */ if (*outlen >= 2) { outb[0] = 0xFF; outb[1] = 0xFE; *outlen = 2; *inlen = 0;#ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "Added FFFE Byte Order Mark\n");#endif return(2); } *outlen = 0; *inlen = 0; return(0); } return (UTF8ToUTF16LE(outb, outlen, in, inlen));}#endif /* LIBXML_OUTPUT_ENABLED *//** * UTF16BEToUTF8: * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out * @inb: a pointer to an array of UTF-16 passed as a byte array * @inlenb: the length of @in in UTF-16 chars * * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 * block of chars out. This function assumes the endian property * is the same between the native type of this machine and the * inputed one. * * Returns the number of bytes written, or -1 if lack of space, or -2 * if the transcoding fails (if *in is not a valid utf16 string) * The value of *inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. */static intUTF16BEToUTF8(unsigned char* out, int *outlen, const unsigned char* inb, int *inlenb){ unsigned char* outstart = out; const unsigned char* processed = inb; unsigned char* outend = out + *outlen; unsigned short* in = (unsigned short*) inb; unsigned short* inend; unsigned int c, d, inlen; unsigned char *tmp; int bits; if ((*inlenb % 2) == 1) (*inlenb)--; inlen = *inlenb / 2; inend= in + inlen; while (in < inend) { if (xmlLittleEndian) { tmp = (unsigned char *) in; c = *tmp++; c = c << 8; c = c | (unsigned int) *tmp; in++; } else { c= *in++; } if ((c & 0xFC00) == 0xD800) { /* surrogates */ if (in >= inend) { /* (in > inend) shouldn't happens */ *outlen = out - outstart; *inlenb = processed - inb; return(-2); } if (xmlLittleEndian) { tmp = (unsigned char *) in; d = *tmp++; d = d << 8; d = d | (unsigned int) *tmp; in++; } else { d= *in++; } if ((d & 0xFC00) == 0xDC00) { c &= 0x03FF; c <<= 10; c |= d & 0x03FF; c += 0x10000; } else { *outlen = out - outstart; *inlenb = processed - inb; return(-2); } } /* assertion: c is a single UTF-4 value */ if (out >= outend) break; if (c < 0x80) { *out++= c; bits= -6; } else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) { if (out >= outend) break; *out++= ((c >> bits) & 0x3F) | 0x80; } processed = (const unsigned char*) in; } *outlen = out - outstart; *inlenb = processed - inb; return(0);}#ifdef LIBXML_OUTPUT_ENABLED/** * UTF8ToUTF16BE: * @outb: a pointer to an array of bytes to store the result * @outlen: the length of @outb * @in: a pointer to an array of UTF-8 chars * @inlen: the length of @in * * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE * block of chars out. * * Returns the number of byte written, or -1 by lack of space, or -2 * if the transcoding failed. */static intUTF8ToUTF16BE(unsigned char* outb, int *outlen, const unsigned char* in, int *inlen){ unsigned short* out = (unsigned short*) outb; const unsigned char* processed = in; const unsigned char *const instart = in; unsigned short* outstart= out; unsigned short* outend; const unsigned char* inend= in+*inlen; unsigned int c, d; int trailing; unsigned char *tmp; unsigned short tmp1, tmp2; /* UTF-16BE has no BOM */ if (in == NULL) { *outlen = 0; *inlen = 0; return(0); } outend = out + (*outlen / 2); while (in < inend) { d= *in++; if (d < 0x80) { c= d; trailing= 0; } else if (d < 0xC0) { /* trailing byte in leading position */ *outlen = out - outstart; *inlen = processed - instart; return(-2); } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } else if (d < 0xF8) { c= d & 0x07; trailing= 3; } else { /* no chance for this in UTF-16 */ *outlen = out - outstart; *inlen = processed - instart; return(-2); } if (inend - in < trailing) { break; } for ( ; trailing; trailing--) { if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; c <<= 6; c |= d & 0x3F; } /* assertion: c is a single UTF-4 value */ if (c < 0x10000) { if (out >= outend) break; if (xmlLittleEndian) { tmp = (unsigned char *) out; *tmp = c >> 8; *(tmp + 1) = c; out++; } else { *out++ = c; } } else if (c < 0x110000) { if (out+1 >= outend) break; c -= 0x10000; if (xmlLittleEndian) { tmp1 = 0xD800 | (c >> 10); tmp = (unsigned char *) out; *tmp = tmp1 >> 8; *(tmp + 1) = (unsigned char) tmp1; out++; tmp2 = 0xDC00 | (c & 0x03FF); tmp = (unsigned char *) out; *tmp = tmp2 >> 8; *(tmp + 1) = (unsigned char) tmp2; out++; } else { *out++ = 0xD800 | (c >> 10); *out++ = 0xDC00 | (c & 0x03FF); } } else break; processed = in; } *outlen = (out - outstart) * 2; *inlen = processed - instart; return(0);}#endif /* LIBXML_OUTPUT_ENABLED *//************************************************************************ * * * Generic encoding handling routines * * * ************************************************************************//** * xmlDetectCharEncoding: * @in: a pointer to the first bytes of the XML entity, must be at least * 2 bytes long (at least 4 if encoding is UTF4 variant). * @len: pointer to the length of the buffer * * Guess the encoding of the entity using the first bytes of the entity content * according to the non-normative appendix F of the XML-1.0 recommendation. * * Returns one of the XML_CHAR_ENCODING_... values. */xmlCharEncodingxmlDetectCharEncoding(const unsigned char* in, int len){ if (len >= 4) { if ((in[0] == 0x00) && (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) return(XML_CHAR_ENCODING_UCS4BE); if ((in[0] == 0x3C) && (in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x00)) return(XML_CHAR_ENCODING_UCS4LE); if ((in[0] == 0x00) && (in[1] == 0x00) && (in[2] == 0x3C) && (in[3] == 0x00)) return(XML_CHAR_ENCODING_UCS4_2143); if ((in[0] == 0x00) && (in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x00)) return(XML_CHAR_ENCODING_UCS4_3412); if ((in[0] == 0x4C) && (in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) return(XML_CHAR_ENCODING_EBCDIC); if ((in[0] == 0x3C) && (in[1] == 0x3F) && (in[2] == 0x78) && (in[3] == 0x6D)) return(XML_CHAR_ENCODING_UTF8); /* * Although not part of the recommendation, we also * attempt an "auto-recognition" of UTF-16LE and * UTF-16BE encodings. */ if ((in[0] == 0x3C) && (in[1] == 0x00) && (in[2] == 0x3F) && (in[3] == 0x00)) return(XML_CHAR_ENCODING_UTF16LE); if ((in[0] == 0x00) && (in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) return(XML_CHAR_ENCODING_UTF16BE); } if (len >= 3) { /* * Errata on XML-1.0 June 20 2001 * We now allow an UTF8 encoded BOM */ if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) return(XML_CHAR_ENCODING_UTF8); } /* For UTF-16 we can recognize by the BOM */ if (len >= 2) { if ((in[0] == 0xFE) && (in[1] == 0xFF)) return(XML_CHAR_ENCODING_UTF16BE); if ((in[0] == 0xFF) && (in[1] == 0xFE)) return(XML_CHAR_ENCODING_UTF16LE); } return(XML_CHAR_ENCODING_NONE);}/** * xmlCleanupEncodingAliases: * * Unregisters all aliases */voidxmlCleanupEncodingAliases(void) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -