?? lzx.cpp
字號:
/* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ */
/***************************************************************************
* lzx.c - LZX decompression routines *
* ------------------- *
* *
* maintainer: Jed Wing <jedwin@ugcs.caltech.edu> *
* source: modified lzx.c from cabextract v0.5 *
* notes: This file was taken from cabextract v0.5, which was, *
* itself, a modified version of the lzx decompression code *
* from unlzx. *
* *
* platforms: In its current incarnation, this file has been tested on *
* two different Linux platforms (one, redhat-based, with a *
* 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with *
* 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were *
* Intel x86 compatible machines. *
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. Note that an exemption to this *
* license has been granted by Stuart Caie for the purposes of *
* distribution with chmlib. This does not, to the best of my *
* knowledge, constitute a change in the license of this (the LZX) code *
* in general. *
* *
***************************************************************************/
#include "lzx.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __GNUC__
#define memcpy __builtin_memcpy
#endif
/* sized types */
typedef unsigned char UBYTE; /* 8 bits exactly */
typedef unsigned short UWORD; /* 16 bits (or more) */
typedef unsigned int ULONG; /* 32 bits (or more) */
typedef signed int LONG; /* 32 bits (or more) */
/* some constants defined by the LZX specification */
#define LZX_MIN_MATCH (2)
#define LZX_MAX_MATCH (257)
#define LZX_NUM_CHARS (256)
#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */
#define LZX_BLOCKTYPE_VERBATIM (1)
#define LZX_BLOCKTYPE_ALIGNED (2)
#define LZX_BLOCKTYPE_UNCOMPRESSED (3)
#define LZX_PRETREE_NUM_ELEMENTS (20)
#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */
#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */
#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */
/* LZX huffman defines: tweak tablebits as desired */
#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)
#define LZX_PRETREE_TABLEBITS (6)
#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
#define LZX_MAINTREE_TABLEBITS (12)
#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)
#define LZX_LENGTH_TABLEBITS (12)
#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS)
#define LZX_ALIGNED_TABLEBITS (7)
#define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */
#define LZX_DECLARE_TABLE(tbl) \
UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\
UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]
struct LZXstate
{
UBYTE *window; /* the actual decoding window */
ULONG window_size; /* window size (32Kb through 2Mb) */
ULONG actual_size; /* window size when it was first allocated */
ULONG window_posn; /* current offset within the window */
ULONG R0, R1, R2; /* for the LRU offset system */
UWORD main_elements; /* number of main tree elements */
int header_read; /* have we started decoding at all yet? */
UWORD block_type; /* type of this block */
ULONG block_length; /* uncompressed length of this block */
ULONG block_remaining; /* uncompressed bytes still left to decode */
ULONG frames_read; /* the number of CFDATA blocks processed */
LONG intel_filesize; /* magic header value used for transform */
LONG intel_curpos; /* current offset in transform space */
int intel_started; /* have we seen any translatable data yet? */
LZX_DECLARE_TABLE(PRETREE);
LZX_DECLARE_TABLE(MAINTREE);
LZX_DECLARE_TABLE(LENGTH);
LZX_DECLARE_TABLE(ALIGNED);
};
/* LZX decruncher */
/* Microsoft's LZX document and their implementation of the
* com.ms.util.cab Java package do not concur.
*
* In the LZX document, there is a table showing the correlation between
* window size and the number of position slots. It states that the 1MB
* window = 40 slots and the 2MB window = 42 slots. In the implementation,
* 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
* first slot whose position base is equal to or more than the required
* window size'. This would explain why other tables in the document refer
* to 50 slots rather than 42.
*
* The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
* is not defined in the specification.
*
* The LZX document does not state the uncompressed block has an
* uncompressed length field. Where does this length field come from, so
* we can know how large the block is? The implementation has it as the 24
* bits following after the 3 blocktype bits, before the alignment
* padding.
*
* The LZX document states that aligned offset blocks have their aligned
* offset huffman tree AFTER the main and length trees. The implementation
* suggests that the aligned offset tree is BEFORE the main and length
* trees.
*
* The LZX document decoding algorithm states that, in an aligned offset
* block, if an extra_bits value is 1, 2 or 3, then that number of bits
* should be read and the result added to the match offset. This is
* correct for 1 and 2, but not 3, where just a huffman symbol (using the
* aligned tree) should be read.
*
* Regarding the E8 preprocessing, the LZX document states 'No translation
* may be performed on the last 6 bytes of the input block'. This is
* correct. However, the pseudocode provided checks for the *E8 leader*
* up to the last 6 bytes. If the leader appears between -10 and -7 bytes
* from the end, this would cause the next four bytes to be modified, at
* least one of which would be in the last 6 bytes, which is not allowed
* according to the spec.
*
* The specification states that the huffman trees must always contain at
* least one element. However, many CAB files contain blocks where the
* length tree is completely empty (because there are no matches), and
* this is expected to succeed.
*/
/* LZX uses what it calls 'position slots' to represent match offsets.
* What this means is that a small 'position slot' number and a small
* offset from that slot are encoded instead of one large offset for
* every match.
* - position_base is an index to the position slot bases
* - extra_bits states how many bits of offset-from-base data is needed.
*/
static const UBYTE extra_bits[51] = {
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17
};
static const ULONG position_base[51] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
1835008, 1966080, 2097152
};
struct LZXstate *LZXinit(int window)
{
struct LZXstate *pState=NULL;
ULONG wndsize = 1 << window;
int i, posn_slots;
/* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
/* if a previously allocated window is big enough, keep it */
if (window < 15 || window > 21) return NULL;
/* allocate state and associated window */
pState = (struct LZXstate *)malloc(sizeof(struct LZXstate));
if (!(pState->window = (UBYTE *)malloc(wndsize)))
{
free(pState);
return NULL;
}
pState->actual_size = wndsize;
pState->window_size = wndsize;
/* calculate required position slots */
if (window == 20) posn_slots = 42;
else if (window == 21) posn_slots = 50;
else posn_slots = window << 1;
/** alternatively **/
/* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
/* initialize other state */
pState->R0 = pState->R1 = pState->R2 = 1;
pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3);
pState->header_read = 0;
pState->frames_read = 0;
pState->block_remaining = 0;
pState->block_type = LZX_BLOCKTYPE_INVALID;
pState->intel_curpos = 0;
pState->intel_started = 0;
pState->window_posn = 0;
/* initialise tables to 0 (because deltas will be applied to them) */
for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0;
for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0;
return pState;
}
void LZXteardown(struct LZXstate *pState)
{
if (pState)
{
if (pState->window)
free(pState->window);
free(pState);
}
}
int LZXreset(struct LZXstate *pState)
{
int i;
pState->R0 = pState->R1 = pState->R2 = 1;
pState->header_read = 0;
pState->frames_read = 0;
pState->block_remaining = 0;
pState->block_type = LZX_BLOCKTYPE_INVALID;
pState->intel_curpos = 0;
pState->intel_started = 0;
pState->window_posn = 0;
for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0;
for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0;
return DECR_OK;
};
/* Bitstream reading macros:
*
* INIT_BITSTREAM should be used first to set up the system
* READ_BITS(var,n) takes N bits from the buffer and puts them in var
*
* ENSURE_BITS(n) ensures there are at least N bits in the bit buffer
* PEEK_BITS(n) extracts (without removing) N bits from the bit buffer
* REMOVE_BITS(n) removes N bits from the bit buffer
*
* These bit access routines work by using the area beyond the MSB and the
* LSB as a free source of zeroes. This avoids having to mask any bits.
* So we have to know the bit width of the bitbuffer variable. This is
* sizeof(ULONG) * 8, also defined as ULONG_BITS
*/
/* number of bits in ULONG. Note: This must be at multiple of 16, and at
* least 32 for the bitbuffer code to work (ie, it must be able to ensure
* up to 17 bits - that's adding 16 bits when there's one bit left, or
* adding 32 bits when there are no bits left. The code should work fine
* for machines where ULONG >= 32 bits.
*/
#define ULONG_BITS (sizeof(ULONG)<<3)
#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)
#define ENSURE_BITS(n) \
while (bitsleft < (n)) { \
bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft); \
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -