?? fstenc.c
字號:
/*
* fstenc.c
*
* Fast encoder
*
* This is a one pass encoder which uses predefined trees. However, since these are not the same
* trees defined for a fixed block (we use better trees than that), we output a dynamic block header.
*/
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"
#include "fasttbl.h"
//
// For debugging purposes:
//
// Verifies that all of the hash pointers in the hash table are correct, and that everything
// in the same hash chain has the same hash value
//
#ifdef FULL_DEBUG
#define VERIFY_HASHES(bufpos) FastEncoderVerifyHashes(context, bufpos)
#else
#define VERIFY_HASHES(bufpos) ;
#endif
//
// Update hash variable "h" with character c
//
#define UPDATE_HASH(h,c) \
h = ((h) << FAST_ENCODER_HASH_SHIFT) ^ (c);
//
// Insert a string into the hash chain at location bufpos
//
#define INSERT_STRING(search,bufpos) \
{ \
UPDATE_HASH(hash, window[bufpos+2]); \
\
_ASSERT((unsigned int) FAST_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & FAST_ENCODER_HASH_MASK)); \
\
search = lookup[hash & FAST_ENCODER_HASH_MASK]; \
lookup[hash & FAST_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
prev[bufpos & FAST_ENCODER_WINDOW_MASK] = (t_search_node) (search); \
}
//
// Output bits function which uses local variables for the bit buffer
//
#define LOCAL_OUTPUT_BITS(n, x) \
{ \
bitbuf |= ((x) << bitcount); \
bitcount += (n); \
if (bitcount >= 16) \
{ \
*output_curpos++ = (BYTE) bitbuf; \
*output_curpos++ = (BYTE) (bitbuf >> 8); \
bitcount -= 16; \
bitbuf >>= 16; \
} \
}
//
// Output unmatched symbol c
//
#define OUTPUT_CHAR(c) \
LOCAL_OUTPUT_BITS(g_FastEncoderLiteralCodeInfo[c] & 31, g_FastEncoderLiteralCodeInfo[c] >> 5);
//
// Output a match with length match_len (>= MIN_MATCH) and displacement match_pos
//
// Optimisation: unlike the other encoders, here we have an array of codes for each match
// length (not just each match length slot), complete with all the extra bits filled in, in
// a single array element.
//
// There are many advantages to doing this:
//
// 1. A single array lookup on g_FastEncoderLiteralCodeInfo, instead of separate array lookups
// on g_LengthLookup (to get the length slot), g_FastEncoderLiteralTreeLength,
// g_FastEncoderLiteralTreeCode, g_ExtraLengthBits, and g_BitMask
//
// 2. The array is an array of ULONGs, so no access penalty, unlike for accessing those USHORT
// code arrays in the other encoders (although they could be made into ULONGs with some
// modifications to the source).
//
// Note, if we could guarantee that code_len <= 16 always, then we could skip an if statement here.
//
// A completely different optimisation is used for the distance codes since, obviously, a table for
// all 8192 distances combining their extra bits is not feasible. The distance codeinfo table is
// made up of code[], len[] and # extra_bits for this code.
//
// The advantages are similar to the above; a ULONG array instead of a USHORT and BYTE array, better
// cache locality, fewer memory operations.
//
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
int extra_bits; \
int code_len; \
ULONG code_info; \
\
_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
\
code_info = g_FastEncoderLiteralCodeInfo[(NUM_CHARS+1-MIN_MATCH)+match_len]; \
code_len = code_info & 31; \
_ASSERT(code_len != 0); \
if (code_len <= 16) \
{ \
LOCAL_OUTPUT_BITS(code_len, code_info >> 5); \
} \
else \
{ \
LOCAL_OUTPUT_BITS(16, (code_info >> 5) & 65535); \
LOCAL_OUTPUT_BITS(code_len-16, code_info >> (5+16)); \
} \
code_info = g_FastEncoderDistanceCodeInfo[POS_SLOT(match_pos)]; \
LOCAL_OUTPUT_BITS(code_info & 15, code_info >> 8); \
extra_bits = (code_info >> 4) & 15; \
if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}
//
// This commented out code is the old way of doing things, which is what the other encoders use
//
#if 0
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
int pos_slot = POS_SLOT(match_pos); \
int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
int extra_bits; \
\
_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
_ASSERT(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot] != 0); \
_ASSERT(g_FastEncoderDistanceTreeLength[pos_slot] != 0); \
\
LOCAL_OUTPUT_BITS(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot], g_FastEncoderLiteralTreeCode[(NUM_CHARS+1)+len_slot]); \
extra_bits = g_ExtraLengthBits[len_slot]; \
if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_len-MIN_MATCH) & g_BitMask[extra_bits]); \
\
LOCAL_OUTPUT_BITS(g_FastEncoderDistanceTreeLength[pos_slot], g_FastEncoderDistanceTreeCode[pos_slot]); \
extra_bits = g_ExtraDistanceBits[pos_slot]; \
if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}
#endif
//
// Local function prototypes
//
static void FastEncoderMoveWindows(t_encoder_context *context);
static int FastEncoderFindMatch(
const BYTE * window,
const USHORT * prev,
long bufpos,
long search,
t_match_pos * match_pos,
int cutoff,
int nice_length
);
//
// Output the block type and tree structure for our hard-coded trees.
//
// Functionally equivalent to:
//
// outputBits(context, 1, 1); // "final" block flag
// outputBits(context, 2, BLOCKTYPE_DYNAMIC);
// outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
//
// However, all of the above has smartly been cached in global data, so we just memcpy().
//
void FastEncoderOutputPreamble(t_encoder_context *context)
{
#if 0
// slow way:
outputBits(context, 1+2, 1 | (BLOCKTYPE_DYNAMIC << 1));
outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
#endif
// make sure tree has been init
_ASSERT(g_FastEncoderTreeLength > 0);
// make sure we have enough space to output tree
_ASSERT(context->output_curpos + g_FastEncoderTreeLength < context->output_endpos);
// fast way:
memcpy(context->output_curpos, g_FastEncoderTreeStructureData, g_FastEncoderTreeLength);
context->output_curpos += g_FastEncoderTreeLength;
// need to get final states of bitbuf and bitcount after outputting all that stuff
context->bitbuf = g_FastEncoderPostTreeBitbuf;
context->bitcount = g_FastEncoderPostTreeBitcount;
}
//
// Fast encoder deflate function
//
void FastEncoderDeflate(
t_encoder_context * context,
int search_depth, // # hash links to traverse
int lazy_match_threshold, // don't search @ X+1 if match length @ X is > lazy
int good_length, // divide traversal depth by 4 if match length > good
int nice_length // in match finder, if we find >= nice_length match, quit immediately
)
{
long bufpos;
unsigned int hash;
unsigned long bitbuf;
int bitcount;
BYTE * output_curpos;
t_fast_encoder *encoder = context->fast_encoder;
byte * window = encoder->window; // make local copies of context variables
t_search_node * prev = encoder->prev;
t_search_node * lookup = encoder->lookup;
//
// If this is the first time in here (since last reset) then we need to output our dynamic
// block header
//
if (encoder->fOutputBlockHeader == FALSE)
{
encoder->fOutputBlockHeader = TRUE;
//
// Watch out! Calls to outputBits() and outputTreeStructure() use the bit buffer
// variables stored in the context, not our local cached variables.
//
FastEncoderOutputPreamble(context);
}
//
// Copy bitbuf vars into local variables since we're now using OUTPUT_BITS macro.
// Do not call anything that uses the context structure's bit buffer variables!
//
output_curpos = context->output_curpos;
bitbuf = context->bitbuf;
bitcount = context->bitcount;
// copy bufpos into local variable
bufpos = context->bufpos;
VERIFY_HASHES(bufpos); // debug mode: verify that the hash table is correct
// initialise the value of the hash
// no problem if locations bufpos, bufpos+1 are invalid (not enough data), since we will
// never insert using that hash value
hash = 0;
UPDATE_HASH(hash, window[bufpos]);
UPDATE_HASH(hash, window[bufpos+1]);
// while we haven't come to the end of the input, and we still aren't close to the end
// of the output
while (bufpos < context->bufpos_end && output_curpos < context->output_near_end_threshold)
{
int match_len;
t_match_pos match_pos;
t_match_pos search;
VERIFY_HASHES(bufpos); // debugger: verify that hash table is correct
if (context->bufpos_end - bufpos <= 3)
{
// The hash value becomes corrupt when we get within 3 characters of the end of the
// input buffer, since the hash value is based on 3 characters. We just stop
// inserting into the hash table at this point, and allow no matches.
match_len = 0;
}
else
{
// insert string into hash table and return most recent location of same hash value
INSERT_STRING(search,bufpos);
// did we find a recent location of this hash value?
if (search != 0)
{
// yes, now find a match at what we'll call position X
match_len = FastEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);
// truncate match if we're too close to the end of the input buffer
if (bufpos + match_len > context->bufpos_end)
match_len = context->bufpos_end - bufpos;
}
else
{
// no most recent location found
match_len = 0;
}
}
if (match_len < MIN_MATCH)
{
// didn't find a match, so output unmatched char
OUTPUT_CHAR(window[bufpos]);
bufpos++;
}
else
{
// bufpos now points to X+1
bufpos++;
// is this match so good (long) that we should take it automatically without
// checking X+1 ?
if (match_len <= lazy_match_threshold)
{
int next_match_len;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -