?? lzari.cpp
字號:
// Created:09-23-98
// By Jeff Connelly
// LZARI (LZSS with arithmetic) compression/decompression
// Original code written by Haruhiko Okumura in
// ORIGSRC\LZARI.C. You can use, distribute, and modify
// this program freely.
#include "stdafx.h"
#define EXPORTING
#include "comprlib.h"
#include "lzari.h"
#include <string.h>
#include <ctype.h>
static unsigned long textsize = 0, codesize = 0;
// *********** BIT I/O *********
// Write one bit (bit can be 0 or 1)
static void PutBit (bool bit)
{
static unsigned int buffer = 0, mask = 128;
if (bit)
buffer |= mask;
if ((mask >>= 1) == 0)
{
write_byte (buffer);
buffer = 0;
mask = 128;
++codesize;
}
}
// Send remaining unsent bits
static void FlushBitBuffer ()
{
int i;
for (i = 0; i < 7; ++i)
PutBit(0);
}
static bool GetBit ()
{
static unsigned int buffer, mask = 0;
if ((mask >>= 1) == 0)
{
buffer = read_byte();
mask = 128;
}
return ((buffer & mask) != 0);
}
// ********** LZSS with multible binary trees ***********
#define N 4096 // size of ring buffer
#define F 60 // upper limit for match_length
#define THRESHOLD 2 // encode string into position, length
// pair if match_length is greater than this
#define NIL N // index for root of binary search trees
static unsigned char text_buf[N + F - 1]; // Ring buffer of size N with extra
// F - 1 bytes for string comparison
static int match_position, match_length; // Length of longest match
static int lson[N + 1], rson[N + 257], dad[N + 1]; // Children and parents
// Initalizes trees
static void InitTree()
{
int i;
// For i = 0 to N - 1, rson[i] and lson[i] will be the right
// and left children of node i. These nodes do not need to be
// initalizes. dad[i] is parent of node i. Initalizes to
// NIL (defined as N) which means 'not used.'
// For i = 0 to 255, rson[N + i + 1] is root of strings that begin
// with character i. These are initailzed to NIL. 256 trees.
for (i = N + 1; i <= N + 256; i++)
rson[i] = NIL; // root
for (i = 0; i < N; i++)
dad[i] = NIL; // node
}
// Inserts string of length F, text_buf[r..r + F - 1] into one of the
// trees (text_buf[r]th tree) and sets the longest match position and
// length (match_length and match_position). If match_length == F, then
// removes the old node in favor of the new node because the old one will be
// deleted sooner. r is the both tree node and position in the buffer.
static void InsertNode (int r)
{
int i, p, cmp, temp;
unsigned char* key;
cmp = 1;
key = &text_buf[r];
p = N + 1 + key[0];
rson[r] = lson[r] = NIL;
match_length = 0;
while (true)
{
if (cmp >= 0)
{
if (rson[p] != NIL)
p = rson[p];
else
{
rson[p] = r;
dad[r] = p;
return;
}
} else {
if (lson[p] != NIL)
p = lson[p];
else
{
lson[p] = r;
dad[r] = p;
return;
}
}
for (i = 1; i < F; i++)
if ((cmp = key[i] - text_buf[p + i]) != 0)
break;
if (i > THRESHOLD)
{
if (i > match_length)
{
match_position = (r - p) & (N - 1);
if ((match_length = i) >= F)
break;
} else if (i == match_length) {
if ((temp = (r - p) & (N - 1)) < match_position)
match_position = temp;
}
}
}
dad[r] = dad[p];
lson[r] = lson[p];
rson[r] = rson[p];
dad[lson[p]] = r;
dad[rson[p]] = r;
if (rson[dad[p]] == p)
rson[dad[p]] = r;
else
lson[dad[p]] = r;
dad[p] = NIL; // remove p
}
// Remove node p from the tree
static void DeleteNode (int p)
{
int q;
if (dad[p] == NIL)
return; // not in tree
if (rson[p] = NIL)
q = lson[p];
else if (lson[p] == NIL)
q = rson[p];
else
{
q = lson[p];
if (rson[q] != NIL)
{
do
{
q = rson[q];
} while (rson[q] != NIL);
rson[dad[q]] = lson[q];
dad[lson[q]] = dad[q];
lson[q] = lson[p];
dad[lson[q]] = q;
}
rson[q] = rson[p];
dad[rson[p]] = q;
}
dad[q] = dad[p];
if (rson[dad[p]] == p)
rson[dad[p]] = q;
else
lson[dad[p]] = q;
dad[p] = NIL;
}
// ***************** Arithmetic Compression ************
// The book
// I. E. Witten, R. M. Neal, and J. G. Cleary,
// Communications of the ACM, Vol. 30, pp. 520-540 (1987)
// explains about arithmetic compression, in case you are not familer with
// it.
#define M 15
// Q1 (== 2 to the M) must be large, but not large as 4 * Q1 * (Q1 - 1)
// overflows.
#define Q1 (1UL << M)
#define Q2 (2 * Q1)
#define Q3 (3 * Q1)
#define Q4 (4 * Q1)
#define MAX_CUM (Q1 - 1)
#define N_CHAR (256 - THRESHOLD + F) // Char code: 0, 1, ..., N_CHAR - 1
static unsigned int low = 0, high = Q4, value = 0;
static int shifts = 0; // counts for magnifying low and high around Q2
static int char_to_sym[N_CHAR], sym_to_char[N_CHAR + 1];
static unsigned int sym_freq[N_CHAR + 1]; // frequency of symbols
static unsigned int sym_cum[N_CHAR + 1]; // cumulative freq for symbols
static unsigned int position_cum[N + 1]; // cumulative freq for positions
// Initalize model
static void StartModel ()
{
int ch, sym, i;
sym_cum[N_CHAR] = 0;
for (sym = N_CHAR; sym >= 1; sym--)
{
ch = sym - 1;
char_to_sym[ch] = sym;
sym_to_char[sym] = ch;
sym_freq[sym] = 1;
sym_cum[sym - 1] = sym_cum[sym] + sym_freq[sym];
}
sym_freq[0] = 0; // sentinal (!= sym_freq[1])
position_cum[N] = 0;
for (i = N; i >= 1; i--)
// Empirical distribuation function (very tentative)
position_cum[i - 1] = position_cum[i] + 10000 / (i + 200);
}
static void UpdateModel (int sym)
{
int i, c, ch_i, ch_sym;
if (sym_cum[0] >= MAX_CUM)
{
c = 0;
for (i = N_CHAR; i > 0; i--)
{
sym_cum[i] = c;
c += (sym_freq[i] = (sym_freq[i] + 1) >> 1);
}
sym_cum[0] = c;
}
for (i = sym; sym_freq[i] == sym_freq[i - 1]; i--)
;
if (i < sym)
{
ch_i = sym_to_char[i];
ch_sym = sym_to_char[sym];
sym_to_char[i] = ch_sym;
sym_to_char[sym] = ch_i;
char_to_sym[ch_i] = sym;
char_to_sym[ch_sym] = i;
}
++sym_freq[i];
while (--i >= 0)
++sym_cum[i];
}
// Output one bit followed by its complements
static void Output (bool bit)
{
PutBit(bit);
for (; shifts > 0; shifts--)
PutBit(!bit);
}
static void EncodeChar (int ch)
{
int sym;
unsigned long range;
sym = char_to_sym[ch];
range = high - low;
high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
low += (range * sym_cum[sym ]) / sym_cum[0];
while (true)
{
if (high <= Q2)
Output (0);
else if (low >= Q2)
{
Output(1);
low -= Q2;
high -= Q2;
}
else if (low >= Q1 && high <= Q3)
{
++shifts;
low -= Q1;
high -= Q1;
} else
break;
low += low;
high += high;
}
UpdateModel(sym);
}
static void EncodePosition(int position)
{
unsigned long range;
range = high - low;
high = low + (range * position_cum[position ]) / position_cum[0];
low += (range * position_cum[position + 1]) / position_cum[0];
while (true)
{
if (high <= Q2)
Output (0);
else if (low >= Q2)
{
Output (1);
low -= Q2;
high -= Q2;
}
else if (low >= Q1 && high <= Q3)
{
++shifts;
low -= Q1;
high -= Q1;
} else
break;
low += low;
high += high;
}
}
static void EncodeEnd()
{
++shifts;
if (low < Q1)
Output (0);
else
Output (1);
FlushBitBuffer();
}
static int BinarySearchSym (unsigned int x)
{
int i, j, k;
i = 1;
j = N_CHAR;
while (i < j)
{
k = (i + j) / 2;
if (sym_cum[k] > x)
i = k + 1;
else
j = k;
}
return i;
}
static int BinarySearchPos (unsigned int x)
{
int i, j, k;
i = 1;
j = N;
while (i < j)
{
k = (i + j) / 2;
if (position_cum[k] > x)
i = k + 1;
else
j = k;
}
return i - 1;
}
static void StartDecode ()
{
int i;
for (i = 0; i < M + 2; i++)
value = 2 * value + GetBit();
}
static int DecodeChar()
{
int sym, ch;
unsigned long range;
range = high - low;
sym = BinarySearchSym((unsigned)
(((value - low + 1) * sym_cum[0] - 1) / range));
high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
low += (range * sym_cum[sym ]) / sym_cum[0];
while (true)
{
if (low >= Q2)
{
value -= Q2;
low -= Q2;
high -= Q2;
} else if (low >= Q1 && high <= Q3) {
value -= Q1;
low -= Q1;
high -= Q1;
}
else if (high > Q2)
break;
low += low;
high += high;
value = 2 * value + GetBit();
}
ch = sym_to_char[sym];
UpdateModel (sym);
return ch;
}
static int DecodePosition()
{
int position;
unsigned long range;
range = high - low;
position = BinarySearchPos((unsigned int)
(((value - low + 1) * position_cum[0] - 1) / range));
high = low + (range * position_cum[position ]) / position_cum[0];
low += (range * position_cum[position + 1]) / position_cum[0];
while (true)
{
if (low >= Q2)
{
value -= Q2;
low -= Q2;
high -= Q2;
} else if (low >= Q1 && high <= Q3) {
value -= Q1;
low -= Q1;
high -= Q1;
}
else if (high > Q2)
break;
low += low;
high += high;
value = 2 * value + GetBit();
}
return position;
}
// ************ LZARI ************
void EXPORT lzari_encode ()
{
int i, c, len, r, s, last_match_length;
textsize = stream_size ();
// Write 'textsize'
write_byte ((unsigned char)(textsize & 0x000000FF));
write_byte ((unsigned char)(textsize & 0x0000FF00));
write_byte ((unsigned char)(textsize & 0x00FF0000));
write_byte ((unsigned char)(textsize & 0xFF000000));
codesize += sizeof(textsize);
if (!textsize)
return; // Cannot compress nothing
beginning_of_data();
textsize = 0;
StartModel();
InitTree();
s = 0; r = N - F;
for (i = s; i < r; i++)
text_buf[i] = ' '; // Any frequently-occuring character will do
for (len = 0; len < F && !end_of_data(); len++)
text_buf[r + len] = c = read_byte();
textsize = len;
for (i = 1; i <= F; i++)
InsertNode(r - i);
InsertNode(r);
do
{
if (match_length > len)
match_length = len;
if (match_length <= THRESHOLD)
{
match_length = 1;
EncodeChar(text_buf[r]);
} else {
EncodeChar (255 - THRESHOLD + match_length);
EncodePosition(match_position - 1);
}
last_match_length = match_length;
for (i = 0; i < last_match_length &&
!end_of_data(); i++)
{
c = read_byte();
DeleteNode (s);
text_buf[s] = c;
if (s < F - 1)
text_buf[s + N] = c;
s = (s + 1) & (N - 1);
r = (r + 1) & (N - 1);
InsertNode(r);
}
while (i++ < last_match_length)
{
DeleteNode(s);
s = (s + 1) & (N - 1);
r = (r + 1) & (N - 1);
if (--len)
InsertNode(r);
}
} while (len > 0);
EncodeEnd();
}
// Decode 'lzari_encode'd stream
void EXPORT lzari_decode()
{
int i, j, k, r, c;
unsigned long count;
char tsize[4];
// Read 'textsize'
tsize[0] = read_byte();
tsize[1] = read_byte();
tsize[2] = read_byte();
tsize[3] = read_byte();
memcpy (&textsize, tsize, 4); // Quick way to convert 4 chars to long
if (!textsize)
return;
StartDecode();
StartModel();
for (i = 0; i < N - F; i++)
text_buf[i] = ' ';
r = N - F;
for (count = 0; count < textsize; )
{
c = DecodeChar();
if (c < 256)
{
write_byte(c);
text_buf[r++] = c;
r &= (N - 1);
++count;
} else {
i = (r - DecodePosition() - 1) & (N - 1);
j = c - 255 + THRESHOLD;
for (k = 0; k < j; k++)
{
c = text_buf[(i + k) & (N - 1)];
write_byte(c);
text_buf[r++] = c;
r &= (N - 1);
++count;
}
}
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -