?? cm.cpp
字號:
#if 0
CM is the static context modeling archiver.
(C) Bulat Ziganshin 1993, 97, 98
My email addresses:
FIDO 2:5049/36.26
bulatz@fort.tatarstan.ru
The algorithm:
1. A context tree of the depth set by the -o option is built, and
the frequency of each character in each context is noted.
2. All the context-character pairs with the frequency that is less than
the one given by the -n option are removed from the tree. The frequencies
of such pairs are given to the ESCAPE-code of their respective contexts
and to the combination "parent context-character".
3. The remaining tree is coded and written to the output file.
4. The source is coded using the tree.
Deficiencies:
1. All the memory - for the text and for the tree is allocated once at the
start of the program (its amount is confugurable by the -t and -m options).
A more flexible memory allocation strategy is needed.
2. The command-line interface is dumb.
Future directions:
1. The tree depth to be made adjustable depending on the actual frequencies
of particular contexts.
2. The deletion of an element from the tree to be done if and only if
the element is not beneficial (number of bits saved in coding text is
greater than number of bits used in coding the tree).
3. Tree coding to be made smarter.
When compiled with Visual C++ 5.0 the program works faster than HAP 4;
compression ratio is approximately the same.
The first group of #define's (in the source code) denotes compilation
modes. To switch modes, add or remove the letter N, e.g. ARITH <-> NARITH.
#endif
#define DEBUG
#define TEST_TREE
#define NPRINT_TREE_STATS
#define PRINT_STATS
#define ARITH
#define NARITH_PRINT
#define NPRINT_TREE
#define NPRINT_TEXT
#define STATIC static
#define INLINE /*inline*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <io.h>
#include <fcntl.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <sys\stat.h>
#define MAXIMAL_MAXORDER 256
#define MAX_SUM (1<<14)
#define max(a,b) ((a)>(b)?(a):(b))
#define endof(array) (array+sizeof(array)/sizeof(*array))
#define set0(array) memset( array, 0, sizeof(array) )
typedef unsigned int uint;
typedef unsigned char uchar;
#define SWITCH_CHARS "-/"
#define command_set "ax"
#define command_a uint('a')
#define command_x uint('x')
#define no_command 0
#define test_command(if_a,if_x) ( command==command_a? (if_a):(if_x) )
#define mincount_for(n) mincount
#define mincount_for0 (2*mincount)
static char M_ARGUMENT[] = "Superfluous argument";
static char M_BADNUM[] = "Bad numeric argument";
static char M_COMMAND[] = "Bad command";
static char M_FILES[] = "Can't open files.";
static char M_NOMEMORY[] = "No memory";
static char M_NOTEXT[] = "No memory for text buffer";
static char M_NOTREE[] = "No memory for context tree";
static char M_OPTION[] = "Bad flag";
static char M_READ[] = "Can't read file.";
static char M_WRITE[] = "Can't write file. Disk full?";
#if 1 || __I86__ >= 3
#define FILEBUFFER_SIZE 32768
#define DEFAULT_FREEMEMSIZE (5<<20)
#define DEFAULT_MAXSIZE (1<<20)
#elif defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
#define FILEBUFFER_SIZE 2048
#define DEFAULT_FREEMEMSIZE 30720
#define DEFAULT_MAXSIZE 4096
#elif defined(__COMPACT__) || defined(__LARGE__) || defined(__HUGE__)
#define FILEBUFFER_SIZE 32768
#define DEFAULT_FREEMEMSIZE 65000
#define DEFAULT_MAXSIZE 57344
#else
#error What's your memory model???
#endif
#define DEFAULT_MAXORDER 3
#define DEFAULT_MINCOUNT 10
#define DEFAULT_CHAR ' '
uchar *text;
char *arcname, *textname;
uint command, size, maxsize, maxorder, &maxlevel=maxorder, mincount;
uchar contextstr[MAXIMAL_MAXORDER];
struct {
uint n; // No EncodeTreeChar
} flags[1];
STATIC INLINE void error(char *message) {
printf( "\n" );
printf( message );
exit(1);
}
#ifndef NDEBUG
#define do_assert(cmd) cmd
#define debug_printf(args) printf args
#else
#define do_assert(cmd)
#define debug_printf(args)
#endif
#ifdef PRINT_STATS
#define printf_stats(args) printf args
#define do_stats(expr) expr
#else
#define printf_stats(args)
#define do_stats(expr)
#endif
#ifdef PRINT_TREE
#define printf_tree(args) printf args
#else
#define printf_tree(args)
#endif
#ifdef PRINT_TEXT
#define printf_text(args) printf args
#else
#define printf_text(args)
#endif
// Buffered I/O section ****************************************************
struct TFile {
static uchar buffer[FILEBUFFER_SIZE], *pbf;
int handle;
uint ModeW, ModeB; // Flags for write-mode & buffering
void init(char *name,uint writing,uint buffering);
void done();
uint read(uchar *buf, uint bytes);
void write(uchar *buf, uint bytes);
void InitReadBuffer() { assert(ModeB); pbf = endof(buffer); }
void InitWriteBuffer() { assert(ModeB); pbf = buffer; }
void ReadBuffer() { assert(ModeB); read(pbf=buffer,FILEBUFFER_SIZE); }
void WriteBuffer() { assert(ModeB); write(pbf=buffer,FILEBUFFER_SIZE); }
void CheckReadBuffer() { if(pbf==endof(buffer)) ReadBuffer(); }
void CheckWriteBuffer() { if(pbf==endof(buffer)) WriteBuffer(); }
uchar ReadByte() { CheckReadBuffer(); assert(ModeB && pbf>=buffer && pbf<endof(buffer)); return *pbf++; }
void WriteByte(uchar c) { CheckWriteBuffer(); assert(ModeB && pbf>=buffer && pbf<endof(buffer)); *pbf++=c; }
void WriteInt(uint n);
uint ReadInt();
void DoneReadBuffer() { }
void DoneWriteBuffer() { assert(ModeB); write(buffer,pbf-buffer); }
};
uchar TFile::buffer[]="", *TFile::pbf=0;
void TFile::init( char *name, uint writing, uint buffering ) {
if( writing ) {
handle = open( name, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IWRITE );
} else {
handle = open( name, O_RDONLY|O_BINARY );
}
if( handle < 0 ) {
error( M_FILES );
}
ModeW = writing;
ModeB = buffering;
if( buffering ) {
if( writing ) {
InitWriteBuffer();
} else {
InitReadBuffer();
}
}
}
void TFile::done() {
if( ModeB ) {
if( ModeW ) {
DoneWriteBuffer();
} else {
DoneReadBuffer();
}
}
close(handle);
}
uint TFile::read(uchar *buf, uint bytes) {
assert(!ModeW);
int n=::read(handle,buf,bytes);
if( n<0 ) {
error( M_READ );
}
return (uint)n;
}
void TFile::write(uchar *buf, uint bytes) {
assert(ModeW);
if( ::write(handle,buf,bytes) != bytes ) {
error( M_WRITE );
}
}
INLINE void TFile::WriteInt( uint n ) {
if( n<254 ) {
WriteByte(n);
} else if( n<=65535u ) {
WriteByte(254);
WriteByte(n&255);
WriteByte(n>>8);
} else {
uchar *p=(uchar*)&n;
uint i=sizeof(n);
WriteByte(255);
do {
WriteByte(*p++);
} while( --i );
}
}
INLINE uint TFile::ReadInt() {
uint n;
n = ReadByte();
if( n==254 ) {
n = ReadByte();
n += ReadByte()<<8;
} else if( n==255 ) {
uchar *p=(uchar*)&n;
uint i=sizeof(n);
do {
*p++ = ReadByte();
} while( --i );
}
return n;
}
TFile infile, outfile;
//ZBR
//STATIC INLINE uint eof_infile() {
// return tell(infile) == filelength(infile);
// return 0;
//}
// Actual arithmetic compression section ***********************************
uint CodedBytes;
#define MIN_RANGE 0x4001
#define MAX_RANGE 0xFFFF
#ifdef ARITH
#define WriteOutputFile(code_value) (CodedBytes++, outfile.WriteByte(code_value))
#define ReadInputFile() (CodedBytes++, infile.ReadByte())
#if 0
arith.c a byte oriented arithmetic coding
Michael Schindler
Feb. 1997
http://eiunix.tuwien.ac.at/~michael
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
arithmetic coder without bit-stuff (FAST!)
For a general idea on arithmetic coders check
A. Moffats paper at the DCC95.
Define NOWARN to suppress warnings about bytes_to_follow overflows.
It is faster; the compressed file must be >1GB that this can happen,
the probability for this to happen is about 256**-(2**31), so I do
not expect it to happen within this universe.
If this is still too likely increase the size of bytes_to_follow.
If tot_f is a power of two consider replacing the division by a shift
and doing an CACAM style rescaling (2 divisions) to save the if-statement.
#endif
#define NOWARN
/* PROTOTYPES ***************************************************/
/* Start encoding */
void start_encoding( char c );
/* Encode a symbol */
inline void encode_freq(
unsigned int sy_f, /* frequency of symbol */
unsigned int lt_f, /* frequency of symbols < the decoded symbol */
unsigned int tot_f); /* frequency of all symbols */
/* Finish encoding */
void done_encoding( void );
/* Start decoding */
int start_decoding( void );
/* Calculate culmulative frequency for next symbol. Does NO update! */
inline unsigned int decode_culfreq( unsigned int tot_f );
/* Update decoding variables */
inline void decode_update(
unsigned int sy_f, /* frequency of symbol */
unsigned int lt_f, /* frequency of symbols < the decoded symbol */
unsigned int tot_f); /* frequency of all symbols */
/* Finish decoding */
void done_decoding( void );
/* PROTOTYPES: END ***********************************************/
/* SIZE OF ARITHMETIC CODE VALUES. */
#define CODE_BITS 31 /* Number of bits in a code value */
/* Type of an arithmetic code value must accomodate CODE_BITS+1 bits */
#if MAXINT > 1L<<16
typedef unsigned int code_value;
#else
typedef unsigned long code_value;
#endif
/* it is highly recommended that the total frequency count is less than */
/* 1 << (CODE_BITS-13) to minimize the error introduced by approximation. */
/* for a discussion see "Arithmetic coding revisited" by A. Moffat. */
#define SHIFT_BITS (CODE_BITS - 8)
#define EXTRA_BITS ((CODE_BITS-1) % 8 + 1)
#define Top_value ((code_value)1 << CODE_BITS)
#define Bottom_value (Top_value >> 8)
static char copyright[]="arith.c 1.0 (c) 1997 michael@eiunix.tuwien.ac.at";
static unsigned char enbuffer, debuffer; /* char buffer */
static code_value enlow, enrange; /* Encoder state */
static unsigned int bytes_to_follow; /* number of bytes to follow */
static code_value deoffs, der, derange; /* Decoder state */
static inline void outbyte()
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -