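/* block.c (listing captured from a web code viewer; the viewer's "Font size:" control label is not part of the source) */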
/*---------------------------------------------*/
/* Zzip/Zzlib compressor block.c */
/* (un)compress/archive managing functions */
/*---------------------------------------------*/
/*
This file is a part of zzip and/or zzlib, a program and
library for lossless, block-sorting data compression.
Copyright (C) 1999-2001 Damien Debin. All Rights Reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the
Free Software Foundation, Inc.,
59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
Damien Debin
<damien@debin.net>
This program is based on (at least) the work of: Mike Burrows,
David Wheeler, Peter Fenwick, Alistair Moffat, Ian H. Witten,
Robert Sedgewick, Jon Bentley, Brenton Chapin, Stephen R. Tate,
Szymon Grabowski, Bernhard Balkenhol, Stefan Kurtz
*/
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <time.h>
#ifdef WIN32
# include <io.h>
# include <sys/utime.h>
# include <direct.h>
#else /* WIN32 */
# include <utime.h>
# include <unistd.h>
# include <dirent.h>
#endif /* WIN32 */
#include "zzip.h"
#ifdef SFX
# include "sfx_code.h"
#endif
/*---------------------------------------------*/
#ifdef WIN32
# define SEP_PATH '\\'
# define MKDIR_OPTIONS
#else /* WIN32 */
# define SEP_PATH '/'
# define MKDIR_OPTIONS ,S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH
#endif /* WIN32 */
#ifdef SFX
# define START_OFFSET SFX_CODE_SIZE
#else /* SFX */
# define START_OFFSET 0L
#endif /* SFX */
/* Stream helpers: transfer (b) bytes between (a) and the current session's
   output/input FILE.  Both expansions already end with ';' and evaluate to
   the fwrite()/fread() call, so the byte count can be captured with
   "n = READ_F(...);". */
#define WRITE_F(a,b) fwrite((a),1,(b),session->output_file);
#define READ_F(a,b) fread((a),1,(b),session->input_file);
/* Memory helpers: copy (b) bytes to/from a pointer named 'buffer_in', which
   must be in scope where the macro is used, then advance that pointer. */
#define WRITE_M(a,b) { memcpy(buffer_in, (a), (b)); buffer_in += (b); }
#define READ_M(a,b) { memcpy((a), buffer_in, (b)); buffer_in += (b); }
/* Store errno in last_error and leave the enclosing function with a bare
   "return;" (so only usable where a value-less return is valid). */
#define IO_ERROR() { last_error = errno; return; }
/* Post-read check: a stream error stores errno; end-of-file stores
   UNEXPECTED_EOF (tested last, so it wins when both conditions hold). */
#define CHECK_IO_R() { if (ferror(session->input_file) != 0) last_error = errno; if (feof(session->input_file) != 0) last_error = UNEXPECTED_EOF; }
/* Post-write check: a stream error on the output file stores errno. */
#define CHECK_IO_W() { if (ferror(session->output_file) != 0) last_error = errno; }
/* Store the current input/output file position in (a); if ftell() returns -1,
   store errno in last_error. */
#define FTELL_I(a) { if ((a = ftell(session->input_file )) == -1) last_error = errno; }
#define FTELL_O(a) { if ((a = ftell(session->output_file)) == -1) last_error = errno; }
/* Seek the input/output file to offset (a) relative to origin (b); a failing
   fseek() stores FSEEK_INPUT_FILE / FSEEK_OUTPUT_FILE in last_error. */
#define FSEEK_I(a,b) { if (fseek(session->input_file, (a), (b)) != 0) last_error = FSEEK_INPUT_FILE; }
#define FSEEK_O(a,b) { if (fseek(session->output_file, (a), (b)) != 0) last_error = FSEEK_OUTPUT_FILE; }
/* Most recent error code recorded by this module; OK means no error.
   Helpers in this file set it instead of returning a status. */
int last_error = OK;
/* Parameters/state of the block currently being processed (fields such as
   crc, type, mm_type and buffer are filled in by CompressBlock). */
block_param_s block = { 0, false, false, false, 0, 0, 0, 0, NO_TYPE, NULL, NULL };
/* Session whose input/output files and headers the routines below work on. */
static session_param_s *session = NULL;
/* Working buffer shared by the (un)compress routines, viewable as 8-, 16- or
   32-bit elements.  It is allocated through buffer8 and released by
   CleanMemory(). */
static union
{
uint8 *buffer8;
uint16 *buffer16;
uint32 *buffer32;
} mem = { NULL };
#ifdef GET_STAT
/* Timing/size statistics accumulator, only present in GET_STAT builds. */
time_stat_s time_stat = { false, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
#endif /* GET_STAT */
#ifdef ZZLIB
/* Library builds keep a table of NB_MAX_SESSION + 1 session pointers,
   all initially NULL. */
# define NB_MAX_SESSION 8
static session_param_s *session_tab[NB_MAX_SESSION + 1] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
#endif /* ZZLIB */
/*---------------------------------------------*/
#ifdef ZZLIB
/* Exported accessor: hand the caller the error code currently stored in
   last_error (OK when nothing has failed). */
DLL_EXPORT int Get_last_error()
{
  return last_error;
}
#endif /* ZZLIB */
/*---------------------------------------------*/
/* Release a heap block and give back NULL, so a caller can free and clear a
   pointer in a single statement: "p = MyFree(p);".  Passing NULL is fine. */
INLINE static
void *MyFree(void *pp)
{
  void *cleared = NULL;

  free(pp);
  return cleared;
}
/* malloc() wrapper: returns the new block, or NULL after recording
   NOT_ENOUGH_MEMORY in last_error when the allocation fails. */
INLINE static
void *MyMalloc(size_t size)
{
  void *mm = malloc(size);

  if (mm != NULL)
    return mm;

  last_error = NOT_ENOUGH_MEMORY;
  return NULL;
}
/* realloc() wrapper: returns the resized block, or NULL after recording
   NOT_ENOUGH_MEMORY in last_error when realloc() yields NULL.  As with plain
   realloc(), the original block 'pp' is not released on failure. */
INLINE static
void *MyRealloc(void *pp,
                size_t size)
{
  void *mm = realloc(pp, size);

  if (mm != NULL)
    return mm;

  last_error = NOT_ENOUGH_MEMORY;
  return NULL;
}
/*---------------------------------------------*/
#ifndef SFX
/* Release the shared working buffer and reset the pointer to NULL so that a
   second call is harmless. */
void CleanMemory()
{
  free(mem.buffer8);
  mem.buffer8 = NULL;
}
#endif /* !SFX */
/*---------------------------------------------*/
#ifndef SFX
/* Write header for an "archive" file */
static
void Write_Header_Arc()
{
WRITE_F(session->head_arc.magic, sizeof(char) * 2);
WRITE_F(&session->head_arc.version_number, sizeof(session->head_arc.version_number));
WRITE_F(&session->head_arc.nb_of_file, sizeof(session->head_arc.nb_of_file));
CHECK_IO_W();
}
/* Write header for a compressed file inside an "archive" file */
static
void Write_Header_File()
{
uint lg = strlen(session->head_file.name);
WRITE_F(&lg, sizeof(lg));
WRITE_F(session->head_file.name, sizeof(char) * lg);
WRITE_F(&session->head_file.time, sizeof(session->head_file.time));
WRITE_F(&session->head_file.attributes, sizeof(session->head_file.attributes));
WRITE_F(&session->head_file.packed_size, sizeof(session->head_file.packed_size));
WRITE_F(&session->head_file.original_size, sizeof(session->head_file.original_size));
WRITE_F(&session->head_file.nb_of_block, sizeof(session->head_file.nb_of_block));
CHECK_IO_W();
}
#endif /* !SFX */
/*---------------------------------------------*/
/* Read header for an "archive" file */
static
void Read_Header_Arc()
{
READ_F(session->head_arc.magic, sizeof(char)*2);
READ_F(&session->head_arc.version_number, sizeof(session->head_arc.version_number));
READ_F(&session->head_arc.nb_of_file, sizeof(session->head_arc.nb_of_file));
CHECK_IO_R();
#ifndef SFX
if (session->head_arc.magic[0] != 'Z' || session->head_arc.magic[1] != 'Z')
{
last_error = NOT_A_ZZIP_FILE;
return;
}
if (session->head_arc.version_number != VERSION_NUMBER)
{
last_error = UNSUPPORTED_VERSION;
return;
}
#endif /* !SFX */
}
/* Read header for a compressed file inside an "archive" file */
static
void Read_Header_File()
{
uint lg;
READ_F(&lg, sizeof(lg));
MyFree(session->head_file.name);
session->head_file.name = (char*)MyMalloc(sizeof(char) * (lg + 1));
READ_F(session->head_file.name, sizeof(char) * lg);
session->head_file.name[lg] = '\0';
READ_F(&session->head_file.time, sizeof(session->head_file.time));
READ_F(&session->head_file.attributes, sizeof(session->head_file.attributes));
READ_F(&session->head_file.packed_size, sizeof(session->head_file.packed_size));
READ_F(&session->head_file.original_size, sizeof(session->head_file.original_size));
READ_F(&session->head_file.nb_of_block, sizeof(session->head_file.nb_of_block));
CHECK_IO_R();
}
/*---------------------------------------------*/
#ifdef SFX
#define BUFFER_SIZE (512*1024)
/* Verify the CRC-32 trailer of 'fin'.

   Reads the stream from its current position to end-of-file, runs Crc32()
   (seeded with 0xFFFFFFFF) over everything except the final sizeof(uint32)
   bytes, and compares the result with the uint32 stored in those final
   bytes (native byte order).

   Errors are reported through last_error:
     - NOT_ENOUGH_MEMORY if the read buffer cannot be allocated;
     - errno on a stream error;
     - CRC_ERROR if the checksum differs or the stream is too short to
       contain a trailer.

   The trailing bytes are carried over at the front of the buffer from one
   read to the next, so the trailer is found correctly even when it
   straddles two reads or when the last read returns fewer bytes than the
   trailer size.  The buffer is released on every exit path. */
static
void Crc32_File(FILE *fin)
{
  uint8 *buffer;
  uint32 crc = 0xFFFFFFFFUL, crc_in_file;
  const size_t crc_len = sizeof(crc_in_file);
  size_t held = 0; /* bytes kept at buffer[0..held) that may be the trailer */
  sint end_of_file = 0;

  buffer = MyMalloc((BUFFER_SIZE + crc_len) * sizeof(uint8));
  if (buffer == NULL) return;

  while (end_of_file == 0)
  {
    size_t nb = fread(buffer + held, 1, BUFFER_SIZE, fin);
    size_t avail;

    if (ferror(fin) != 0)
    {
      last_error = errno;
      MyFree(buffer);
      return;
    }
    end_of_file = feof(fin);

    avail = held + nb;
    if (avail > crc_len)
    {
      /* Everything but the last crc_len bytes is certainly payload. */
      size_t body = avail - crc_len;
      crc = Crc32(buffer, buffer + body, crc);
      memmove(buffer, buffer + body, crc_len);
      held = crc_len;
    }
    else
    {
      held = avail;
    }
  }

  if (held < crc_len)
  {
    /* Stream shorter than a trailer: nothing to compare against. */
    last_error = CRC_ERROR;
  }
  else
  {
    /* memcpy avoids a misaligned / type-punned load of the stored CRC. */
    memcpy(&crc_in_file, buffer, crc_len);
    if (crc_in_file != crc) last_error = CRC_ERROR;
  }

  MyFree(buffer);
}
#endif /* SFX */
/*---------------------------------------------*/
#ifndef SFX
/* BlockStat() tuning: size in bytes of one sampling window. */
#define STAT_SIZE (8*1024)
/* Number of histogram bins; BlockStat() maps a byte to bin (byte >> 2). */
#define NB_S 64
/* A cut is reported when the sum of absolute bin differences between two
   consecutive windows exceeds this value. */
#define BLOCKSTAT_THRESHOLD 5400
/* The two histograms BlockStat() alternates between (current / previous). */
static sint32 bstat[NB_S] ALIGN;
static sint32 cstat[NB_S] ALIGN;
/* Branch-free absolute value: 'a' is widened to 64 bits through the ssint64
   union, then the high half (used as a mask) is XORed into and subtracted
   from the low half.
   NOTE(review): relies on ssint64's d.l / d.h being the low / high 32-bit
   halves of s64 -- confirm against the union's definition. */
INLINE static
uint32 MyAbs(sint32 a)
{
  ssint64 wide;
  uint32 result;

  wide.s64 = a;
  result = wide.d.l ^ wide.d.h;
  result -= wide.d.h;
  return result;
}
/* Look for a change in byte statistics inside a buffer.

   The buffer is scanned in consecutive windows of STAT_SIZE bytes.  For each
   window a histogram with NB_S bins (bin = byte >> 2) is built and compared
   with the histogram of the previous window; when the sum of the absolute
   bin differences exceeds BLOCKSTAT_THRESHOLD, the byte offset at which the
   current window starts is returned.

   Scanning stops once no more than 2 * STAT_SIZE bytes remain, so the tail
   of the buffer is never examined.  If no change is detected, input_len is
   returned.

   input_buffer  data to examine; read as 32-bit words
   input_len     number of bytes available in input_buffer */
static
uint32 BlockStat(uint8 *input_buffer,
                 uint32 input_len)
{
  sint32 *cur_hist = bstat;
  sint32 *prev_hist = cstat;
  uint32 offset = 0; /* byte offset of the window being examined */

  /* -1 can never be a bin count: marks "no previous window yet" */
  prev_hist[0] = -1;

  while (input_len - offset > STAT_SIZE * 2)
  {
    uint32 *words = (uint32*)(input_buffer + offset);
    sint32 *swap;
    uint32 k;

    /* histogram of the current window, four bytes per word */
    memset(cur_hist, 0, NB_S * sizeof(*cur_hist));
    for (k = 0; k < (STAT_SIZE >> 2); ++k)
    {
      uuint32 u;
      u.u32 = words[k];
      cur_hist[u.b.ll >> 2]++;
      cur_hist[u.b.lh >> 2]++;
      cur_hist[u.b.hl >> 2]++;
      cur_hist[u.b.hh >> 2]++;
    }

    /* nothing to compare against for the very first window */
    if (prev_hist[0] != -1)
    {
      sint32 diff_sum = 0;
      for (k = 0; k < NB_S; ++k)
        diff_sum += MyAbs(cur_hist[k] - prev_hist[k]);
      if (diff_sum > BLOCKSTAT_THRESHOLD)
        return offset;
    }

    /* the current histogram becomes the reference for the next window */
    swap = cur_hist;
    cur_hist = prev_hist;
    prev_hist = swap;

    offset += STAT_SIZE;
  }

  return input_len;
}
#endif /* !SFX */
/*---------------------------------------------*/
#ifndef SFX
static
#ifdef ZZLIB
sint32 CompressBlock(bool from_file,
uint8 *buffer_in,
uint32 len_in)
#else /* ZZLIB */
sint32 CompressBlock()
#endif /* ZZLIB */
{
bool ff_bug = false;
uint16 status;
uint8 *buffer1 = NULL, *buffer2 = NULL, *buf_out1 = NULL, *buf_out2 = NULL;
uint32 len, len2, len_max, first, tot1 = 0, tot2, block_len;
sint err = 0;
slong pos = 0, deb = 0, fin = 0;
#ifdef GET_STAT
uint64 p1, p2;
#endif /* GET_STAT */
#ifdef ZZLIB
uint8 *sav_buffer_in = buffer_in;
if (from_file == false)
{
mem.buffer8 = (uint8*)MyMalloc(sizeof(uint8) * (len_in + RUN_LENGTH_MAX + 32) * 6);
if (last_error != OK) return -1;
buffer1 = mem.buffer8;
buffer2 = (uint8*)ROUND32(buffer1 + len_in * 2);
memcpy(buffer1, buffer_in, sizeof(uint8) * len_in);
len = len_in;
}
else
#endif /* ZZLIB */
{
uint32 taille_max = MIN(session->head_file.original_size + 1, session->block_size);
mem.buffer8 = (uint8*)MyMalloc(sizeof(uint8) * (taille_max + RUN_LENGTH_MAX + 32) * 6);
if (last_error != OK) return -1;
buffer1 = mem.buffer8;
buffer2 = (uint8*)ROUND32(buffer1 + taille_max * 2);
FTELL_I(pos);
FTELL_O(deb);
if (last_error != OK) return -1;
len = READ_F(buffer1, sizeof(uint8) * taille_max);
CHECK_IO_R();
if (last_error == UNEXPECTED_EOF) last_error = OK;
if (last_error != OK) return -1;
if ((session->compression_mode & 2) == 2)
{
uint32 blockstat_len = BlockStat(buffer1, len);
if (blockstat_len != len)
{
FSEEK_I((sint32)blockstat_len - (sint32)len, SEEK_CUR);
if (last_error != OK) return -1;
len = blockstat_len;
}
}
}
block_len = len;
len_max = len;
block.crc = Crc32_2(buffer1, buffer1 + len);
block.mm_type = 0;
STAT_ADD_SIZE(kb_tot, len);
if (len > 64)
{
if (len < 3 * 1024) block.compression_mode = 0;
/*- Beginning ---- Analyze ------------*/
GET_TSC(p1);
Analysis(buffer1, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_ana, p2, p1);
STAT_ADD_SIZE(kb_ana, len);
/*- End ---------- Analyze ------------*/
/* trick for 'Canterbury Corpus: kennedy.xls' !, delta-encoding with record size of 13 */
if (((uint32*)buffer1)[10] == 161480704 && ((uint32*)buffer1)[20] == 60818693)
{
uint i;
uint8 *b8_out = buffer2, *b8;
for (i = 0; i < 13; ++i)
for (b8 = buffer1 + 2320 + i; b8 < buffer1 + len; b8 += 13)
*b8_out++ = *b8 - *(b8-13);
memcpy(buffer1 + 2320, buffer2, len - 2320);
block.rle_encoding = true;
block.type = NO_TYPE;
block.mm_type = 6;
}
/*- Beginning ---- RLE Coding ---------*/
if (block.rle_encoding == true)
{
uint32 rle_len;
GET_TSC(p1);
rle_len = RLE_Coding(buffer1, buffer2, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_rle, p2, p1);
STAT_ADD_SIZE(kb_rle, len);
len = rle_len;
memcpy(buffer1, buffer2, len);
}
/*- End ---------- RLE Coding ---------*/
/*- Beginning ---- MM Coding ----------*/
if ((len > 128 * 1024)
& ((block.type == BIN) | (block.type == NO_TYPE))
& (block.multimedia_test == true))
{
uint res;
GET_TSC(p1);
res = MM_Test(buffer1 + 512, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_ana, p2, p1);
if (res != 0)
{
block.mm_type = res;
block.type = MULTIMEDIA;
}
}
if (block.mm_type != 0)
{
GET_TSC(p1);
MM_Coding(buffer1, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_mm, p2, p1);
STAT_ADD_SIZE(kb_mm, len);
}
/*- End ---------- MM Coding ----------*/
/*- Beginning ---- W32/BIN Coding -----*/
if (block.type == WIN_EXE)
{
GET_TSC(p1)
Win32_Coding(buffer1, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_txt, p2, p1);
}
if (block.type == BIN || block.type == WIN_EXE)
{
GET_TSC(p1);
Reverse_Block(buffer1, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_txt, p2, p1);
STAT_ADD_SIZE(kb_txt, len);
}
/*- End ---------- W32/BIN Coding -----*/
/*- Beginning ---- alpha/txt Cod. -----*/
if (block.type == TEXT)
{
GET_TSC(p1);
len = Filter1(buffer1, buffer2, len);
memcpy(buffer1, buffer2, len);
mem.buffer8 = (uint8*)MyRealloc(mem.buffer8, sizeof(uint8) * (len + RUN_LENGTH_MAX + 32) * 6);
if (last_error != OK) return -1;
buffer1 = mem.buffer8;
len_max = MAX(len, len_max);
GET_TSC(p2);
STAT_ADD_TIME(time_txt, p2, p1);
STAT_ADD_SIZE(kb_txt, len);
}
/*- End ---------- alpha/txt Cod. -----*/
/*- Beginning ---- Phr. replacement ---*/
if (block.english_encoding == true)
{
GET_TSC(p1);
len = Filter2(buffer1, buffer1 + len);
GET_TSC(p2);
STAT_ADD_TIME(time_txt, p2, p1);
}
/*- End ---------- Phr. replacement ---*/
/*- Beginning ---- BWT ----------------*/
/* to avoid a bug if the block ends with a run of 0xFF */
if (buffer1[len - 1] == 0xFF)
{
buffer1[len - 1] -= buffer1[len - 2];
ff_bug = true;
}
BWT_Coding(len, &first, buffer1);
if (last_error != OK) return -1;
/*- End ---------- BWT ----------------*/
/*- Beginning ---- MTF Coding ---------*/
GET_TSC(p1);
M1FF2_Coding(buffer1, buffer1 + len);
/*- End ---------- MTF Coding ---------*/
/*- Beginning ---- Split --------------*/
buf_out1 = (uint8*)ROUND32(buffer1 + len);
buffer2 = (uint8*)ROUND32(buf_out1 + len);
buf_out2 = (uint8*)ROUND32(buffer2 + len);
len2 = Split(buffer1, buffer1 + len, buffer2);
GET_TSC(p2);
STAT_ADD_TIME(time_mtf, p2, p1);
STAT_ADD_SIZE(kb_mtf, len);
/*- End ---------- Split --------------*/
/*- Beginning ---- Arith Compression --*/
block.buffer = buf_out1;
GET_TSC(p1);
tot1 = Zip_SM0(len, buffer1);
GET_TSC(p2);
STAT_ADD_TIME(time_st0, p2, p1);
STAT_ADD_SIZE(kb_st0, len);
if (last_error != OK) return -1;
block.buffer = buf_out2;
GET_TSC(p1);
tot2 = Zip_SM1(len2, buffer2);
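/* NOTE: the listing is truncated here; the remainder of CompressBlock() is
   missing.  The text below is the keyboard-shortcut help of the web code
   viewer the file was captured from, not part of block.c:
     Copy code:          Ctrl + C
     Search code:        Ctrl + F
     Full-screen mode:   F11
     Toggle theme:       Ctrl + Shift + D
     Show shortcuts:     ?
     Increase font size: Ctrl + =
     Decrease font size: Ctrl + -
*/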