?? gzip.c
字號:
#ifdef DEBUGstatic void check_match (IPos start, IPos match, int length);#endif/* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */#define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)/* =========================================================================== * Insert string s in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. * IN assertion: all calls to to INSERT_STRING are made with consecutive * input characters and the first MIN_MATCH bytes of s are valid * (except for the last MIN_MATCH-1 bytes of the input file). */#define INSERT_STRING(s, match_head) \ (UPDATE_HASH(ins_h, window[(s) + MIN_MATCH-1]), \ prev[(s) & WMASK] = match_head = head[ins_h], \ head[ins_h] = (s))/* =========================================================================== * Initialize the "longest match" routines for a new file */static void lm_init(ush *flags){ register unsigned j; /* Initialize the hash table. */ memzero((char *) head, HASH_SIZE * sizeof(*head)); /* prev will be initialized on the fly */ *flags |= SLOW; /* ??? reduce max_chain_length for binary files */ strstart = 0; block_start = 0L; lookahead = read_buf((char *) window, sizeof(int) <= 2 ? (unsigned) WSIZE : 2 * WSIZE); if (lookahead == 0 || lookahead == (unsigned) EOF) { eofile = 1, lookahead = 0; return; } eofile = 0; /* Make sure that we always have enough lookahead. This is important * if input comes from a device such as a tty. */ while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); ins_h = 0; for (j = 0; j < MIN_MATCH - 1; j++) UPDATE_HASH(ins_h, window[j]); /* If lookahead < MIN_MATCH, ins_h is garbage, but this is * not important since only literal bytes will be emitted. */}/* =========================================================================== * Set match_start to the longest match starting at the given string and * return its length. Matches shorter or equal to prev_length are discarded, * in which case the result is equal to prev_length and match_start is * garbage. * IN assertions: cur_match is the head of the hash chain for the current * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 *//* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm or * match.s. The code is functionally equivalent, so you can use the C version * if desired. */static int longest_match(IPos cur_match){ unsigned chain_length = max_chain_length; /* max hash chain length */ register uch *scan = window + strstart; /* current string */ register uch *match; /* matched string */ register int len; /* length of current match */ int best_len = prev_length; /* best match length so far */ IPos limit = strstart > (IPos) MAX_DIST ? strstart - (IPos) MAX_DIST : NIL; /* Stop when cur_match becomes <= limit. To simplify the code, * we prevent matches with the string of window index 0. *//* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. * It is easy to get rid of this optimization if necessary. */#if HASH_BITS < 8 || MAX_MATCH != 258# error Code too clever#endif register uch *strend = window + strstart + MAX_MATCH; register uch scan_end1 = scan[best_len - 1]; register uch scan_end = scan[best_len]; /* Do not waste too much time if we already have a good match: */ if (prev_length >= good_match) { chain_length >>= 2; } Assert(strstart <= window_size - MIN_LOOKAHEAD, "insufficient lookahead"); do { Assert(cur_match < strstart, "no future"); match = window + cur_match; /* Skip to next match if the match length cannot increase * or if the match length is less than 2: */ if (match[best_len] != scan_end || match[best_len - 1] != scan_end1 || *match != *scan || *++match != scan[1]) continue; /* The check at best_len-1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that * the hash keys are equal and that HASH_BITS >= 8. */ scan += 2, match++; /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart+258. */ do { } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && scan < strend); len = MAX_MATCH - (int) (strend - scan); scan = strend - MAX_MATCH; if (len > best_len) { match_start = cur_match; best_len = len; if (len >= nice_match) break; scan_end1 = scan[best_len - 1]; scan_end = scan[best_len]; } } while ((cur_match = prev[cur_match & WMASK]) > limit && --chain_length != 0); return best_len;}#ifdef DEBUG/* =========================================================================== * Check that the match at match_start is indeed a match. */static void check_match(IPos start, IPos match, int length){ /* check that the match is indeed a match */ if (memcmp((char *) window + match, (char *) window + start, length) != EQUAL) { fprintf(stderr, " start %d, match %d, length %d\n", start, match, length); error_msg("invalid match"); } if (verbose > 1) { fprintf(stderr, "\\[%d,%d]", start - match, length); do { putc(window[start++], stderr); } while (--length != 0); }}#else# define check_match(start, match, length)#endif/* =========================================================================== * Fill the window when the lookahead becomes insufficient. * Updates strstart and lookahead, and sets eofile if end of input file. * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 * OUT assertions: at least one byte has been read, or eofile is set; * file reads are performed for at least two bytes (required for the * translate_eol option). */static void fill_window(){ register unsigned n, m; unsigned more = (unsigned) (window_size - (ulg) lookahead - (ulg) strstart); /* Amount of free space at the end of the window. */ /* If the window is almost full and there is insufficient lookahead, * move the upper half to the lower one to make room in the upper half. */ if (more == (unsigned) EOF) { /* Very unlikely, but possible on 16 bit machine if strstart == 0 * and lookahead == 1 (input done one byte at time) */ more--; } else if (strstart >= WSIZE + MAX_DIST) { /* By the IN assertion, the window is not empty so we can't confuse * more == 0 with more == 64K on a 16 bit machine. */ Assert(window_size == (ulg) 2 * WSIZE, "no sliding with BIG_MEM"); memcpy((char *) window, (char *) window + WSIZE, (unsigned) WSIZE); match_start -= WSIZE; strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ block_start -= (long) WSIZE; for (n = 0; n < HASH_SIZE; n++) { m = head[n]; head[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL); } for (n = 0; n < WSIZE; n++) { m = prev[n]; prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL); /* If n is not on any hash chain, prev[n] is garbage but * its value will never be used. */ } more += WSIZE; } /* At this point, more >= 2 */ if (!eofile) { n = read_buf((char *) window + strstart + lookahead, more); if (n == 0 || n == (unsigned) EOF) { eofile = 1; } else { lookahead += n; } }}/* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */#define FLUSH_BLOCK(eof) \ flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \ (char*)NULL, (long)strstart - block_start, (eof))/* =========================================================================== * Same as above, but achieves better compression. We use a lazy * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */static ulg deflate(){ IPos hash_head; /* head of hash chain */ IPos prev_match; /* previous match */ int flush; /* set if current block must be flushed */ int match_available = 0; /* set if previous match exists */ register unsigned match_length = MIN_MATCH - 1; /* length of best match */ /* Process the input block. */ while (lookahead != 0) { /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ INSERT_STRING(strstart, hash_head); /* Find the longest match, discarding those <= prev_length. */ prev_length = match_length, prev_match = match_start; match_length = MIN_MATCH - 1; if (hash_head != NIL && prev_length < max_lazy_match && strstart - hash_head <= MAX_DIST) { /* To simplify the code, we prevent matches with the string * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ match_length = longest_match(hash_head); /* longest_match() sets match_start */ if (match_length > lookahead) match_length = lookahead; /* Ignore a length 3 match if it is too distant: */ if (match_length == MIN_MATCH && strstart - match_start > TOO_FAR) { /* If prev_match is also MIN_MATCH, match_start is garbage * but we will ignore the current match anyway. */ match_length--; } } /* If there was a match at the previous step and the current * match is not better, output the previous match: */ if (prev_length >= MIN_MATCH && match_length <= prev_length) { check_match(strstart - 1, prev_match, prev_length); flush = ct_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH); /* Insert in hash table all strings up to the end of the match. * strstart-1 and strstart are already inserted. */ lookahead -= prev_length - 1; prev_length -= 2; do { strstart++; INSERT_STRING(strstart, hash_head); /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH * these bytes are garbage, but it does not matter since the * next lookahead bytes will always be emitted as literals. */ } while (--prev_length != 0); match_available = 0; match_length = MIN_MATCH - 1; strstart++; if (flush) FLUSH_BLOCK(0), block_start = strstart; } else if (match_available) { /* If there was no match at the previous position, output a * single literal. If there was a match but the current match * is longer, truncate the previous match to a single literal. */ Tracevv((stderr, "%c", window[strstart - 1])); if (ct_tally(0, window[strstart - 1])) { FLUSH_BLOCK(0), block_start = strstart; } strstart++; lookahead--; } else { /* There is no previous match to compare with, wait for * the next step to decide. */ match_available = 1; strstart++; lookahead--; } Assert(strstart <= isize && lookahead <= isize, "a bit too far"); /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes * for the next match, plus MIN_MATCH bytes to insert the * string following the next match. */ while (lookahead < MIN_LOOKAHEAD && !eofile) fill_window(); } if (match_available) ct_tally(0, window[strstart - 1]); return FLUSH_BLOCK(1); /* eof */}/* gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface * Copyright (C) 1992-1993 Jean-loup Gailly * The unzip code was written and put in the public domain by Mark Adler. * Portions of the lzw code are derived from the public domain 'compress' * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, * Ken Turkowski, Dave Mack and Peter Jannesen. * * See the license_msg below and the file COPYING for the software license. * See the file algorithm.doc for the compression algorithms and file formats. *//* Compress files with zip algorithm and 'compress' interface. * See usage() and help() functions below for all options. * Outputs: * file.gz: compressed file with same mode, owner, and utimes * or stdout with -c option or if stdin used as input. * If the output file name had to be truncated, the original name is kept * in the compressed file. */ /* configuration */typedef struct dirent dir_type;typedef RETSIGTYPE(*sig_type) (int);/* ======================================================================== */// int main (argc, argv)// int argc;// char **argv;int gzip_main(int argc, char **argv){ int result; int inFileNum; int outFileNum; struct stat statBuf; char *delFileName; int tostdout = 0; int fromstdin = 0; int force = 0; int opt; int file_count; while ((opt = getopt(argc, argv, "cf123456789dq")) != -1) { switch (opt) { case 'c': tostdout = 1; break; case 'f': force = 1; break; /* Ignore 1-9 (compression level) options */ case '1': case '2': case '3': case '4': case '5':
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -