?? huffman-compress.c
字號:
/*
* File: huffman-compress.c
* Author: JD, with a bit of code pirated from a 2103 student
* Date: 2008-02-03
* Version: 1.1
*
* Description: This program reads in either stdin or the given file
* argument and compresses this data using Huffman compression. The
* output is written to stdout (if the input was stdin) and is
* otherwise written to the same file name as the original with ".huf"
* appended.
*
* If the first argument is "-a", the output codes (but not the
* overhead data) are written in ASCII, one code per line.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h> /* C99 */
#include <ctype.h>
#include <string.h>
#define NUM_SYMBOLS 256
#define NUM_ITEMS (2 * NUM_SYMBOLS - 1)
#define MAX_NUM_MERGES (NUM_SYMBOLS - 1)
#define NO_MORE (-1)
#define NO_PARENT (-1)
typedef struct
{
uint32_t count;
short parent_index;
short code_length;
char code[MAX_NUM_MERGES];
char merged;
char left_or_right; /* left or right child of parent? */
} Item_t;
int get_smallest(Item_t items[], int length);
void write_bit(int bit);
void flush_bits();
int parse_args(int argc, char * argv[], int * ascii_output);
void write_header(Item_t items[], int ascii_output);
void
usage(const char * progname)
{
fprintf(stderr, "Usage: %s [-a] [infile]\n", progname);
}
int
main(int argc, char * argv[])
{
int input, i, j, next_item;
int small1, small2;
Item_t items[NUM_ITEMS];
int ascii_output = 0;
if (parse_args(argc, argv, &ascii_output) != 0)
return EXIT_FAILURE;
for (i = 0; i < NUM_ITEMS; i++)
{
items[i].count = 0;
items[i].merged = 0;
items[i].parent_index = NO_PARENT;
}
/* Read the input */
while ((input = getchar()) != EOF)
items[input].count++;
write_header(items, ascii_output);
/* Merge the items */
next_item = NUM_SYMBOLS;
while (1)
{
small1 = get_smallest(items, NUM_ITEMS);
if (small1 == NO_MORE)
break;
items[small1].merged = 1;
small2 = get_smallest(items, NUM_ITEMS);
if (small2 == NO_MORE)
break;
items[small2].merged = 1;
items[small1].left_or_right = 0;
items[small1].parent_index = next_item;
items[small2].left_or_right = 1;
items[small2].parent_index = next_item;
items[next_item].count = items[small1].count + items[small2].count;
next_item++;
}
/* Compute the codes */
for (i = 0; i < NUM_SYMBOLS; i++)
{
char rev_code[MAX_NUM_MERGES];
int item_index;
if (items[i].count == 0)
continue;
for (j = 0, item_index = i;
items[item_index].parent_index != NO_PARENT; j++)
{
rev_code[j] = items[item_index].left_or_right;
item_index = items[item_index].parent_index;
}
items[i].code_length = 0;
j--;
while (j >= 0)
items[i].code[items[i].code_length++] = rev_code[j--];
#ifdef DEBUG
fprintf(stderr, "Code for char %d is", i);
for (j = 0; j < items[i].code_length; j++)
fprintf(stderr, " %d", items[i].code[j]);
fprintf(stderr, "\n");
#endif
}
/* Output codes for each letter */
rewind(stdin);
if (ascii_output)
while ((input = getchar()) != EOF)
{
for (i = 0; i < items[input].code_length; i++)
printf("%d", items[input].code[i]);
printf("\n");
}
else
{
while ((input = getchar()) != EOF)
for (i = 0; i < items[input].code_length; i++)
write_bit(items[input].code[i]);
flush_bits();
}
return EXIT_SUCCESS;
}
/*
* Function: parse_args
* Purpose: Examine the arguments and freopen() streams as necessary.
* Inputs: argc, argv and a pointer to an int
* Returns: 0 on success, non-zero on failure
*
* Error checking: enough...
* Sample usage: parse_args(argc, argv, &use_ascii_output);
*/
int
parse_args(int argc, char * argv[], int * ascii_output)
{
char * progname = argv[0];
if (argc > 1 && 0 == strcmp(argv[1], "-a"))
{
*ascii_output = 1;
argc--;
argv++;
}
if (argc > 2)
{
usage(argv[0]);
return 1;
}
if (argc > 1)
{
char * outfile;
if (freopen(argv[1], "r", stdin) == NULL)
{
fprintf(stderr, "%s: unable to open input file '%s' for reading\n",
progname, argv[1]);
return 2;
}
outfile = malloc(strlen(argv[1] + 5));
if (outfile == NULL)
{
fprintf(stderr, "%s: unable to allocate memory; quitting\n",
progname);
return 3;
}
strcpy(outfile, argv[1]);
strcat(outfile, ".huf");
if (freopen(outfile, "w", stdout) == NULL)
{
fprintf(stderr, "%s: unable to open output file '%s' for writing\n",
progname, outfile);
return 4;
}
}
return 0;
}
/*
* Function: write_header
* Purpose: Write out the header, which precedes all the codes.
* Inputs: items[] and ascii_output
* Returns: nothing
*
* Error checking: none
* Sample usage: write_header(items, ascii_output);
*
* The header starts with two ints which specify the min and max symbol
* values. Then it follows this with a 'b', 'h' or 'w' according to
* whether the counts are output with unsigned bytes, halfwords or words.
* Then the counts themselves are output in binary.
*/
void
write_header(Item_t items[], int ascii_output)
{
int i, min_symb, max_symb, max_count;
i = 0;
for (i = 0; items[i].count == 0 && i < NUM_SYMBOLS; i++)
;
min_symb = i;
max_symb = i;
max_count = items[i].count;
for (i++; i < NUM_SYMBOLS; i++)
{
if (items[i].count > 0)
{
if (items[i].count > max_count)
max_count = items[i].count;
max_symb = i;
}
}
fwrite(&min_symb, sizeof(min_symb), 1, stdout);
fwrite(&max_symb, sizeof(max_symb), 1, stdout);
if (max_count < 256)
{
unsigned char byte;
fputc((unsigned)'b', stdout);
for (i = min_symb; i <= max_symb; i++)
{
byte = items[i].count;
fwrite(&byte, sizeof(byte), 1, stdout);
}
}
else if (max_count < 65536)
{
uint16_t halfword;
fputc((unsigned)'h', stdout);
for (i = min_symb; i <= max_symb; i++)
{
halfword = items[i].count;
fwrite(&halfword, sizeof(halfword), 1, stdout);
}
}
else
{
uint32_t word;
fputc((unsigned)'w', stdout);
for (i = min_symb; i <= max_symb; i++)
{
word = items[i].count;
fwrite(&word, sizeof(word), 1, stdout);
}
}
if (ascii_output)
printf("\n");
}
/*
* Function: get_smallest
* Purpose: Gets the un-merged item with the smallest count.
* Inputs: items[] and length
* Returns: an int >= 0 or NO_MORE (NO_MORE if all items are merged)
*
* Error checking: none
* Sample usage: get_smallest(items, MAX);
*/
int
get_smallest(Item_t items[], int length)
{
int smallest = NO_MORE; /* Smallest item index */
int i;
for (i = 0; i < length; i++)
{
if (items[i].merged || items[i].count == 0)
continue;
if (smallest == NO_MORE)
smallest = i;
else if (items[i].count < items[smallest].count)
smallest = i;
}
return smallest;
}
/*
* Declare two global variables here, so they can only be used by the
* functions below here.
*
* Note for 2103 students:
* These two functions need to share the two variables. We could pass
* these as parameters to the functions, or combine the two functions into
* one so that the global variables would be static inside the function.
* To make that latter way work, we would need another parameter to the
* write_bit() function to tell it to flush the last few bits, and I like
* that design less than the way I did it.
*/
static unsigned char byte = 0;
static int bits_used = 0;
void
write_bit(int bit)
{
byte <<= 1;
byte |= bit;
if (++bits_used == 8)
{
fwrite(&byte, sizeof(byte), 1, stdout);
byte = 0;
bits_used = 0;
}
}
void
flush_bits()
{
if (bits_used > 0)
{
byte <<= 8 - bits_used;
fwrite(&byte, sizeof(byte), 1, stdout);
byte = 0;
bits_used = 0;
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -