?? word.c

?? source code for arithmetic coding
?? C
字號:
/******************************************************************************File: 		word.cAuthors: 	John Carpinelli   (johnfc@ecr.mu.oz.au)	 	Wayne Salamonsen  (wbs@mundil.cs.mu.oz.au)		Lang Stuiver      (langs@cs.mu.oz.au)Purpose:	Data compression with a word-based model using		arithmetic coding.Copyright 1995 John Carpinelli and Wayne Salamonsen, All Rights Reserved.Copyright 1996 Lang Stuiver.  All Rights Reserved.These programs are supplied free of charge for research purposes only,and may not sold or incorporated into any commercial product.  There isABSOLUTELY NO WARRANTY of any sort, nor any undertaking that they arefit for ANY PURPOSE WHATSOEVER.  Use them at your own risk.  If you dohappen to find a bug, or have modifications to suggest, please reportthe same to Alistair Moffat, alistair@cs.mu.oz.au.  The copyrightnotice above and this statement of conditions must remain an integralpart of each and every copy made of these files.******************************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>#include "bitio.h"#include "arith.h"#include "stats.h"#include "main.h"#include "hashtable.h"#define	WORD		0		/* flag to process a word */#define NON_WORD	1		/* flag to process a non-word */#define INIT_CONTEXT	1023		/* initial size of word contexts */#define CHAR_CONTEXT	256		/* length of character contexts */#define BUFFER_SIZE	512		/* size of file input buffer */#define END_OF_MESSAGE  0               /* end of message symbol *//* Macro to specify what a word is */#define ISWORD(c) (((c >= 'A') && (c <= 'Z')) || \		   ((c >= 'a') && (c <= 'z')) || \		   ((c >= '0') && (c <= '9')))/* function prototypes */static void install_symbol_safe(context *pContext, int symbol);static void init_word_model(hash_table *tables[], context *words[]);static void purge_word_model(hash_table *tables[], context *words[]);static void init_char_model(context *characters[], context *lengths[]);static void 
read_word(char buffer[], int *buffer_length, int *curr_pos, 	       string *pWord, int type);/* global variables */static int base_memory;	       	/* memory used by character model */static unsigned int nWords[2]; 	/* counts number of words */static unsigned int nDistinctWords[2];	/* counts number of distinct words */#ifdef RCSIDstatic char   rcsid[] = "$Id: word.c,v 1.1 1996/08/07 01:34:11 langs Exp $";#endif/* * * print the results of compressing/decompressing a file * */void print_results_word(int operation){	fprintf(stderr, "\n" 		"                              words           non-words\n");	fprintf(stderr, "Words read             : %10u          %10u\n", 		nWords[0], nWords[1]);	fprintf(stderr, "Distinct words         : %10u          %10u\n",		nDistinctWords[0], nDistinctWords[1]);}/* * Installs a symbol, if it can't, it halts the program with an error * message.  Makes sure initial symbols are always added. */static void install_symbol_safe(context *pContext, int symbol){  if (install_symbol(pContext, symbol) == TOO_MANY_SYMBOLS)	{	  fprintf(stderr,"TOO_MANY_SYMBOLS error installing initial symbols\n");	  fprintf(stderr,"(Perhaps F_bits is too small?)\n");	  exit(1);	}}/* * * initialize the word/non-word context and hash tables * */static void init_word_model(hash_table *tables[], context *words[]){    tables[WORD] = create_table();    tables[NON_WORD] = create_table();    words[WORD] = create_context(INIT_CONTEXT, DYNAMIC);    words[NON_WORD] = create_context(INIT_CONTEXT, DYNAMIC);    if (tables[WORD]==NULL || tables[NON_WORD]==NULL)	{ fprintf(stderr,"init_word_model(): Unable to create word tables!\n");	  exit(1); 	}        /* add end of message symbol to word contexts */    install_symbol_safe(words[WORD], END_OF_MESSAGE);    install_symbol_safe(words[NON_WORD], END_OF_MESSAGE);    get_memory(2 * MEM_PER_SYMBOL);		/* record memory used */}/* * * free all memory associated with the word and non-word models * then create empty models. 
* */static void purge_word_model(hash_table *tables[], context *words[]){    /* free the memory used by the word models */    purge_context(words[WORD]);    purge_context(words[NON_WORD]);    purge_table(tables[WORD]);    purge_table(tables[NON_WORD]);    /* rebuild the hash tables with no entries */    purge_memory();			/* set memory count back to zero */    get_memory(base_memory);    tables[WORD] = create_table();    tables[NON_WORD] = create_table();    if (tables[WORD]==NULL || tables[NON_WORD]==NULL)	{ fprintf(stderr,		  "purge_word_model(): Unable to recreate word tables!\n");	  exit(1); 	}    /* add end of message symbol to word contexts */    install_symbol_safe(words[WORD], END_OF_MESSAGE);    install_symbol_safe(words[NON_WORD], END_OF_MESSAGE);}/* * * initialize the character and length contexts * */static void init_char_model(context *characters[], context *lengths[]){    int i;    /* initialize the character and length contexts */    characters[WORD] = create_context(CHAR_CONTEXT, STATIC);    characters[NON_WORD] = create_context(CHAR_CONTEXT, STATIC);    lengths[WORD] = create_context(MAX_WORD_LEN+1, STATIC);    lengths[NON_WORD] = create_context(MAX_WORD_LEN+1, STATIC);    /* initialise char contexts with all chars having a frequency of 1 */     for (i = 0; i < CHAR_CONTEXT; i++)    {	if (ISWORD(i)) 	    install_symbol_safe(characters[WORD], i);	else	    install_symbol_safe(characters[NON_WORD], i);    }    for (i = 0; i <= MAX_WORD_LEN; i++)    {	install_symbol_safe(lengths[WORD], i);	install_symbol_safe(lengths[NON_WORD], i);    }    /* record memory used by character and length contexts */    get_memory(2 * MAX_WORD_LEN * MEM_PER_SYMBOL);    get_memory(2 * CHAR_CONTEXT * MEM_PER_SYMBOL);}/* * * compress with word based model using i/o in bitio.c * */void encode_word(void){    char	buffer[BUFFER_SIZE];    int		buffer_len, buffer_pos = 0, word_no, i, type;    string	curr_word;    context	*words[2], *characters[2], *lengths[2];    hash_table	
*tables[2];    /* set up the character and length contexts */    init_char_model(characters, lengths);    /* initialize the word and non-word contexts and hash tables */    init_word_model(tables, words);    base_memory = get_memory(0);		/* record base memory level */    buffer_len = 0;    startoutputtingbits();    start_encode();        /* start processing with a word */    type = WORD;    for (;;)    {	read_word(buffer, &buffer_len, &buffer_pos, &curr_word, type);	if ((buffer_len == 0) && (curr_word.length == 0))	    break;	nWords[type]++;	word_no = lookup_word(&curr_word, tables[type]);	if (encode(words[type], word_no) == NOT_KNOWN)	{	    /* spell out new word before adding to list of words */	    encode(lengths[type], curr_word.length);	    	    for (i = 0; i<curr_word.length; i++)		encode(characters[type], curr_word.text[i]);	    	    /* add word to hash table, and install new symbol */	    if ((word_no = add_word(&curr_word, tables[type])) == NOMEMLEFT ||		(install_symbol(words[type], word_no) != 0))		/* purge word model if memory or symbol limit is exceeded */		{		    if (verbose)			fprintf(stderr, "Reached %s limit "					"adding new word...purging\n",				word_no == NOMEMLEFT ? 
"memory" : "symbol");		    purge_word_model(tables, words);		}	    nDistinctWords[type]++;	} 	type = !type;				/* toggle WORD/NON_WORD type */    }     encode(words[type], END_OF_MESSAGE);	/* encode end of message */    finish_encode();    doneoutputtingbits();}/* * * uncompress with a word based model using bitio.c for i/o * */void decode_word(void){    int i, symbol, type, length;    hash_table *tables[2];    context *words[2], *characters[2], *lengths[2];    string word;    unsigned char *pWord;        /* set up the character and length contexts */    init_char_model(characters, lengths);    /* initialize word/non-word contexts and hash tables */    init_word_model(tables, words);    base_memory = get_memory(0);		/* record base memory level */    startinputtingbits();    start_decode();    type = WORD;				/* first symbol is a WORD */    for (;;)    {	symbol = decode(words[type]);	if (symbol == END_OF_MESSAGE)	    break;	nWords[type]++;	if (symbol == NOT_KNOWN)	{      	    /* read in the length, then the spelling of a new word */	    word.length = decode(lengths[type]);	    for (i = 0; i<word.length; i++)		word.text[i] = decode(characters[type]);	    pWord = word.text;	    length = word.length;	    nDistinctWords[type]++;	    /* add new word to hash table, and install new symbol */	    if (((symbol = add_word(&word, tables[type])) == NOMEMLEFT) || 		(install_symbol(words[type], symbol) != 0))		{		    /* purge word model if memory limit exceeded */		    if (verbose)			fprintf(stderr, "Reached %s limit "					"adding new word...purging\n",				symbol == NOMEMLEFT ? 
"memory" : "symbol");		    purge_word_model(tables, words);		}	}	else	    get_word(tables[type], symbol, &pWord, &length);	/* output the word to standard out */	BITIO_FWRITE(pWord, length, 1);	type = !type;			/* toggle between WORD/NON_WORD */    }     finish_decode();    doneinputtingbits();}/* * * read word or non-word from stdin and update the buffer_length  * and buffer_position variables * */static voidread_word(char buffer[], int *buffer_length, int *curr_pos, string *pWord,	  int type){    pWord->length = 0;    while (pWord->length < MAX_WORD_LEN)    {	if (*buffer_length == 0)	{	    /* 	     * if buffer is empty then fill it, using fread. If file to be             * encoded is empty then return current word	     */ 	    if ((*buffer_length = BITIO_FREAD(buffer, 1, BUFFER_SIZE)) == 0)		return;	    *curr_pos = 0;	}		/* 	 * terminate on non-word character if type = WORD (0)	 * or word character if type = NON_WORD (1)	 */	if ((!ISWORD(buffer[*curr_pos])) ^ type)	    return;	else	{	    pWord->text[pWord->length] = buffer[*curr_pos];	    pWord->length += 1;	    *curr_pos += 1;	    *buffer_length -= 1;	}    }}
?? 文件大小 41 K
?? 上傳用戶 KMPlayer33
?? 所屬分類數值算法/人工智能
??? 相關標簽

#arithmatic #source #coding #code
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? word.c

?? 快捷鍵說明