?? agrep.c
字號:
/* * bgopal: (1993-4) added a library interface and removed some bugs: also * selectively modified many routines to work with our text-compression algo. */#include <sys/stat.h>#include "agrep.h"#include "checkfile.h"#include <errno.h>#define PRINT(s)extern char **environ;extern int errno;int pattern_index; /* index in argv where the pattern is */int glimpse_isserver=0; /* so that there is no user interaction */int glimpse_call = 0; /* So that usage message is not printed twice */int glimpse_clientdied=0;/* to quit search if glimpseserver's client dies */int agrep_initialfd; /* Where does input come from? File/Memory? */CHAR *agrep_inbuffer;int agrep_inlen;int agrep_inpointer;FILE *agrep_finalfp; /* Where does output go to? File/Memory? */CHAR *agrep_outbuffer;int agrep_outlen;int agrep_outpointer;int execfd; /* used by exec called within agrep_search, set in agrep_init */int multifd = -1; /* fd for multipattern search used in ^^ , set in ^^^^^^^^ */extern char *pat_spool;#if DOTCOMPRESSEDextern char *tc_pat_spool;#endif /* DOTCOMPRESSED */char *multibuf=NULL; /* buffer to put the multiple patterns in */int multilen = 0; /* length of the multibuf: not the #of multi-patterns! */extern int pos_cnt; /* to re-initialize it to 0 for reg-exp search */unsigned Mask[MAXSYM];unsigned Init1, NO_ERR_MASK, Init[MaxError];unsigned Bit[WORD+1];CHAR buffer[BlockSize+Maxline+1]; /* should not be used anywhere: 10/18/93 */unsigned Next[MaxNext], Next1[MaxNext];int LIMITOUTPUT; /* maximum number of matches we are going to allow */int LIMITPERFILE; /* maximum number of matches per file we are going to allow */int LIMITTOTALFILE; /* maximum number of files we are going to allow */int EXITONERROR; /* return -1 or exit on error? 
*/int REGEX, FASTREGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP; int NOOUTPUTZERO;int Num_Pat, PSIZE, prev_num_of_matched, num_of_matched, files_matched, SILENT, NOPROMPT, BESTMATCH, NOUPPER;int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;int WORDBOUND, DELIMITER, D_length, tc_D_length, original_D_length;int EATFIRST, OUTTAIL;int BYTECOUNT;int PRINTOFFSET;int PRINTRECORD;int PRINTNONEXISTENTFILE;int FILEOUT;int DNA;int APPROX;int PAT_FILE; /* multiple patterns from a given file */char PAT_FILE_NAME[MAX_LINE_LEN];int PAT_BUFFER; /* multiple patterns from a given buffer */int CONSTANT;int RECURSIVE;int total_line; /* used in mgrep */int D;int M;int TCOMPRESSED;int EASYSEARCH; /* 1 used only for compressed files: LITTLE/BIG */int ALWAYSFILENAME = OFF;int POST_FILTER = OFF;int NEW_FILE = OFF; /* only when post-filter is used */int PRINTFILENUMBER = OFF;int PRINTFILETIME = OFF;int PRINTPATTERN = OFF;int MULTI_OUTPUT = OFF; /* should mgrep print the matched line multiple times for each matched pattern or just once? *//* invisible to the user, used only by glimpse: cannot use -l since it is incompatible with stdin and -A is used for the index search (done next) *//* Stuff to handle complicated boolean patterns */int AComplexBoolean = 0;ParseTree *AParse = NULL;int anum_terminals = 0;ParseTree aterminals[MAXNUM_PAT];char amatched_terminals[MAXNUM_PAT];char aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */char tc_aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */#if MEASURE_TIMES/* timing variables */int OUTFILTER_ms;int FILTERALGO_ms;int INFILTER_ms;#endif /*MEASURE_TIMES*/CHAR **Textfiles = NULL; /* array of filenames to be searched */int Numfiles = 0; /* indicates how many files in Textfiles */int copied_from_argv = 0; /* were filenames copied from argv (should I free 'em)? 
*/CHAR old_D_pat[MaxDelimit * 2] = "\n"; /* to hold original D_pattern */CHAR original_old_D_pat[MaxDelimit * 2] = "\n";CHAR Pattern[MAXPAT], OldPattern[MAXPAT];CHAR CurrentFileName[MAX_LINE_LEN];long CurrentFileTime;int SetCurrentFileName = 0; /* dirty glimpse trick to make filters work: output seems to come from another file */int SetCurrentFileTime = 0; /* dirty glimpse trick to avoid doing a stat to find the time */int CurrentByteOffset;int SetCurrentByteOffset = 0;CHAR Progname[MAXNAME]; CHAR D_pattern[MaxDelimit * 2] = "\n; "; /* string which delimits records -- defaults to newline */CHAR tc_D_pattern[MaxDelimit * 2] = "\n";CHAR original_D_pattern[MaxDelimit * 2] = "\n; ";char COMP_DIR[MAX_LINE_LEN];char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; /* interfacing with tcompress */int NOFILENAME, /* Boolean flag, set for -h option */ FILENAMEONLY;/* Boolean flag, set for -l option */extern int init();int table[WORD][WORD];CHAR *agrep_saved_pattern = NULL; /* to prevent multiple prepfs for each boolean search: crd@hplb.hpl.hp.com */longaget_file_time(stbuf, name) struct stat *stbuf; char *name;{ long ret = 0; struct stat mystbuf; if (stbuf != NULL) ret = stbuf->st_mtime; else { if (my_stat(name, &mystbuf) == -1) ret = 0; else ret = mystbuf.st_mtime; } return ret;}char *aprint_file_time(thetime) time_t thetime;{#if 0 char s[256], s1[16], s2[16], s3[16], s4[16], s5[16]; static char buffer[256]; strcpy(s, ctime(&thetime)); /* of the form: Sun Sep 16 01:03:52 1973\n\0 */ s[strlen(s) - 1] = '\0'; sscanf(s, "%s%s%s%s%s", s1, s2, s3, s4, s5); sprintf(buffer, ": %s %s %s", s2, s3, s5); /* ditch Sun 01:03:52 */#else static char buffer[256]; buffer[0] = ':'; buffer[1] = ' '; strftime(&buffer[2], 256, "%h %e %Y", gmtime(&thetime));#endif return &buffer[0];}/* Called when multipattern search and pattern has not changed */voidreinit_value_partial(){ num_of_matched = prev_num_of_matched = 0; errno = 0; FIRST_IN_RE = ON;}/* This must be called 
before every agrep_search to reset agrep globals */voidreinit_value(){ int i, j; /* Added on 7th Oct 194 */ if (AParse) { if (AComplexBoolean) destroy_tree(AParse); AComplexBoolean = 0; AParse = 0; PAT_BUFFER = 0; if (multibuf != NULL) free(multibuf); /* this was allocated for arbit booleans, not multipattern search */ multibuf = NULL; multilen = 0; /* Cannot free multifd here since that is always allocated for multipattern search */ } for (i=0; i<anum_terminals; i++) { free(aterminals[i].data.leaf.value); memset(&aterminals[i], '\0', sizeof(ParseTree)); } anum_terminals = 0; for (i=0; i<MAXNUM_PAT; i++) memset(aduplicates[i], '\0', MAXNUM_PAT); for (i=0; i<MAXNUM_PAT; i++) memset(tc_aduplicates[i], '\0', MAXNUM_PAT); Bit[WORD] = 1; for (i = WORD - 1; i > 0 ; i--) Bit[i] = Bit[i+1] << 1; for (i=0; i< MAXSYM; i++) Mask[i] = 0; /* bg: new things added on Mar 13 94 */ Init1 = 0; NO_ERR_MASK = 0; memset(Init, '\0', MaxError * sizeof(unsigned)); memset(Next, '\0', MaxNext * sizeof(unsigned)); memset(Next1, '\0', MaxNext * sizeof(unsigned)); wildmask = endposition = D_endpos = 0; for (i=0; i<WORD; i++) for (j=0; j<WORD; j++) table[i][j] = 0; strcpy(D_pattern, original_D_pattern); D_length = original_D_length; strcpy(old_D_pat, original_old_D_pat); /* Changed on Dec 26th: bg */ FASTREGEX = REGEX = 0; HEAD = TAIL = ON; /* were off initially */ RE_ERR = 0; AND = 0; M = 0; pos_cnt = 0; /* added 31 Jan 95 */ reinit_value_partial();}/* This must be called before every agrep_init to reset agrep options */voidinitial_value(){ SetCurrentFileName = 0; /* 16/9/94 */ SetCurrentFileTime = 0; SetCurrentByteOffset = 0; /* 23/9/94 */ /* courtesy: crd@hplb.hpl.hp.com */ if (agrep_saved_pattern) { free(agrep_saved_pattern); agrep_saved_pattern= NULL; } /* bg: new stuff on 17/Feb/94 */ if (multifd != -1) close(multifd); multifd = -1; if (multibuf != NULL) free(multibuf); multibuf = NULL; multilen = 0; if (pat_spool != NULL) free(pat_spool); pat_spool = NULL;#if DOTCOMPRESSED if 
(tc_pat_spool != NULL) free(tc_pat_spool); tc_pat_spool = NULL;#endif /* DOTCOMPRESSED */ LIMITOUTPUT = 0; /* means infinity = current semantics */ LIMITPERFILE = 0; /* means infinity = current semantics */ LIMITTOTALFILE = 0; /* means infinity = current semantics */ EASYSEARCH = 1; DNA = APPROX = PAT_FILE = PAT_BUFFER = CONSTANT = total_line = D = TCOMPRESSED = 0; PAT_FILE_NAME[0] = '\0'; EXITONERROR = NOFILENAME = FILENAMEONLY = FILEOUT = ALWAYSFILENAME = NEW_FILE = POST_FILTER = 0; original_old_D_pat[0] = old_D_pat[0] = '\n'; original_old_D_pat[1] = old_D_pat[1] = '\0'; original_D_pattern[0] = D_pattern[0] = '\n'; original_D_pattern[1] = D_pattern[1] = ';'; original_D_pattern[2] = D_pattern[2] = ' '; original_D_pattern[3] = D_pattern[3] = '\0'; strcpy(tc_D_pattern, "\n"); tc_D_length = 1; /* the functions agrep_init and agrep_search take care of Textfiles and Numfiles */ agrep_inpointer = 0; agrep_outpointer = 0; agrep_outlen = 0;#if MEASURE_TIMES OUTFILTER_ms = FILTERALGO_ms = INFILTER_ms = 0;#endif /*MEASURE_TIMES*/ MULTI_OUTPUT = 0; PRINTPATTERN = 0; PRINTFILENUMBER = 0; PRINTFILETIME = 0; JUMP = FNAME = BESTMATCH = NOPROMPT = NOUPPER = 0; RECURSIVE = 0; COUNT = LINENUM = WHOLELINE = SGREP = 0; NOOUTPUTZERO = 0; EATFIRST = INVERSE = TRUNCATE = OUTTAIL = 0; NOMATCH = FIRSTOUTPUT = ON; /* were off initally */ I = DD = S = 1; /* were off initially */ original_D_length = D_length = 2; /* was 0 initially */ SILENT = Num_Pat = PSIZE = SIMPLEPATTERN = prev_num_of_matched = num_of_matched = files_matched = 0; WORDBOUND = DELIMITER = 0; COMP_DIR[0] = '\0'; FREQ_FILE[0] = '\0'; HASH_FILE[0] = '\0'; STRING_FILE[0] = '\0'; BYTECOUNT = OFF; PRINTOFFSET = OFF; PRINTRECORD = ON; PRINTNONEXISTENTFILE = OFF; glimpse_clientdied = 0; /* added 15th Feb 95 */ /* Pattern, OldPattern, execfd, Numfiles are set in agrep_init: so no need to initialize */ reinit_value();}voidcompute_next(M, Next, Next1)int M; unsigned *Next, *Next1;{ int i, j=0, n, k, temp; int mid, pp; int MM, base; 
unsigned V[WORD]; base = WORD - M; temp = Bit[base]; Bit[base] = 0; for (i=0; i<WORD; i++) V[i] = 0; for (i=1; i<M; i++) { j=0; while (table[i][j] > 0 && j < 10) { V[i] = V[i] | Bit[base + table[i][j++]]; } } Bit[base]=temp; if(M <= SHORTREG) { k = exponen(M); pp = 2*k; for(i=k; i<pp ; i++) { n = i; Next[i]= (k>>1); for(j=M; j>=1; j--) { if(n & Bit[WORD]) Next[i] = Next[i] | V[j]; n = (n>>1); } } return; } if(M > MAXREG) fprintf(stderr, "%s: regular expression too long\n", Progname); MM = M; if(M & 1) M=M+1; k = exponen(M/2); pp = 2*k; mid = MM/2; for(i=k; i<pp ; i++) { n = i; Next[i]= (Bit[base]>>1); for(j=MM; j>mid ; j--) { if(n & Bit[WORD]) Next[i] = Next[i] | V[j-mid]; n = (n>>1); } n=i-k; Next1[i-k] = 0; for(j = 0; j<mid; j++) { if(n & Bit[WORD]) Next1[i-k] = Next1[i-k] | V[MM-j]; n = (n>>1); } } return;}intexponen(m)int m;{ int i, ex; ex= 1; for (i=0; i<m; i++) ex <<= 1; /* was ex *= 2 */ return(ex);}intre1(Text, M, D)int Text, M, D;{ register unsigned i, c, r0, r1, r2, r3, CMask, Newline, Init0, r_NO_ERR; register unsigned end; register unsigned hh, LL=0, k; /* Lower part */ int FIRST_TIME=ON, num_read , j=0, base; unsigned A[MaxRerror+1], B[MaxRerror+1]; unsigned Next[MaxNext], Next1[MaxNext]; CHAR *buffer; int FIRST_LOOP = 1; r_NO_ERR = NO_ERR_MASK; if(M > 30) { fprintf(stderr, "%s: regular expression too long\n", Progname); if (!EXITONERROR){ errno = AGREP_ERROR; return -1; } else exit(2); } base = WORD - M; hh = M/2; for(i=WORD, j=0; j < hh ; i--, j++) LL = LL | Bit[i]; if(FIRST_IN_RE) compute_next(M, Next, Next1); /*SUN: try: change to memory allocation */ FIRST_IN_RE = 0; Newline = '\n'; Init[0] = Bit[base]; if(HEAD) Init[0] = Init[0] | Bit[base+1]; for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]>>hh] | Next1[Init[i-1]&LL]; Init1 = Init[0] | 1; Init0 = Init[0];
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -