?? agrep.c
字號:
/*
AGREP APPROXIMATE PATTERN - GREP.
Copyright (c) 1994-1997 Sun Wu, Udi Manber, Burra Gopal, Tom Gries (for OS/2)
All Rights Reserved.
[fix] 3.35 TG 11.12.97 agrep.c: -f function did not work because of a simple
problem in the commandline parser: the patternfile preprocessing
prepf() was done before definition of the codepage. Oops.
[fix] 3.34 newmgrep.c: input buffer was used before its start address
[chg] 3.33 TG 02.03.97 when no target filename(s) were given:
AGREP displays an error message now
instead of reading from stdin.
Solves the following problem:
When one uses "AGREP <needle> *" and there are
no files in that subdirectory,
the 3.32 has waited for stdin (=haystack, target).
[new] verbose option -V5 dumps codepage table
[chg] compiled with emx 0.9c
[chg] TG 18.02.97 new homepage, see AGREPHLP.C.
I keep the same version number.
[chg] 3.32 TG 15.01.97 new links, helppage revised
[new] 3.31 TG 16.12.96 new option -i0
[fix] 3.29 TG 14.10.96 see version.h
[new] 3.21 TG 07.10.96 Verbose option -V
[new] 3.20 TG 06.10.96 multi-codepage support
now implemented CP437 and CP850
[new] 3.19 TG 05.10.96 environment variable AGREPOPTS
[new] 3.14 TG 25.09.96 new option -i#:
let letters match letters, digits match digits,
others match others
[new] 3.12 dynamic metasymbol assignment (in preparation)
[chg] returncode = total number of hits
[new] 3.11 TG 23.09.96 option -ia maps all ISO characters to nearest ASCII
[chg] 3.10 TG 22.09.96 handling and check for metasymbols in searchstring
[fix] 3.09 TG 22.09.96 BITAP.C: type CHAR instead of type char
[new] AGREP.C: Grand Total
[fix] 3.08 TG 16.09.96
- A dummy printf("") statement is now used to avoid the
following memory(?) problem:
now and then, when using the delimiter option and
on big files and piping AGREP's output to a file..
Example: AGREP -d "/AN " -i "next;block" infile > outfile
..AGREP has crashed with SYS3175 (access violation).
- provisionally un-commenting the diagnostic error message
for OUTPUT_OVERFLOW (in AGREP.H)
[chg] 3.07 [TG]
- improved performance for CP850 upper to lower translation
(now using a look-up table)
- improved help screens
[chg] 3.04 R.M. Thomas [MT] & Th. Gries [TG] July/August 1996:
- conditional compiling using the __EMX__ flag
- EMX: use of _wildcard() function to expand wildcards
- using constants AGREP_VERSION, AGREP_DATE in agrep.h
[ini] 3.x bgopal: (1993-1994)
- Added a library interface and removed some bugs.
- Also selectively modified many routines to work
with our text-compression algo.
*/
/* Definition of an environment variable whose value is prepended
to the actual command line. [TG]
Example:
SET AGREPOPTS=-i to let AGREP search case-insensitively
*/
/* Table holding the 16 metasymbols.
   NOTE(review): this is defined ahead of the project headers included
   below -- presumably on purpose; confirm before moving it. */
unsigned char metasymb[16]; /* we define 16 metasymbols */
#include "agrep.h"
#include "version.h"
#include "codepage.h"
#include "checkfil.h"
/* NOTE(review): presumably receives the value of the AGREPOPTS environment
   variable (see the comment near the top of this file) -- confirm in main(). */
char AGREPOPT_STR[MAX_LINE_LEN];
/* Codepage state; these objects are only declared here (extern), they are
   defined in another translation unit. */
extern char CODEPAGE_STR[MAX_LINE_LEN]; /* holds the selected codepage identifier */
extern int CODEPAGE; /* corresponding number */
extern char CP_MAPPING; /* -i option = case(in)sensitive search ?
is one of 'i', 'a', '#' or 0 */
extern struct CODEPAGE_struct CP[CODEPAGES][CPSIZE];
extern unsigned char LUT[256];
/* PRINT(s) expands to nothing, so every PRINT(...) use compiles away. */
#define PRINT(s)
#ifndef __BORLANDC__
/*
 * errno has to come from <errno.h>.  ISO C and POSIX allow errno to be a
 * macro (on current C libraries it usually expands to a per-thread lvalue),
 * so a hand-written "extern int errno;" is undefined behaviour and typically
 * fails at link time.  Including the header is harmless if a project header
 * has already done so.
 */
#include <errno.h>
/* The process environment; no standard C header declares it. */
extern char **environ;
#endif
/* ---- command line / glimpse integration ---- */
int pattern_index; /* index in argv where the pattern is */
int glimpse_isserver=0; /* so that there is no user interaction */
int glimpse_call = 0; /* So that usage message is not printed twice */
int glimpse_clientdied=0; /* to quit search if glimpseserver's client dies */
/* ---- input side ----
   NOTE(review): inbuffer/inlen/inpointer look like an in-memory input
   buffer, its length and the current read position -- confirm in agrep_search. */
int agrep_initialfd; /* Where does input come from? File/Memory? */
CHAR *agrep_inbuffer;
int agrep_inlen;
int agrep_inpointer;
/* ---- output side ----
   NOTE(review): outbuffer/outlen/outpointer presumably mirror the input
   triple for in-memory output -- confirm in agrep_search. */
FILE *agrep_finalfp; /* Where does output go to? File/Memory? */
CHAR *agrep_outbuffer;
int agrep_outlen;
int agrep_outpointer;
int execfd; /* used by exec called within agrep_search, set in agrep_init */
int multifd = -1; /* fd for multipattern search: used in agrep_search, set in agrep_init; -1 is the "nothing to close" value tested by initial_value() */
/* Pattern spool buffers are defined elsewhere; initial_value() frees them. */
extern char *pat_spool;
#if DOTCOMPRESSED
extern char *tc_pat_spool;
#endif /* DOTCOMPRESSED */
char *multibuf=NULL; /* buffer to put the multiple patterns in */
int multilen = 0; /* length of the multibuf: not the #of multi-patterns! */
extern int pos_cnt; /* to re-initialize it to 0 for reg-exp search */
/* ---- search tables; all of these are cleared or rebuilt by reinit_value() ---- */
unsigned Mask[MAXSYM];
unsigned Init1, NO_ERR_MASK, Init[MaxError];
unsigned Bit[WORD+1]; /* reinit_value() sets Bit[WORD] = 1 and Bit[i] = Bit[i+1] << 1 for i = WORD-1 .. 1 */
CHAR buffer[BlockSize+Maxline+1]; /* should not be used anywhere: 10/18/93 */
unsigned Next[MaxNext], Next1[MaxNext];
unsigned wildmask, endposition, D_endpos;
/* ---- output limits; initial_value() sets each to 0, which means "no limit" ---- */
int LIMITOUTPUT; /* maximum number of matches we are going to allow */
int LIMITPERFILE; /* maximum number of matches per file we are going to allow */
int LIMITTOTALFILE; /* maximum number of files we are going to allow */
int EXITONERROR; /* return -1 or exit on error? */
/* ---- option flags and counters; defaults are assigned in initial_value()
   and reinit_value() ---- */
int REGEX, FASTREGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;
int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP;
int NOOUTPUTZERO;
int Num_Pat, PSIZE, prev_num_of_matched, num_of_matched, files_matched;
int SILENT, NOPROMPT, BESTMATCH, NOUPPER, ISO2ASCII;
int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;
int WORDBOUND, DELIMITER, D_length, tc_D_length, original_D_length;
int EATFIRST, OUTTAIL;
int BYTECOUNT;
int PRINTOFFSET;
int PRINTRECORD;
int VERBOSE=1; /* Verbose default: AGREP shows the Grand Total */
int FILEOUT;
int DNA;
int APPROX;
int PAT_FILE; /* multiple patterns from a given file */
char PAT_FILE_NAME[MAX_LINE_LEN];
int PAT_BUFFER; /* multiple patterns from a given buffer */
int CONSTANT;
int RECURSIVE;
int total_line; /* used in mgrep */
int D;
int M;
int TCOMPRESSED;
int EASYSEARCH; /* 1 used only for compressed files: LITTLE/BIG */
int ALWAYSFILENAME = OFF;
int POST_FILTER = OFF;
int NEW_FILE = OFF; /* only when post-filter is used */
int PRINTFILENUMBER = OFF;
int PRINTPATTERN = OFF;
int MULTI_OUTPUT = OFF; /* should mgrep print the matched line multiple times for each matched pattern or just once? */
/* invisible to the user, used only by glimpse: cannot use -l since it is incompatible with stdin and -A is used for the index search (done next) */
/* Stuff to handle complicated boolean patterns */
int AComplexBoolean = 0;
ParseTree *AParse = NULL; /* reinit_value() destroys this tree only when AComplexBoolean is set */
int anum_terminals = 0; /* number of aterminals[] entries in use; reinit_value() frees each leaf value and zeroes the entry */
ParseTree aterminals[MAXNUM_PAT];
char amatched_terminals[MAXNUM_PAT];
#if MEASURE_TIMES
/* timing variables (only compiled in when MEASURE_TIMES is non-zero;
   initial_value() resets all three to 0) */
int OUTFILTER_ms;
int FILTERALGO_ms;
int INFILTER_ms;
#endif /*MEASURE_TIMES*/
CHAR **Textfiles = NULL; /* array of filenames to be searched */
int Numfiles = 0; /* indicates how many files in Textfiles */
int copied_from_argv = 0; /* were filenames copied from argv (should I free 'em)? */
/* The "original_" copies hold the values that reinit_value() copies back
   into the working variables before each search. */
CHAR old_D_pat[MaxDelimit * 2] = "\n"; /* to hold original D_pattern */
CHAR original_old_D_pat[MaxDelimit * 2] = "\n";
CHAR Pattern[MAXPAT], OldPattern[MAXPAT];
CHAR CurrentFileName[MAX_LINE_LEN];
int SetCurrentFileName = 0; /* dirty glimpse trick to make filters work: output seems to come from another file */
int CurrentByteOffset;
int SetCurrentByteOffset = 0;
CHAR Progname[MAXNAME];
/* string which delimits records -- defaults to newline */
CHAR D_pattern[MaxDelimit * 2] = "\n; ";
CHAR tc_D_pattern[MaxDelimit * 2] = "\n";
CHAR original_D_pattern[MaxDelimit * 2] = "\n; ";
char COMP_DIR[MAX_LINE_LEN];
char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; /* interfacing with tcompress */
int NOFILENAME, /* Boolean flag, set for -h option */
FILENAMEONLY; /* Boolean flag, set for -l option */
extern int init();
int table[WORD][WORD]; /* zeroed by reinit_value() before every search */
CHAR *agrep_saved_pattern = NULL; /* to prevent multiple prepfs for each boolean search: crd@hplb.hpl.hp.com */
#ifdef _WIN32
/* Windows-only section: extra headers plus old-style (no parameter list)
   declarations of the functions used by this file.  The trailing comment
   on each line names the source file given for that function. */
#include <direct.h>
#include <io.h>
int agrep_usage(); /* agrep.c */
int exec(); /* agrep.c */
int exponen(); /* agrep.c */
int r_output(); /* agrep.c */
int file_out(); /* agrep.c */
void agrep_online_help(); /* agrephlp.c */
int bitap(); /* bitap.c */
int fill_buf(); /* bitap.c */
int check_file(); /* checkfil.c */
int checksg(); /* checksg.c */
int get_current_codepage(); /* codepage.c */
int compat(); /* compat.c */
int maskgen(); /* maskgen.c */
int mgrep(); /* newmgrep.c */
int prepf(); /* newmgrep.c */
int preprocess(); /* preproce.c */
void destroy_tree(); /* putils.c */
int recursive(); /* recursiv.c */
int sgrep(); /* sgrep.c */
int initialize_common(); /* dummyfil.c */
int tuncompressible_filename(); /* dummyfil.c */
int quick_tcompress(); /* dummyfil.c */
#endif
/*
 * reinit_value_partial -- light-weight reset.
 *
 * Called when a multipattern search is repeated and the pattern has not
 * changed.  Clears both match counters and errno, and switches
 * FIRST_IN_RE back ON.  Takes no arguments, returns nothing.
 */
void reinit_value_partial()
{
    FIRST_IN_RE = ON;
    errno = 0;
    prev_num_of_matched = 0;
    num_of_matched = 0;
}
/*
 * reinit_value -- reset the per-search globals.
 *
 * This must be called before every agrep_search.  It
 *   - tears down any boolean parse tree and the multibuf that goes with it,
 *   - frees and zeroes every used aterminals[] entry,
 *   - rebuilds Bit[] and clears Mask[], Init[], Next[], Next1[] and table[][],
 *   - restores the record delimiter from its "original_" copies,
 *   - resets the regular-expression flags,
 * and finishes with reinit_value_partial().
 * Takes no arguments, returns nothing.
 */
void reinit_value()
{
    int k;

    /* Added on 7th Oct 1994 */
    if (AParse != NULL) {
        if (AComplexBoolean != 0) {
            destroy_tree(AParse);
        }
        AComplexBoolean = 0;
        AParse = 0;
        PAT_BUFFER = 0;
        /* this was allocated for arbit booleans, not multipattern search */
        if (multibuf != NULL) {
            free(multibuf);
        }
        multibuf = NULL;
        multilen = 0;
        /* Cannot free multifd here since that is always allocated for multipattern search */
    }

    /* release the leaf values of all terminals in use and blank the slots */
    k = 0;
    while (k < anum_terminals) {
        free(aterminals[k].data.leaf.value);
        memset(&aterminals[k], '\0', sizeof(ParseTree));
        k++;
    }
    anum_terminals = 0;

    /* Bit[WORD] is 1; each lower index holds its upper neighbour shifted left by one */
    Bit[WORD] = 1;
    k = WORD;
    while (--k > 0) {
        Bit[k] = Bit[k + 1] << 1;
    }

    memset(Mask, '\0', MAXSYM * sizeof(unsigned));

    /* bg: new things added on Mar 13 94 */
    Init1 = 0;
    NO_ERR_MASK = 0;
    memset(Init, '\0', sizeof Init);
    memset(Next, '\0', sizeof Next);
    memset(Next1, '\0', sizeof Next1);
    wildmask = 0;
    endposition = 0;
    D_endpos = 0;
    memset(table, '\0', sizeof table);

    /* put the delimiter back to what agrep_init established */
    strcpy(D_pattern, original_D_pattern);
    D_length = original_D_length;
    strcpy(old_D_pat, original_old_D_pat);

    /* Changed on Dec 26th: bg */
    REGEX = 0;
    FASTREGEX = 0;
    TAIL = ON; /* were off initially */
    HEAD = ON;
    RE_ERR = 0;
    AND = 0;
    M = 0;
    pos_cnt = 0; /* added 31 Jan 95 */

    reinit_value_partial();
}
/* This must be called before every agrep_init to reset agrep options */
void initial_value()
{
SetCurrentFileName = 0; /* 16/9/94 */
SetCurrentByteOffset = 0; /* 23/9/94 */
/* courtesy: crd@hplb.hpl.hp.com */
if (agrep_saved_pattern) {
free(agrep_saved_pattern);
agrep_saved_pattern= NULL;
}
/* bg: new stuff on 17/Feb/94 */
if (multifd != -1) close(multifd);
multifd = -1;
if (multibuf != NULL) free(multibuf);
multibuf = NULL;
multilen = 0;
if (pat_spool != NULL) free(pat_spool);
pat_spool = NULL;
#if DOTCOMPRESSED
if (tc_pat_spool != NULL) free(tc_pat_spool);
tc_pat_spool = NULL;
#endif /* DOTCOMPRESSED */
LIMITOUTPUT = 0; /* means infinity = current semantics */
LIMITPERFILE = 0; /* means infinity = current semantics */
LIMITTOTALFILE = 0; /* means infinity = current semantics */
EASYSEARCH = 1;
DNA = APPROX = PAT_FILE = PAT_BUFFER = CONSTANT = total_line = D = TCOMPRESSED = 0;
PAT_FILE_NAME[0] = '\0';
EXITONERROR = NOFILENAME = FILENAMEONLY = FILEOUT = ALWAYSFILENAME = NEW_FILE = POST_FILTER = 0;
original_old_D_pat[0] = old_D_pat[0] = '\n';
original_old_D_pat[1] = old_D_pat[1] = '\0';
original_D_pattern[0] = D_pattern[0] = '\n';
original_D_pattern[1] = D_pattern[1] = ';';
original_D_pattern[2] = D_pattern[2] = ' ';
original_D_pattern[3] = D_pattern[3] = '\0';
strcpy(tc_D_pattern, "\n");
tc_D_length = 1;
/* the functions agrep_init and agrep_search take care of Textfiles and Numfiles */
agrep_inpointer = 0;
agrep_outpointer = 0;
agrep_outlen = 0;
#if MEASURE_TIMES
OUTFILTER_ms = FILTERALGO_ms = INFILTER_ms = 0;
#endif /*MEASURE_TIMES*/
MULTI_OUTPUT = 0;
PRINTPATTERN = 0;
PRINTFILENUMBER = 0;
JUMP = FNAME = BESTMATCH = NOPROMPT = NOUPPER = ISO2ASCII = 0 ;
RECURSIVE = 0;
COUNT = LINENUM = WHOLELINE = SGREP = 0;
NOOUTPUTZERO = 0;
EATFIRST = INVERSE = TRUNCATE = OUTTAIL = 0;
NOMATCH = FIRSTOUTPUT = ON; /* were off initally */
I = DD = S = 1; /* were off initially */
original_D_length = D_length = 2; /* was 0 initially */
SILENT = Num_Pat = PSIZE = SIMPLEPATTERN = prev_num_of_matched = num_of_matched = files_matched = 0;
WORDBOUND = DELIMITER = 0;
COMP_DIR[0] = '\0';
FREQ_FILE[0] = '\0';
HASH_FILE[0] = '\0';
STRING_FILE[0] = '\0';
BYTECOUNT = OFF;
PRINTOFFSET = OFF;
PRINTRECORD = ON;
glimpse_clientdied = 0; /* added 15th Feb 95 */
/* Pattern, OldPattern, execfd, Numfiles are set in agrep_init: so no need to initialize */
reinit_value();
}
void compute_next(M, Next, Next1)
int M;
unsigned *Next, *Next1;
{
int i, j=0, n, k, temp;
int mid, pp;
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -