?? io.c
字號:
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* ./glimpse/index/io.c */
#include "glimpse.h"
#include <stdio.h>
#include <sys/stat.h>
#include <errno.h>

extern char INDEX_DIR[MAX_LINE_LEN];
extern int memory_usage;

#include "utils.c"

int REAL_INDEX_BUF = DEF_REAL_INDEX_BUF, MAX_ALL_INDEX = DEF_MAX_ALL_INDEX, FILEMASK_SIZE = DEF_FILEMASK_SIZE, REAL_PARTITION = DEF_REAL_PARTITION;

/*
 * Escapes single quotes in "original" with a backslash (\) so that the result
 * can be passed on to the shell as a file name.  Returns its second argument
 * so that the call can be used directly as a printf argument.
 *
 * Called before passing any argument to the system() routine in the glimpse
 * or glimpseindex source code.  Works only if the new name is going to be
 * passed as an argument to the shell within two ''s.
 *
 * Every ' in the input becomes the four characters '\'' in the output, so
 * "new" must have room for up to 4 * strlen(original) + 1 bytes.
 */
char *escapesinglequote(original, new)
	char *original, *new;
{
	char *oldnew = new;

	while (*original != '\0') {
		if (*original == '\'') {
			*new ++ = '\'';	/* close existing ' : this guy will be a part of a file name starting from a ' */
			*new ++ = '\\';	/* add escape character */
			*new ++ = '\'';	/* add single quote from original here */
		}
		*new ++ = *original ++;	/* start the real single quote to continue existing file name if *original was ' */
	}
	*new = *original;	/* copy the terminating '\0' */
	return oldnew;
}

/* --------------------------------------------------------------------
get_array_of_lines()
input: an input filename, address of the table, maximum number of entries
of the table, and a overflow handling flag.
output: a set of strings in the table.
The first line of the file is parsed as a decimal line count and is only
range-checked (0..MaxNum24bPartition); every following non-empty line is
copied into a freshly my_malloc'ed string and appended to the table.
when overflow is ON, the function returns after the table is filled
(and returns 0 if the file cannot be opened, the header is out of range,
or memory runs out); otherwise the function will exit if overflow occurs.
In normal return, the function returns the number of entries read.
----------------------------------------------------------------------*/
int get_array_of_lines(inputfile, table, max_entry, overflow_ok)
	char *inputfile;
	char **table[];
	int max_entry;		/* max number of entries in the table */
	int overflow_ok;	/* flag for handling overflow */
{
	int tx = 0;		/* index for table */
	FILE *file_in;
	char buffer[MAX_NAME_BUF];
	char *np;
	int line_length;
	int num_lines = 0;	/* stays 0 if the header line does not start with a number */
	int i;

	if ((file_in = fopen(inputfile, "r")) == NULL) {
		if (overflow_ok) return 0;
		fprintf(stderr, "can't open for reading: %s\n", inputfile);
		exit(2);
	}

	/* An empty file has no header and no entries: nothing to read. */
	if (fgets(buffer, MAX_NAME_BUF, file_in) == NULL) {
		fclose(file_in);
		return 0;
	}
	sscanf(buffer, "%d", &num_lines);
	if ((num_lines < 0) || (num_lines > MaxNum24bPartition)) {
		fclose(file_in);
		if (overflow_ok) return 0;
		fprintf(stderr, "Error in reading: %s\n", inputfile);
		exit(2);
	}

	while (fgets(buffer, MAX_NAME_BUF, file_in)) {
		line_length = (int) strlen(buffer);
		if (line_length == 0) continue;
		if (buffer[line_length - 1] == '\n') {
			if (line_length == 1) continue;		/* blank line */
			buffer[line_length - 1] = '\0';		/* discard the '\n' */
		}
		/* else: last line without a trailing newline, keep every character */

		np = (char *) my_malloc(sizeof(char) * (line_length + 2));
		if (np == NULL) {
			/* Release everything stored so far; the file is closed exactly once. */
			fclose(file_in);
			for (i = 0; i < tx; i++) {
				if (LIST_GET(table, i) != NULL) {
#if	BG_DEBUG
					memory_usage -= (strlen(LIST_GET(table, i)) + 2);
#endif	/*BG_DEBUG*/
					my_free(LIST_GET(table, i), 0);
					LIST_SUREGET(table, i) = NULL;
				}
			}
			if (overflow_ok) return 0;
			fprintf(stderr, "malloc failure in get_array_of_lines\n");
			exit(2);
		}
		LIST_ADD(table, tx, np, char*);
		tx ++;
		strcpy(np, buffer);

		if (tx > max_entry) {
			fclose(file_in);
			if (overflow_ok) return(tx);
			fprintf(stderr, "overflow in get_array_of_lines()\n");
			exit(2);
		}
	}
	fclose(file_in);
	return(tx);	/* return number of lines read */
}

/* --------------------------------------------------------------------
get_table():
input: an input filename, address of the table, maximum number of entries
of the table, and a overflow handling flag.
output: a set of integers in the table.
Each entry is stored in the file as 4 bytes, most significant byte first.
A trailing partial entry (fewer than 4 bytes before EOF) is ignored.
when overflow_ok is ON, the function returns after the table is filled
(at most max_entry entries are ever stored, so table[] needs exactly
max_entry elements); otherwise the function will exit if overflow occurs.
In normal return, the function returns the number of entries read.
----------------------------------------------------------------------*/
int get_table(inputfile, table, max_entry, overflow_ok)
	char *inputfile;
	int table[];
	int max_entry;
	int overflow_ok;
{
	FILE *file_in;
	int b0, b1, b2, b3;	/* the four bytes of the current entry */
	int tx = 0;		/* number of entries read */

	if ((file_in = fopen(inputfile, "r")) == NULL) {
		if (overflow_ok) return 0;
		fprintf(stderr, "can't open %s for reading\n", inputfile);
		exit(2);
	}

	for (;;) {
		if ((b0 = getc(file_in)) == EOF) break;
		if ((b1 = getc(file_in)) == EOF) break;
		if ((b2 = getc(file_in)) == EOF) break;
		if ((b3 = getc(file_in)) == EOF) break;

		/* Check BEFORE storing so that table[max_entry] is never written. */
		if (tx >= max_entry) {
			if (!overflow_ok) {
				fprintf(stderr, "in get_table: table overflow\n");
				exit(2);
			}
			break;
		}
		/* Assemble in unsigned arithmetic: a byte >= 0x80 shifted left by 24 overflows a signed int. */
		table[tx++] = (int) (((unsigned int) b0 << 24) | ((unsigned int) b1 << 16) |
				     ((unsigned int) b2 << 8) | (unsigned int) b3);
	}
	fclose(file_in);
	return(tx);
}

/*
 * Reads the header of the index file named s.
 * All outputs are first reset (*dashn = *num = *attr = 0, delim = "").
 * The file is then scanned with the format "%s\n%%%d\n%%%d%s\n": a first
 * word, then '%' followed by an integer stored in *num, then '%' followed
 * by an integer stored in *attr and a word stored in delim.
 * *dashn is set to ON when the first word contains "1234567890".
 * Returns *num, or 0 if the file cannot be opened.
 * NOTE(review): both %s conversions are unbounded; the first word must fit
 * in MAX_LINE_LEN bytes and delim must be large enough for the last word.
 */
int get_index_type(s, dashn, num, attr, delim)
	char s[];
	int *dashn, *num, *attr;
	char delim[];
{
	FILE *fp = fopen(s, "r");
	char buf[MAX_LINE_LEN];

	*dashn = *num = *attr = 0;
	*delim = '\0';
	if (fp == NULL) return 0;

	buf[0] = '\0';	/* so that strstr() below is well defined when fscanf() matches nothing */
	fscanf(fp, "%s\n%%%d\n%%%d%s\n", buf, num, attr, delim);
	fclose(fp);
	if (strstr(buf, "1234567890")) *dashn = ON;
	return *num;
}

/*
 * Fetches one line of the partitions file into destbuf.
 * The first four bytes of srcbuf hold a big-endian value which is passed
 * through decode32b() to obtain the file offset to seek to in partfp.
 * Read offset from srcbuf first so that you can use it with srcbuf=destbuf.
 * On a seek or read failure destbuf is left as "\n" followed by '\0's.
 * A seek failure prints a warning once per process.
 * Does not do caching of blocks seen so far: done in OS hopefully.
 * Always returns 0.
 */
int get_block_numbers(srcbuf, destbuf, partfp)
	unsigned char *srcbuf, *destbuf;
	FILE *partfp;
{
	int offset, pat_size;
	static int printederror = 0;

	/* Unsigned arithmetic: srcbuf[0] << 24 would overflow a signed int for bytes >= 0x80. */
	offset = (int) (((unsigned int) srcbuf[0] << 24) | ((unsigned int) srcbuf[1] << 16) |
			((unsigned int) srcbuf[2] << 8) | (unsigned int) srcbuf[3]);
	pat_size = decode32b(offset);

	destbuf[0] = '\n';
	destbuf[1] = '\0';
	destbuf[2] = '\0';
	destbuf[3] = '\0';

	if (fseek(partfp, (long) pat_size, SEEK_SET) != 0) {
		if (!printederror) {
			fprintf(stderr, "Warning! Error in the format of the index!\n");
			printederror = 1;
		}
		return 0;	/* do not read from an unrelated file position */
	}

	if (fgets((char *) destbuf, REAL_INDEX_BUF - MAX_WORD_BUF - 1, partfp) == NULL) {
		destbuf[0] = '\n';
		destbuf[1] = '\0';
		destbuf[2] = '\0';
		destbuf[3] = '\0';
	}
	return 0;
}

int num_filter=0;
int filter_len[MAX_FILTER];
CHAR *filter[MAX_FILTER];
CHAR *filter_command[MAX_FILTER];
struct stat filstbuf;

/*
 * Loads the filter table from the file FILTER_FILE in index_dir.
 * The table (num_filter, filter[], filter_command[], filter_len[]) is always
 * cleared first; nothing else happens when dofilter is 0 or the file cannot
 * be opened or stat'ed.
 *
 * Each line has the form "pattern command": leading blanks are skipped, the
 * pattern is either a '...'-quoted string (a backslash inside the quotes
 * keeps the next character) or a run of non-blank characters, and the rest
 * of the line after the separating blanks is the command.  Lines with an
 * empty pattern, an unterminated quote, or no command are ignored.
 * The pattern is converted in place by convert2agrepregexp(), whose return
 * value is stored in filter_len[]; a return value of 0 drops the line.
 * At most MAX_FILTER entries are read.  Always returns 0.
 */
int read_filters(index_dir, dofilter)
	char *index_dir;
	int dofilter;
{
	int len;
	int patlen;
	int patpos;
	int commandpos;
	int n;
	FILE *filterfile;
	char filterbuf[MAX_LINE_LEN];
	char tempbuf[MAX_LINE_LEN];
	char s[MAX_LINE_LEN];

	num_filter = 0;
	memset(filter, '\0', sizeof(CHAR *) * MAX_FILTER);
	memset(filter_command, '\0', sizeof(CHAR *) * MAX_FILTER);
	memset(filter_len, '\0', sizeof(int) * MAX_FILTER);
	if (!dofilter) return 0;

	n = snprintf(s, sizeof(s), "%s/%s", index_dir, FILTER_FILE);
	if ((n < 0) || (n >= (int) sizeof(s))) return 0;	/* path does not fit: same as no filter file */

	filterfile = fopen(s, "r");
	if (filterfile == NULL) return 0;	/* no filter file: no need to complain */
	if (fstat(fileno(filterfile), &filstbuf) == -1) {
		fclose(filterfile);
		return 0;
	}

	while ((num_filter < MAX_FILTER) && fgets(filterbuf, MAX_LINE_LEN, filterfile)) {
		len = (int) strlen(filterbuf);
		if ((len > 0) && (filterbuf[len - 1] == '\n')) filterbuf[--len] = '\0';	/* drop the newline only if there is one */

		commandpos = 0;
		while ((commandpos < len) && ((filterbuf[commandpos] == ' ') || (filterbuf[commandpos] == '\t'))) commandpos ++;	/* leading spaces */
		if (commandpos >= len) continue;

		if (filterbuf[commandpos] == '\'') {
			/* quoted pattern: runs up to the next unescaped ' */
			commandpos ++;
			patpos = commandpos;
			patlen = 0;
			while (commandpos < len) {
				if (filterbuf[commandpos] == '\\') {
					commandpos += 2;
					patlen += 2;
				}
				else if (filterbuf[commandpos] != '\'') {
					commandpos ++;
					patlen ++;
				}
				else break;
			}
			if ((commandpos >= len) || (patlen <= 0)) continue;	/* no closing quote, or empty pattern */
			commandpos ++;	/* skip the closing quote */
		}
		else {
			/* bare pattern: runs up to the next blank */
			patpos = commandpos;
			patlen = 0;
			while ((commandpos < len) && (filterbuf[commandpos] != ' ') && (filterbuf[commandpos] != '\t')) {
				commandpos ++;
				patlen ++;
			}
		}

		/* Both pattern forms need a non-empty command after the separating blanks. */
		while ((commandpos < len) && ((filterbuf[commandpos] == ' ') || (filterbuf[commandpos] == '\t'))) commandpos ++;
		if (commandpos >= len) continue;

		memcpy(tempbuf, &filterbuf[patpos], patlen);
		tempbuf[patlen] = '\0';
		if ((filter_len[num_filter] = convert2agrepregexp(tempbuf, patlen)) == 0) continue;	/* inplace conversion */

		filter[num_filter] = (unsigned char *) strdup(tempbuf);
		filter_command[num_filter] = (unsigned char *) strdup(&filterbuf[commandpos]);
		if ((filter[num_filter] == NULL) || (filter_command[num_filter] == NULL)) {
			/* Out of memory: do not publish a half-built entry, stop reading. */
			filter_len[num_filter] = 0;
			break;
		}
		num_filter ++;
	}
	fclose(filterfile);
	return 0;
}

/*
 * Builds the shell command
 *	exec <command> '<in_name>' > '<out_name>'
 * (followed by "\n" when trailing_newline is non-zero), with single quotes
 * in both names escaped by escapesinglequote(), and runs it with system().
 * Returns 1 if the command was handed to system(), 0 if a name or the
 * whole command line was too long to build safely (nothing is run then).
 */
static int run_redirected_command(command, in_name, out_name, trailing_newline)
	char *command, *in_name, *out_name;
	int trailing_newline;
{
	char esc_in[4 * MAX_LINE_LEN];		/* escaping can expand each character to 4 */
	char esc_out[4 * MAX_LINE_LEN];
	char cmdline[10 * MAX_LINE_LEN];
	int n;

	if ((strlen(in_name) >= MAX_LINE_LEN) || (strlen(out_name) >= MAX_LINE_LEN)) return 0;
	n = snprintf(cmdline, sizeof(cmdline), "exec %s '%s' > '%s'%s", command,
		     escapesinglequote(in_name, esc_in), escapesinglequote(out_name, esc_out),
		     trailing_newline ? "\n" : "");
	if ((n < 0) || (n >= (int) sizeof(cmdline))) return 0;
	system(cmdline);
	return 1;
}

/*
 * Returns 1 if the file "path" can be opened with my_fopen() and at least
 * one byte can be read from it.  Otherwise unlinks "path" and returns 0.
 */
static int filter_output_ok(path)
	char *path;
{
	FILE *probe;
	char onebyte[4];
	int got;

	if ((probe = my_fopen(path, "r")) == NULL) {
		unlink(path);
		return 0;
	}
	got = (fread(onebyte, 1, 1, probe) > 0);
	fclose(probe);
	if (!got) unlink(path);
	return got;
}

/*
 * Called after a filter successfully wrote newoutname: makes that file the
 * current output (prevoutname) and picks the name for the next output.
 * If prevoutname is itself a filter output (prev_is_temporary), it is
 * unlinked and its name is reused for the next output; otherwise the next
 * output name is the new prevoutname with ".o" appended.
 * Both buffers are MAX_LINE_LEN bytes.
 */
static void advance_filter_outputs(prevoutname, newoutname, prev_is_temporary)
	char *prevoutname, *newoutname;
	int prev_is_temporary;
{
	char swapname[MAX_LINE_LEN];

	if (prev_is_temporary) {
		unlink(prevoutname);
		strcpy(swapname, prevoutname);
		strcpy(prevoutname, newoutname);
		strcpy(newoutname, swapname);
	}
	else {
		strcpy(prevoutname, newoutname);
		snprintf(newoutname, MAX_LINE_LEN, "%s.o", prevoutname);	/* truncates instead of overflowing */
	}
}

/*
 * Runs every filter whose pattern matches the name of the input file, each
 * one reading the output of the previous successful one.
 * Returns 1 if filter application was successful and the output (at least
 * 1 byte) is in the file named by outname, 2 if some pattern matched but
 * there is no output, 0 otherwise: sep 15-18 '94.
 * When the result is not 1 and the filter loop was reached, the input is
 * copied to outname with SYSTEM_CAT instead.
 * outname is in-out (on return value 1 it receives the name of the file
 * holding the final output), inname is in.
 *
 * Filters with filter_len > 0 are matched with strstr(); the others are
 * matched with memagrep_search(), except for the pattern ".*" (filter_len
 * == -2) which always matches.
 * memagrep is initialized in partition.c for calls from dir.c, and it is
 * already done by the time we call this function from main.c.
 */
int apply_filter(inname, outname)
	char *inname, *outname;	/* outname is in-out, inname is in */
{
	int i;
	char name[MAX_LINE_LEN + 2];	/* '\n' + name + ('\0' or '\n') */
	int name_len = strlen(inname);
	FILE *dummyout;
	char prevoutname[MAX_LINE_LEN];
	char newoutname[MAX_LINE_LEN];
	char tempinname[MAX_LINE_LEN];
	char *suffix;
	int matched;
	int ret = 0;
	int unlink_prevoutname = 0;

	if (num_filter <= 0) return 0;
	if (strlen(outname) >= MAX_LINE_LEN) return 0;	/* would not fit in newoutname */
	if ((dummyout = fopen("/dev/null", "w")) == NULL) return 0;

	/* ready for memgrep */
	name[0] = '\n';
	special_get_name(inname, name_len, tempinname);
	name_len = strlen(tempinname);
	strcpy(name+1, tempinname);
	strcpy(prevoutname, tempinname);
	strcpy(newoutname, outname);

	/* Current properly filtered output is always in prevoutname */
	for (i=0; i<num_filter; i++) {
		suffix = NULL;
		if (filter_len[i] > 0) {
			/* plain substring match */
			name[name_len + 1] = '\0';
			suffix = strstr(name+1, (char *) filter[i]);	/* Chris Dalton */
			matched = (suffix != NULL);
		}
		else {	/* must call memagrep */
			name[name_len + 1] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */
			matched = (((filter_len[i] == -2) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||
				   (memagrep_search(-filter_len[i], filter[i], name_len + 2, name, 0, dummyout) > 0));
		}
		if (!matched) continue;

		if (ret == 0) ret = 2;
		/* yes, it matched: now apply the command and get the output */
		if (!run_redirected_command((char *) filter_command[i], prevoutname, newoutname, 0)) continue;
		if (!filter_output_ok(newoutname)) continue;

		/* Filter was successful: output exists and has at least 1 byte in it */
		advance_filter_outputs(prevoutname, newoutname, unlink_prevoutname);
		ret = 1;
		unlink_prevoutname = 1;

		/* if the matched text was a proper suffix of the name, */
		/* remove the suffix just processed before examining the */
		/* name again. Chris Dalton */
		/* And I don't know what the equivalent thing is with */
		/* memagrep_search: since it doesn't return a pointer to */
		/* the place where the match occurred. Burra Gopal */
		if ((suffix != NULL) && (strcmp((char *) filter[i], suffix) == 0)) {
			name_len -= strlen(suffix);
			*suffix = '\0';
		}

		if (strlen(newoutname) >= MAX_LINE_LEN - 1) break;	/* no room left for another output name */
	}

	if (ret == 1) strcpy(outname, prevoutname);
	else {	/* dummy filter that copies input to output: caller can use tempinname but this has easy interface */
		run_redirected_command(SYSTEM_CAT, tempinname, outname, 1);
	}
	fclose(dummyout);
	return ret;
}

/*
 * Stop-list hook: currently reports that no word is a stop word (always 0).
 * Use a modified wais stoplist to do this with simple strcmp's in a for loop.
 */
int static_stop_list(word)
	char *word;
{
	return 0;
}

/* This is the stuff that used to be present in the old build_in.c */

/* Some variables used throughout */
FILE *TIMEFILE;		/* file descriptor for sorting .glimpse_filenames by time */
#if	BG_DEBUG
FILE *LOGFILE;		/* file descriptor for LOG output */
#endif	/*BG_DEBUG*/
FILE *STATFILE;		/* file descriptor for statistical data about indexed files */
FILE *MESSAGEFILE;	/* file descriptor for important messages meant for the user */
char INDEX_DIR[MAX_LINE_LEN];
char sync_path[MAX_LINE_LEN];
struct stat istbuf;
struct stat excstbuf;
struct stat incstbuf;
int ICurrentFileOffset;
int NextICurrentFileOffset;

/* Some options used throughout */
int GenerateHash = OFF;
int KeepFilenames = OFF;
int OneFilePerBlock = OFF;
int total_size = 0;
int total_deleted = 0;
int MAXWORDSPERFILE = 0;
int NUMERICWORDPERCENT = DEF_NUMERIC_WORD_PERCENT;
int AddToIndex = OFF;
int DeleteFromIndex = OFF;
int PurgeIndex = ON;
int FastIndex = OFF;
int BuildDictionary = OFF;
int BuildDictionaryExisting = OFF;
int CompressAfterBuild = OFF;
int IncludeHigherPriority = OFF;
int FilenamesOnStdin = OFF;
int ExtractInfo = OFF;
int InfoAfterFilename = OFF;
int FirstWordOfInfoIsKey = OFF;
int UseFilters = OFF;
int ByteLevelIndex = OFF;
int RecordLevelIndex = OFF;	/* When we want a -o like index but want to do booleans on a per-record basis directly from index: robint@zedcor.com */
				/* This type of index doesn't make sense with attributes since they span > 1 record; hence StructuredIndex == -2 => this = ON */
int StoreByteOffset = OFF;	/* In RecordLevelIndex, store record # for each word or byte offset of the record: record # is the default (12/12/96) */
char rdelim[MAX_LINE_LEN];
char old_rdelim[MAX_LINE_LEN];
int rdelim_len = 0;
/* int IndexUnderscore = OFF; */
int IndexableFile = OFF;
int MAX_INDEX_PERCENT = DEF_MAX_INDEX_PERCENT;
int MAX_PER_MB = DEF_MAX_PER_MB;
int I_THRESHOLD = DEF_I_THRESHOLD;
int BigHashTable = OFF;
int IndexEverything = OFF;
int HashTableSize = MAX_64K_HASH;
int BuildTurbo = OFF;
int SortByTime = OFF;
int AddedMaxWordsMessage = OFF;
int AddedMixedWordsMessage = OFF;

int icount=0;		/* count the number of my_malloc for indices structure */
int hash_icount=0;	/* to see how much was added to the current hash table */
int save_icount=0;	/* to see how much was added to the index by the current file */
int numeric_icount=0;	/* to see how many numeric words were there in the current file */

int mask_int[32] = MASK_INT;
int p_table[MAX_PARTITION];
int memory_usage = 0;

char *my_malloc(len) int len;{ char *s; static int i=100;
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -