?? build_in.c
字號:
printf("%d\n", system(s));#endif#if 0 printf("merged\n"); sprintf(s, "exec %s -10 '%s/%s'\n", SYSTEM_HEAD, escapesinglequote(INDEX_DIR, es1), INDEX_FILE); system(s);#endif /*0*/}/* --------------------------------------------------------------------build_hash():input: a set of filenames in name_list[], a partition table p_table[]output: a hash table hash_table[].-----------------------------------------------------------------------*/build_hash(){ int fd; /* opened file number */ int i, pn; /* pn: current partition */ int num_read; char word[256]; struct stat stbuf; int offset; int toread; unsigned char *buffer; /* running pointer for getword = place where reads begin */ unsigned char *bx; /* running pointer for read-loop, initially buffer */ unsigned char *buffer_end; /* place where getword should stop */ unsigned char *buffer_begin;/* constant pointer to beginning */ unsigned char *next_record; /* pointer that tells where the current record ends: if buffer (returned by getword) is >= this, increment ICurrentFileOffset */ unsigned char *last_record; /* pointer that tells where the last record ends: may or may not be > buffer_end, but surely <= bx the last byte read */ int residue; /* extra variable to store buffer_begin + BLOCK_SIZE - buffer_end */ int tried_once = 0; int attribute; int ret; char outname[MAX_LINE_LEN]; char *unlinkname = NULL; int pid = getpid(); if (StructuredIndex) region_initialize(); init_hash_table();#ifdef debug printf("entering build_hash(), part_num=%d\n", part_num);#endif tried_once = 0;try_again_1: buffer_begin = buffer = (unsigned char *) my_malloc(sizeof(char)* BLOCK_SIZE + 10); /* always read in units of BLOCK_SIZE or less */ if(buffer == NULL) { fprintf(stderr, "not enough memory in build_hash\n"); if (tried_once) return; traverse1(); init_hash_table(); tried_once = 1; goto try_again_1; } bx = buffer; if (OneFilePerBlock) { for(i=0; i<file_num; i++) { unlinkname = NULL; if ((disable_list != NULL) && (i<old_file_num) && 
(disable_list[block2index(i)] & mask_int[i%(8*sizeof(int))])) continue; if (LIST_GET(name_list, i) == NULL) continue; if ((ret = tuncompress_file(LIST_GET(name_list, i), outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT)) > 0) { /* do not remove old .TZ file */ if (StructuredIndex && (-1 == region_create(outname))) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(outname, O_RDONLY, 0)) == -1) ) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! */ unlink(outname); continue; } unlinkname = outname; goto index_file1; } /* Try to apply the filter */ sprintf(outname, "%s/.glimpse_apply.%d", INDEX_DIR, pid); if ((ret = apply_filter(LIST_GET(name_list, i), outname)) == 1) { /* Some pattern matched AND some filter was successful */ if (StructuredIndex && (-1 == region_create(outname))) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(outname, O_RDONLY)) == -1) ) { /* error: shouldn't have returned 1! */ fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! 
*/ unlink(outname); continue; } unlinkname = outname; goto index_file1; } else if (ret == 2) { /* Some pattern matched but no filter was successful */ if (filetype(LIST_GET(name_list, i), 0, NULL, NULL)) { /* try to index input file if it satisfies filetype */ remove_filename(i, -1); unlink(outname); continue; } unlinkname = outname; } if (StructuredIndex && (-1 == region_create(LIST_GET(name_list, i)))) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(LIST_GET(name_list, i), O_RDONLY, 0)) == -1) ) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! */ if (unlinkname != NULL) unlink(unlinkname); continue; } index_file1:#ifdef SW_DEBUG if (AddToIndex || FastIndex) printf("adding words of %s in %d\n", LIST_GET(name_list,i), i); printf("%s\n", LIST_GET(name_list, i));#endif /* my_stat(LIST_GET(name_list, i), &stbuf); Chris Dalton */ fstat(fd, &stbuf);#ifdef SW_DEBUG printf("filesize: %d\n", stbuf.st_size);#endif#ifdef UDI_DEBUG printf("%s ", LIST_GET(name_list, i)); printf("size: %d ", stbuf.st_size);#endif /* buffer always points to a BLOCK_SIZE block of allocated memory */ buffer = buffer_begin; residue = 0; if (RecordLevelIndex) { if (!StoreByteOffset) NextICurrentFileOffset = ICurrentFileOffset = 1; else NextICurrentFileOffset = ICurrentFileOffset = 0; } for (offset = 0; offset < stbuf.st_size; offset += BLOCK_SIZE) { offset -= residue; if (!RecordLevelIndex) NextICurrentFileOffset = ICurrentFileOffset = offset; toread = offset + BLOCK_SIZE >= stbuf.st_size ? 
stbuf.st_size - offset : BLOCK_SIZE; lseek(fd, offset, SEEK_SET); bx= buffer; num_read = 0; while ((toread > 0) && ((num_read = read(fd, bx, toread)) < toread)) { if (num_read <= 0) { buffer = bx; fprintf(stderr, "read error on file %s at offset %d\n", LIST_GET(name_list, i), offset); goto break_break1; /* C doesn't have break; break; */ } bx += num_read; toread -= num_read; } if (num_read >= toread) { bx += num_read; toread -= num_read; } buffer_end = bx; residue = 0; if (buffer_end == buffer_begin + BLOCK_SIZE) { if (RecordLevelIndex) { buffer_end = backward_delimiter(buffer_end /* NOT bx */, buffer, rdelim, rdelim_len, 0); } else { while ((INDEXABLE(*(buffer_end-1))) && (buffer_end > buffer_begin + MAX_WORD_SIZE)) buffer_end --; } residue = buffer_begin + BLOCK_SIZE - buffer_end; /* if (residue > 0) printf("residue = %d in %s at %d\n", residue, LIST_GET(name_list, i), offset); */ } if (RecordLevelIndex) { next_record = forward_delimiter(buffer, buffer_end, rdelim, rdelim_len, 0); } bx = buffer; PrintedLongWordWarning = 0; while ((buffer=(unsigned char *) getword(LIST_GET(name_list, i), word, buffer, buffer_end, &attribute, &next_record)) < buffer_end) { if (RecordLevelIndex) { if (buffer >= next_record) { next_record = forward_delimiter(buffer, buffer_end, rdelim, rdelim_len, 0); if (StoreByteOffset) ICurrentFileOffset += next_record - buffer; else ICurrentFileOffset ++; } } /* printf("%s\n", word); */ if(word[0] == '\0') continue; if(icount - hash_icount >= I_THRESHOLD) {#if BG_DEBUG fprintf(LOGFILE, "reached I_THRESHOLD at %d\n", icount - hash_icount);#endif /*BG_DEBUG*/ traverse1(); init_hash_table(); hash_icount = icount; } insert_h(word, i, attribute); } if (word[0] != '\0') { /* printf("%s\n", word); */ if(icount - hash_icount >= I_THRESHOLD) {#if BG_DEBUG fprintf(LOGFILE, "reached I_THRESHOLD at %d\n", icount - hash_icount);#endif /*BG_DEBUG*/ traverse1(); init_hash_table(); hash_icount = icount; } insert_h(word, i, attribute); } if (RecordLevelIndex) { 
if (buffer >= next_record) { /* next_record = forward_delimiter(buffer, buffer_end, rdelim, rdelim_len, 0); */ ICurrentFileOffset ++; } } buffer = buffer_begin; next_record = buffer; } break_break1: close(fd); if (unlinkname != NULL) unlink(unlinkname);#ifdef UDI_DEBUG printf("add to index: %d\n",icount-save_icount);#endif if ((MAXWORDSPERFILE > 0) && (icount-save_icount > MAXWORDSPERFILE)) { fprintf(MESSAGEFILE, "%d words are contributed by %s\n", icount-save_icount, LIST_GET(name_list, i)); AddedMaxWordsMessage = ON; } if (IndexNumber && NUMERICWORDPERCENT && (numeric_icount * 100 > (icount - save_icount) * NUMERICWORDPERCENT) && (icount - save_icount > MIN_WORDS)) { fprintf(MESSAGEFILE, "NUMBERS occur in %d%% of %d words contributed by %s\n", (numeric_icount * 100)/(icount - save_icount), icount - save_icount, LIST_GET(name_list, i)); AddedMixedWordsMessage = ON; } numeric_icount=0; save_icount=icount; if (StructuredIndex) region_destroy(); } traverse1(); init_hash_table(); hash_icount = icount; my_free(buffer_begin, BLOCK_SIZE + 10); return; } for(pn=1; pn < part_num; pn++) /* partition # 0 is not accessed */ { if (pn == '\n') continue; /* There cannot be a partition # '\n' or 0: see partition.c */ for(i=p_table[pn]; i<p_table[pn+1]; i++) { unlinkname = NULL; if ((disable_list != NULL) && (i<old_file_num) && (disable_list[block2index(i)] & mask_int[i%(8*sizeof(int))])) continue; if (LIST_GET(name_list, i) == NULL) continue; if (BuildDictionaryExisting) { if (((fd = my_open(LIST_GET(name_list, i), O_RDONLY, 0)) == -1) ) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (!CompressAfterBuild) unlinkname = LIST_GET(name_list, i); /* not needed anymore */ goto index_file2; } if ((ret = tuncompress_file(LIST_GET(name_list, i), outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT)) > 0) { /* do not remove old .TZ file */ if (StructuredIndex && (-1 == region_create(outname))) { 
fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(outname, O_RDONLY, 0)) == -1) ) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! */ unlink(outname); continue; } if (BuildDictionary && CompressAfterBuild) strcpy(LIST_GET(name_list, i), outname); /* name of clear file will be smaller, so enough space */ else unlinkname = outname; goto index_file2; } /* Try to apply the filter */ sprintf(outname, "%s/.glimpse_apply.%d", INDEX_DIR, pid); if ((ret = apply_filter(LIST_GET(name_list, i), outname)) == 1) { /* Some pattern matched AND some filter was successful */ if (StructuredIndex && (-1 == region_create(outname))) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(outname, O_RDONLY)) == -1) ) { /* error: shouldn't have returned 1! */ fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! */ unlink(outname); continue; } unlinkname = outname; goto index_file2; } else if (ret == 2) { /* Some pattern matched but no filter was successful */ if (filetype(LIST_GET(name_list, i), 0, NULL, NULL)) { /* try to index input file if it satisfies filetype */ remove_filename(i, -1); unlink(outname); continue; } unlinkname = outname; } if (StructuredIndex && (-1 == region_create(LIST_GET(name_list, i)))) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); continue; } if (((fd = my_open(LIST_GET(name_list, i), O_RDONLY)) == -1) ) { fprintf(stderr, "permission denied or non-existent file: %s\n", LIST_GET(name_list, i)); remove_filename(i, -1); if (StructuredIndex) region_destroy(); /* cannot happen! 
*/ if (unlinkname != NULL) unlink(unlinkname); continue; } index_file2:#ifdef SW_DEBUG if (AddToIndex || FastIndex) printf("adding words of %s in %d\n", LIST_GET(name_list, i), pn); printf("%s\n", LIST_GET(name_list, i));#endif /* my_stat(LIST_GET(name_list, i), &stbuf); Chris Dalton */ fstat(fd, &stbuf);#ifdef SW_DEBUG printf("filesize: %d\n", stbuf.st_size);#endif#ifdef UDI_DEBUG printf("%s ", LIST_GET(name_list, i)); printf("size: %d ", stbuf.st_size);#endif /* buffer always points to a BLOCK_SIZE block of allocated memory */ buffer = buffer_begin; residue = 0; if (RecordLevelIndex) { if (!StoreByteOffset) NextICurrentFileOffset = ICurrentFileOffset = 1; else NextICurrentFileOffset = ICurrentFileOffset = 0; } for (offset = 0; offset < stbuf.st_size; offset += BLOCK_SIZE) { offset -= residue; if (!RecordLevelIndex) NextICurrentFileOffset = ICurrentFileOffset = offset; toread = offset + BLOCK_SIZE >= stbuf.st_size ? stbuf.st_size - offset : BLOCK_SIZE; lseek(fd, offset, SEEK_SET); bx= buffer; num_read = 0; while ((toread > 0) && ((num_read = read(fd, bx, toread)) < toread)) { if (num_read <= 0) { buffer = bx; fprintf(stderr, "read error on file %s at offset %d\n", LIST_GET(name_list, i), offset); goto break_break2; /* C doesn't have break; break; */ } bx += num_read; toread -= num_read; } if (num_read >= toread) { bx += num_read; toread -= num_read; } buffer_end = bx; residue = 0; if (buffer_end == buffer_begin + BLOCK_SIZE) { if (RecordLevelIndex) { buffer_end = backward_delimiter(buffer_end /* NOT bx */, buffer, rdelim, rdelim_len, 0); } else { while ((INDEXABLE(*(buffer_end-1))) && (buffer_end > buffer_begin + MAX_WORD_SIZE)) buffer_end --; } residue = buffer_begin + BLOCK_SIZE - buffer_end;
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -