?? invert10_31.c
字號:
FILE *pTxtFile;
if( (pTxtFile = fopen( sFileName, "wt" )) == NULL )
{
return 0;
}
fprintf(pTxtFile,"%s",StringBuffer);
fclose(pTxtFile);
return 0;
}
/**
 * MyDocFree - release a whole DocNode list, head node included.
 *
 * @pDocNode: first node of the list; the remaining nodes are reached
 *            through pNext.
 *
 * For every node the iPos buffer is freed first and then the node itself.
 * The caller's copy of the pointer is not changed, so the caller must not
 * use it after this call.
 *
 * Returns EPAR when pDocNode is NULL, 0 otherwise.
 */
int MyDocFree(DocNode *pDocNode)
{
    DocNode *pNextDoc;

    if (pDocNode == NULL) {
        return EPAR;
    }

    /*
     * The successor has to be read before the current node is freed;
     * reading any link after the free would touch released memory and
     * could hand the same node to free() twice.
     */
    while (pDocNode != NULL) {
        pNextDoc = pDocNode->pNext;
        free(pDocNode->iPos);   /* free(NULL) is a no-op */
        free(pDocNode);
        pDocNode = pNextDoc;
    }

    return 0;
}
/**
 * MyInvtFree - release everything that hangs off one inverted-list head.
 *
 * @pInvtNode: head of an InvertNode chain linked through pNextNode.
 *
 * Every node after the head is released: its DocNode list is handed to
 * MyDocFree() and the node itself is freed. The head node is NOT freed
 * (InvertBuild passes the address of a table element); only its DocNode
 * list is released, after which its pDocNode and pNextNode are set to NULL.
 *
 * Returns EPAR when pInvtNode is NULL, 0 otherwise.
 */
int MyInvtFree(InvertNode *pInvtNode)
{
    InvertNode *pVictim;
    InvertNode *pFollowing;

    if (pInvtNode == NULL)
        return EPAR;

    /* Release the successors in list order, remembering each next link
       before the node that holds it disappears. */
    for (pVictim = pInvtNode->pNextNode; pVictim != NULL; pVictim = pFollowing) {
        pFollowing = pVictim->pNextNode;
        MyDocFree(pVictim->pDocNode);
        free(pVictim);
    }

    /* The head keeps its storage; only its document list goes away. */
    MyDocFree(pInvtNode->pDocNode);
    pInvtNode->pDocNode = NULL;
    pInvtNode->pNextNode = NULL;

    return 0;
}
/* Reset `n` list heads so that they read as empty slots. */
static void InvtResetHeads(InvertNode *heads, int n)
{
    int k;

    for (k = 0; k < n; k++) {
        heads[k].lWordNum = 0;
        heads[k].pDocNode = NULL;
        heads[k].pNextNode = NULL;
        heads[k].lDocNum = 0;
        heads[k].sWords[0] = '\0';
    }
}

/* Hand every non-empty head in heads[0..n) to MyInvtFree(). */
static void InvtReleaseChains(InvertNode *heads, int n)
{
    int k;

    for (k = 0; k < n; k++) {
        if (heads[k].lWordNum != 0)
            MyInvtFree(&heads[k]);
    }
}

/*
 * Create `filepath` and the sub-directory "<filepath><suffix>", leaving the
 * sub-directory name in *pDir. *pDir is grown when the name does not fit in
 * the *pCap bytes it currently has. Returns 0, EMALLOC, or EFILEWRT when no
 * base path was supplied.
 */
static int InvtPrepareDir(char **pDir, size_t *pCap, char *filepath,
                          const unsigned char *suffix)
{
    size_t need;
    size_t newCap;
    char *grown;

    if (filepath == NULL)
        return EFILEWRT;

    /* Return value deliberately ignored: the directory normally exists
       already after the first slot has been written. */
    mkdir(filepath);

    need = strlen(filepath) + strlen((const char *)suffix) + 1;
    if (need > *pCap) {
        newCap = (size_t)MAXPATH + (size_t)RELCSIZE;
        if (newCap < need)
            newCap = need;
        /* Keep the old buffer reachable if realloc fails. */
        grown = realloc(*pDir, newCap);
        if (grown == NULL) {
            printf("error is memory is not enough!\n");
            return EMALLOC;
        }
        *pDir = grown;
        *pCap = newCap;
    }

    strcpy(*pDir, filepath);
    strcat(*pDir, (const char *)suffix);
    mkdir(*pDir);
    return 0;
}

/**
 * InvertBuild - build the inverted index from a forward table and write it
 * to disk.
 *
 * @fNode:    forward table; handed unchanged to InvertIdxBld().
 * @DocCount: number of documents in the table; must not be negative.
 * @filepath: directory prefix for the output. A slot is written below the
 *            directory "<filepath><key>", the key being appended without a
 *            separator.
 *
 * Steps:
 *  1. IndexBuild() supplies the two-dimensional table for GB words; slot
 *     [i][j] stands for the byte pair (i + GBHTBTMNUM, j + GBLWBTMNUM).
 *  2. EIdxBuild() supplies 26 heads for English words; head i stands for
 *     the byte i + LOWERA.
 *  3. InvertIdxBld() fills both tables from the forward table.
 *  4. For every slot whose lWordNum is non-zero the directory is created,
 *     GetWrtFlName() names the file to write, and wrtlst() (GB table) or
 *     wrt2engfile() (English table) writes the slot.
 *  5. All chains and both tables are released.
 *
 * Returns 0 on success, otherwise EFNODE, EDOCNUM, EMALLOC, EINDEXBUILD or
 * EFILEWRT. Everything allocated here is released on the error paths too.
 */
int InvertBuild(const ForwardNode *fNode, const int DocCount, char *filepath)
{
    const int gbRows = GBHTTOPNUM - GBHTBTMNUM + 1;
    const int gbCols = GBLWTOPNUM - GBLWBTMNUM + 1;
    InvertNode **InvertIndex = NULL;
    InvertNode *EIntIdx = NULL;
    char *sFileName = NULL;   /* result of the latest GetWrtFlName() call */
    char *sCurDir = NULL;     /* directory of the slot being written */
    size_t dirCap = 0;
    unsigned char uTemp[3];
    int IndexError = 0;
    int chainsBuilt = 0;
    int count = 0;
    int rc = 0;
    int i, j;

    if (fNode == NULL) {
        printf("error is: %d\n", EFNODE);
        printf("正向表不存在!\n");
        return EFNODE;
    }
    if (DocCount < 0) {
        printf("error is: %d\n", EDOCNUM);
        return EDOCNUM;
    }

    sCurDir = malloc((size_t)MAXPATH);
    if (sCurDir == NULL) {
        printf("error is memory is not enough!\n");
        return EMALLOC;
    }
    dirCap = (size_t)MAXPATH;

    /* Table for GB words. */
    InvertIndex = IndexBuild(InvertIndex, &IndexError);
    if (IndexError != 0 || InvertIndex == NULL) {
        /* What a failed IndexBuild() hands back is not known here, so it
           is not passed to free(). */
        InvertIndex = NULL;
        rc = EINDEXBUILD;
        goto cleanup;
    }
    for (i = 0; i < gbRows; i++)
        InvtResetHeads(InvertIndex[i], gbCols);

    /* Table for English words, one head per letter. */
    EIntIdx = EIdxBuild(EIntIdx, &IndexError);
    if (IndexError != 0 || EIntIdx == NULL) {
        EIntIdx = NULL;   /* same reasoning as for IndexBuild() above */
        rc = EINDEXBUILD;
        goto cleanup;
    }
    InvtResetHeads(EIntIdx, 26);

    /* Move the forward table into the two inverted tables. */
    if (InvertIdxBld(InvertIndex, EIntIdx, fNode, DocCount) != 0) {
        printf("error is InvertIdxBld, \n the Number is: %d\n", EINDEXBUILD);
        /* chainsBuilt stays 0: only the table storage is released below.
           NOTE(review): the chains of a half-built index are left alone
           because their state after a failure cannot be seen from here. */
        rc = EINDEXBUILD;
        goto cleanup;
    }
    chainsBuilt = 1;

    /* Write the GB slots. */
    for (i = 0; i < gbRows; i++) {
        for (j = 0; j < gbCols; j++) {
            if (InvertIndex[i][j].lWordNum == 0)
                continue;

            uTemp[0] = (unsigned char)(i + GBHTBTMNUM);
            uTemp[1] = (unsigned char)(j + GBLWBTMNUM);
            uTemp[2] = '\0';

            rc = InvtPrepareDir(&sCurDir, &dirCap, filepath, uTemp);
            if (rc != 0)
                goto cleanup;

            /* NOTE(review): GetWrtFlName() is assumed to return heap
               storage, as the code this replaces also passed it to free(). */
            free(sFileName);
            sFileName = GetWrtFlName(sCurDir);

            /* 10000 is presumably the line limit per output file - confirm
               against wrtlst(). */
            if (wrtlst(&InvertIndex[i][j], sFileName, sCurDir, 10000) != 0) {
                printf("error is GB invert Index write, the number is: %d\n", EFILEWRT);
                rc = EFILEWRT;
                goto cleanup;
            }
            count++;
        }
    }

    /* Write the English slots. */
    for (i = 0; i < 26; i++) {
        if (EIntIdx[i].lWordNum == 0)
            continue;

        uTemp[0] = (unsigned char)(i + LOWERA);
        uTemp[1] = '\0';

        rc = InvtPrepareDir(&sCurDir, &dirCap, filepath, uTemp);
        if (rc != 0)
            goto cleanup;

        free(sFileName);
        sFileName = GetWrtFlName(sCurDir);

        if (wrt2engfile(&EIntIdx[i], sFileName, sCurDir, 10000) != 0) {
            printf("error is English invert Index write, the number is: %d\n", EFILEWRT);
            rc = EFILEWRT;
            goto cleanup;
        }
        count++;
    }

    printf("The number of word in file is: %d\n", count);

cleanup:
    /* Chains first, then the rows and tables that hold their heads. */
    if (InvertIndex != NULL) {
        for (i = 0; i < gbRows; i++) {
            if (InvertIndex[i] == NULL)
                continue;
            if (chainsBuilt)
                InvtReleaseChains(InvertIndex[i], gbCols);
            free(InvertIndex[i]);
        }
    }
    if (EIntIdx != NULL && chainsBuilt)
        InvtReleaseChains(EIntIdx, 26);
    if (rc == 0)
        printf("free is over!\n");

    free(InvertIndex);
    free(sFileName);
    free(sCurDir);
    free(EIntIdx);

    if (rc == 0)
        printf("InvertBuild is over!\n");
    return rc;
}
/**
 * Myfree - release the word lists held by a forward table.
 *
 * @fNode: array of forward-table entries.
 * @DocCt: number of entries in the array.
 *
 * For each entry the list that starts at wFWordNode is walked through
 * pnext. Every node after the first is released together with its iPos
 * and sWords buffers. Of the first node only iPos and sWords are released;
 * the entry's wFWordNode is then set to NULL. Entries without a word list
 * are skipped.
 *
 * NOTE(review): the first node of each list is never passed to free(),
 * exactly as before this rewrite. If those nodes are allocated one by one
 * this is a leak - confirm against the code that builds the table.
 *
 * Returns -1 when fNode is NULL or the first entry has no word list (in
 * which case nothing is released), 0 otherwise.
 */
int Myfree(ForwardNode *fNode, int DocCt)
{
    WordNode *pHead;
    WordNode *pWordNode;
    WordNode *pNextWord;
    int i;

    if (fNode == NULL || fNode->wFWordNode == NULL)
        return -1;

    for (i = 0; i < DocCt; i++) {
        pHead = fNode[i].wFWordNode;
        if (pHead == NULL)
            continue;   /* this document has no word list */

        /* Release everything behind the first node; the next link is read
           before the node that holds it is freed. */
        pWordNode = pHead->pnext;
        while (pWordNode != NULL) {
            pNextWord = pWordNode->pnext;
            free(pWordNode->iPos);
            free(pWordNode->sWords);
            free(pWordNode);
            pWordNode = pNextWord;
        }
        pHead->pnext = NULL;

        /* Empty the first node and clear its pointers so that nothing
           dangles. */
        free(pHead->iPos);
        pHead->iPos = NULL;
        free(pHead->sWords);
        pHead->sWords = NULL;

        fNode[i].wFWordNode = NULL;
    }

    return 0;
}
/**
 * invt_main - build the forward table for one batch of documents, turn it
 * into the inverted index and release the forward table again.
 *
 * @strBuf:  buffer passed to ForwardBld() as its last argument.
 * @DocCt:   number of documents handled in this call.
 * @sSource: source-document path/name, passed to ForwardBld().
 * @lPos:    not used by this function.
 *
 * The output directory is fixed to "d:\invt0_4" and the record file name to
 * "Record.txt". After ForwardBld() a trailing backslash is appended to the
 * directory, because InvertBuild() appends its sub-directory names to the
 * path without a separator.
 *
 * Returns 0 once the run has finished; a failure of ForwardBld() or
 * InvertBuild() is reported on stdout only and still yields 0. Returns -1
 * when the path buffers cannot be allocated.
 */
int invt_main(char *strBuf, int DocCt, char *sSource, long lPos)
{
    ForwardNode *fTable = NULL;   /* filled in by ForwardBld() */
    char *sFilePath;
    char *sRcdFile;
    int m;

    (void)lPos;

    sFilePath = malloc((size_t)MAXPATH);
    sRcdFile = malloc((size_t)MAXPATH);
    if (sFilePath == NULL || sRcdFile == NULL) {
        printf("error is memory is not enough!\n");
        free(sFilePath);
        free(sRcdFile);
        return -1;
    }

    /* Output directory and the file that stores the document count. */
    strcpy(sFilePath, "d:\\invt0_4");
    strcpy(sRcdFile, "Record.txt");

    /* Build the forward table. */
    m = ForwardBld(sFilePath, sRcdFile, sSource, &fTable, DocCt, strBuf);

    strcat(sFilePath, "\\");

    /* Build and write the inverted index; InvertBuild() only needs the
       table address, not the pointer to it. */
    if (m == 0)
        InvertBuild(fTable, DocCt, sFilePath);
    else
        printf("建立正向表錯誤!\n");

    /* The table may still be NULL when ForwardBld() failed early. */
    if (fTable != NULL) {
        Myfree(fTable, DocCt);
        free(fTable);
    }
    free(sFilePath);
    free(sRcdFile);

    printf("invt_main is over!\n\n\n");
    return 0;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -