?? invert10_31.c
字號(hào):
/* 若文件不存在, 以創(chuàng)建方式打開文件 */
stream = fopen(sDir, "w+");
if(stream == NULL)
{
printf("error is: %d\n", EFILEOPEN);
return EFILEOPEN;
}
/*第一次創(chuàng)建時(shí)寫入1.txt*/
fprintf(stream, "%s", "1.txt");
fclose(stream);
}
/*將文件名字記錄在$curfile.txt文件中*/
else
{
fprintf(stream, "%s", sFileName);
fclose(stream);
}
return 0;
}
/*
 * wrtlst - append every posting of an inverted list to an index file.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list to dump (traversed, not modified)
 *   sFileName   name of the index file to start with
 *   sCurDir     directory holding the index files (joined with "\\")
 *   nKbyte      size limit compared against filesize(); when the current
 *               file has reached it, the next file name is requested from
 *               create_next_file_name() until a file below the limit is found
 *               (NOTE(review): the unit is whatever filesize() returns - confirm)
 *
 * Record layout written for each word that has at least one document:
 *   "@<word> #<docnum> " followed, per document, by
 *   "<docid> ,<weight>,<freq>,<p0>+<p1>+<p2>,<url> ;\n"
 * At most three positions are written per document to keep the file small.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when
 * memory cannot be allocated, the file cannot be opened, a generated file
 * name does not fit, or WrtFlName() fails.  All resources are released on
 * every path.
 */
int wrtlst(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    DocNode *pCurDocNode;
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;
    size_t nameCap, pathCap;
    long curFileSize;
    int i;
    int rc = EINVTWRT;

    if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    /* Size the buffers from both inputs so a long sFileName cannot overflow. */
    nameCap = strlen(sCurDir) + strlen(sFileName) + 20;
    pathCap = strlen(sCurDir) + 1 + nameCap;
    curFileName = (char *)malloc(nameCap);
    newFileName = (char *)malloc(nameCap);
    curPFN = (char *)malloc(pathCap);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    strcpy(curFileName, sFileName);
    strcpy(curPFN, sCurDir);
    strcat(curPFN, "\\");
    strcat(curPFN, curFileName);

    stream = fopen(curPFN, "a+");
    if (stream == NULL)
    {
        /* Could not open for append: try to create the file instead. */
        stream = fopen(curPFN, "wt");
    }
    else
    {
        /* The file exists: skip forward to the first file below the limit. */
        curFileSize = filesize(stream);
        while (curFileSize >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL) || (strlen(newFileName) >= nameCap))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "a+");
            if (stream == NULL)
            {
                stream = fopen(curPFN, "w+");
            }
            if (stream == NULL)
            {
                break;
            }
            curFileSize = filesize(stream);
        }
        if (stream != NULL)
        {
            fseek(stream, 0L, SEEK_END);
        }
    }
    if (stream == NULL)
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    /* Record which file is currently being written. */
    if(WrtFlName(curFileName, sCurDir) != 0)
    {
        printf("wrtfilname error!\n");
        goto cleanup;
    }

    /*
     * Write the postings straight to the stream.  The bytes produced are the
     * same as building the whole text in memory first, but no fixed-size
     * staging buffer can be overrun by a large list.
     */
    while (pCurIvtIdx != NULL)
    {
        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode != NULL)
        {
            fprintf(stream, "@");
            fprintf(stream, "%s", pCurIvtIdx->sWords);
            fprintf(stream, " #%d ", pCurIvtIdx->lDocNum);
            while (pCurDocNode != NULL)
            {
                fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
                /* Up to two positions followed by '+', then one final position. */
                for(i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
                {
                    fprintf(stream, "%d+", pCurDocNode->iPos[i]);
                }
                fprintf(stream, "%d,", pCurDocNode->iPos[i]);
                fprintf(stream, "%s ", pCurDocNode->sFileURL);
                fprintf(stream, ";\n");
                pCurDocNode = pCurDocNode->pNext;
            }
        }
        pCurIvtIdx = pCurIvtIdx->pNextNode;
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(curFileName);
    free(newFileName);
    free(curPFN);
    return rc;
}
/*
 * wrtlst2file - write an inverted list to index files, checking the file
 * size before every word and rolling over to the next file when needed.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list to dump
 *   sFileName   name of the index file to start with
 *   sCurDir     directory holding the index files (joined with "\\")
 *   nKbyte      size limit compared against filesize()
 *               (NOTE(review): the unit is whatever filesize() returns - confirm)
 *
 * Record layout written for each word that has at least one document:
 *   "@<word> #<docnum> " followed, per document, by
 *   "<docid> ,<weight>,<freq>,<p0>+...+<pLast>,<url> ;\n"
 * (every position is written here, unlike the three-position variants).
 *
 * Side effect kept from the original implementation: while a word's
 * documents are written, each visited node is unlinked from the head of that
 * word's document list, so afterwards the head's pNext is NULL.
 * NOTE(review): the unlinked nodes are not freed here - confirm who owns them.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when
 * memory cannot be allocated, a file cannot be opened, a generated file name
 * does not fit, or WrtFlName() fails.  All resources are released on every
 * path.
 */
int wrtlst2file(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    long curFileSize;
    DocNode *pCurDocNode, *pPreDoc;
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;
    size_t nameCap, pathCap;
    int i;
    int rc = EINVTWRT;

    if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    /* Size the buffers from both inputs so a long sFileName cannot overflow. */
    nameCap = strlen(sCurDir) + strlen(sFileName) + 20;
    pathCap = strlen(sCurDir) + 1 + nameCap;
    curFileName = (char *)malloc(nameCap);
    newFileName = (char *)malloc(nameCap);
    curPFN = (char *)malloc(pathCap);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    strcpy(curFileName, sFileName);
    strcpy(curPFN, sCurDir);
    strcat(curPFN, "\\");
    strcat(curPFN, curFileName);

    stream = fopen(curPFN, "a+");
    if (stream == NULL)
    {
        /* Could not open for append: try to create the file instead. */
        stream = fopen(curPFN, "wt");
    }
    else
    {
        /* The file exists: skip forward to the first file below the limit. */
        curFileSize = filesize(stream);
        while (curFileSize >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL) || (strlen(newFileName) >= nameCap))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "a+");
            if (stream == NULL)
            {
                stream = fopen(curPFN, "w+");
            }
            if (stream == NULL)
            {
                break;
            }
            curFileSize = filesize(stream);
        }
        if (stream != NULL)
        {
            fseek(stream, 0L, SEEK_END);
        }
    }
    if (stream == NULL)
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    while (pCurIvtIdx != NULL)
    {
        /* Roll over to a fresh file once the current one reached the limit. */
        curFileSize = filesize(stream);
        if (curFileSize >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL) || (strlen(newFileName) >= nameCap))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "wt");
            if (stream == NULL)
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
        }
        /* filesize() may have moved the file position; write at the end. */
        fseek(stream, 0L, SEEK_END);

        /* Record which file is currently being written. */
        if(WrtFlName(curFileName, sCurDir) != 0)
        {
            printf("wrtfilname error!\n");
            goto cleanup;
        }

        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode != NULL)
        {
            fprintf(stream, "@");
            fprintf(stream, "%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
            pPreDoc = pCurDocNode;
            while (pCurDocNode != NULL)
            {
                fprintf(stream, "%s ,%f,%d,",pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
                for(i = 0; i < pCurDocNode->iFreq - 1; i++)
                {
                    fprintf(stream, "%d+", pCurDocNode->iPos[i]);
                }
                fprintf(stream, "%d,", pCurDocNode->iPos[i]);
                fprintf(stream, "%s ", pCurDocNode->sFileURL);
                fprintf(stream, ";\n");
                /* Unlink the written node from the head of the list. */
                pPreDoc->pNext = pCurDocNode->pNext;
                pCurDocNode = pPreDoc->pNext;
            }
        }

        /*
         * Keep the stream open for the next word: closing it here would make
         * the next iteration operate on a closed FILE.  Flush instead, so the
         * next size check sees the bytes written so far.
         */
        fflush(stream);
        pCurIvtIdx = pCurIvtIdx->pNextNode;
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(curFileName);
    free(newFileName);
    free(curPFN);
    return rc;
}
/*
 * wrt2engfile - write English words to the inverted index files.
 *
 * Unlike the Chinese inverted files, the English index uses two directory
 * levels: the first level is named after the first letter of the word and
 * the second level after the second letter.  A single-letter word is stored
 * directly in the first-level directory.  sCurDir is already the first-level
 * directory; this function only appends the second letter.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list
 *   sFileName   name of the file to write, e.g. "1.txt" (used for
 *               single-letter words; for longer words the name comes from
 *               GetWrtFlName() on the second-level directory)
 *   sCurDir     current path, e.g. "e:\\invt\\a"
 *   nKbyte      maximum size of one index file, compared against filesize()
 *               (NOTE(review): the unit is whatever filesize() returns - confirm)
 *
 * Record layout written for each word that has at least one document:
 *   "@<word> #<docnum> " followed, per document, by
 *   "<docid> ,<weight>,<freq>,<p0>+<p1>+<p2>,<url> ;\n"
 * At most three positions are written per document.
 *
 * Side effects kept from the original implementation: every processed index
 * node is unlinked from the head of the inverted list, and every written
 * document node is unlinked from the head of its document list.
 * NOTE(review): the unlinked nodes are not freed here - confirm who owns them.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when
 * memory cannot be allocated, a file cannot be opened, a file name does not
 * fit, or WrtFlName() fails.  All resources are released on every path.
 */
int wrt2engfile(InvertNode *pCurIvtIdx, char * sFileName, const char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    long curFileSize;
    DocNode *pCurDocNode, *pPreDoc;
    InvertNode *pPreIdx;
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL, *Dir = NULL;
    char *sWrtName;
    size_t nameCap, dirCap, pathCap, nWordLen, nDirLen;
    int i;
    int rc = EINVTWRT;

    if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    nameCap = strlen(sCurDir) + strlen(sFileName) + 20;
    dirCap = strlen(sCurDir) + 4;      /* dir + '\\' + letter + '\\' + NUL */
    pathCap = dirCap + nameCap;
    curFileName = (char *)malloc(nameCap);
    newFileName = (char *)malloc(nameCap);
    curPFN = (char *)malloc(pathCap);
    Dir = (char *)malloc(dirCap);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL) || (Dir == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    pPreIdx = pCurIvtIdx;
    while (pCurIvtIdx != NULL)
    {
        nWordLen = strlen(pCurIvtIdx->sWords);
        if (nWordLen == 0)
        {
            /* Nothing to write for an empty word: unlink it and move on. */
            pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
            pCurIvtIdx = pPreIdx->pNextNode;
            continue;
        }

        /* Work out the target directory and the file name to start with. */
        strcpy(Dir, sCurDir);
        if (nWordLen == 1)
        {
            /* Single-letter word: store it in the first-level directory. */
            mkdir(Dir);
            strcpy(curFileName, sFileName);
        }
        else
        {
            /* Longer word: the second letter names the sub-directory. */
            nDirLen = strlen(Dir);
            Dir[nDirLen] = '\\';
            Dir[nDirLen + 1] = pCurIvtIdx->sWords[1];
            Dir[nDirLen + 2] = '\0';
            mkdir(Dir);
            /*
             * The original code released the returned string with free(), so
             * the result is treated as caller-owned heap memory here as well.
             */
            sWrtName = GetWrtFlName(Dir);
            if ((sWrtName == NULL) || (strlen(sWrtName) >= nameCap))
            {
                free(sWrtName);
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, sWrtName);
            free(sWrtName);
        }

        strcpy(curPFN, Dir);
        strcat(curPFN, "\\");
        strcat(curPFN, curFileName);

        stream = fopen(curPFN, "a+");
        if (stream == NULL)
        {
            /* Could not open for append: try to create the file instead. */
            stream = fopen(curPFN, "wt");
        }
        else
        {
            /* The file exists: skip forward to the first file below the limit. */
            curFileSize = filesize(stream);
            while (curFileSize >= nKbyte)
            {
                fclose(stream);
                stream = NULL;
                create_next_file_name(curFileName, &newFileName);
                if ((newFileName == NULL) || (strlen(newFileName) >= nameCap))
                {
                    printf("error is: %d\n", EINVTWRT);
                    goto cleanup;
                }
                strcpy(curFileName, newFileName);
                strcpy(curPFN, Dir);
                strcat(curPFN, "\\");
                strcat(curPFN, curFileName);
                stream = fopen(curPFN, "a+");
                if (stream == NULL)
                {
                    stream = fopen(curPFN, "w+");
                }
                if (stream == NULL)
                {
                    break;
                }
                curFileSize = filesize(stream);
            }
            if (stream != NULL)
            {
                fseek(stream, 0L, SEEK_END);
            }
        }
        if (stream == NULL)
        {
            printf("error is: %d\n", EINVTWRT);
            goto cleanup;
        }

        /* Record the current file name; the directory is passed with a trailing "\\". */
        strcpy(curPFN, Dir);
        strcat(curPFN, "\\");
        if(WrtFlName(curFileName, curPFN) != 0)
        {
            printf("wrtfilname error!\n");
            goto cleanup;
        }

        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode != NULL)
        {
            fprintf(stream, "@");
            fprintf(stream, "%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
            pPreDoc = pCurDocNode;
            while (pCurDocNode != NULL)
            {
                fprintf(stream, "%s ,%f,%d,",pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
                /* Up to two positions followed by '+', then one final position. */
                for(i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
                {
                    fprintf(stream, "%d+", pCurDocNode->iPos[i]);
                }
                fprintf(stream, "%d,", pCurDocNode->iPos[i]);
                fprintf(stream, "%s ", pCurDocNode->sFileURL);
                fprintf(stream, ";\n");
                /* Unlink the written node from the head of the list. */
                pPreDoc->pNext = pCurDocNode->pNext;
                pCurDocNode = pPreDoc->pNext;
            }
        }
        fclose(stream);
        stream = NULL;

        /* Unlink the processed index node from the head of the list. */
        pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
        pCurIvtIdx = pPreIdx->pNextNode;
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(curFileName);
    free(newFileName);
    free(curPFN);
    free(Dir);
    return rc;
}
int IdxFlWrt(char * sFileName,char * StringBuffer)
{
?? 快捷鍵說(shuō)明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -