wicoo.c

查詞典源代碼，希望對大家學習自然語言處理技術有所幫助

?? C

字號:

#include <assert.h>
#include <math.h>
#include <stdio.h>

#include <bow/libbow.h>

/* Build a word/word co-occurrence table from the word vectors that
   bow_nontest_next_wv() yields for BARREL.

   For every pair of entries (w1, w2) found in the same word vector,
   the entry for w1 is updated under index w2 with w2's count and with
   w2's count divided by the total word count of that vector.  The IDF
   field of each word's vector accumulates the word's relative
   frequency per document and is finally normalized so that all IDF
   fields sum to one.

   Returns the table created with bow_wi2dvf_new().  */
bow_wi2dvf *
bow_wicoo_from_barrel (bow_barrel *barrel)
{
  bow_wi2dvf *wicoo;
  bow_dv_heap *heap;
  float num_words_in_wv;
  int wvi1, wvi2;
  bow_wv *wv;
  int di;
  bow_dv *dv;

  wicoo = bow_wi2dvf_new (0);

  /* Add statistics for all word co-occurrences. */
  /* And prepare to set IDF to Pr(w) */
  heap = bow_test_new_heap (barrel);
  wv = NULL;
  bow_verbosify (bow_progress,
                 "Calculating word co-occurrences          ");
  while ((di = bow_nontest_next_wv (heap, barrel, &wv))
         != -1)
    {
      /* Refresh the progress counter every tenth document index. */
      if (di % 10 == 0)
        bow_verbosify (bow_progress, "\b\b\b\b\b\b\b%7d", di);

      /* Calculate the total number of words in WV */
      num_words_in_wv = 0;
      for (wvi1 = 0; wvi1 < wv->num_entries; wvi1++)
        {
#if 0
          /* Only count those words that are part of the vocabulary. */
          if (bow_str2int_no_add (rainbowh_arg_state.vocab_map,
                                  bow_int2word (wv->entry[wvi1].wi))
              != -1)
#endif
            num_words_in_wv += wv->entry[wvi1].count;
        }

      for (wvi1 = 0; wvi1 < wv->num_entries; wvi1++)
        {
          for (wvi2 = 0; wvi2 < wv->num_entries; wvi2++)
            {
              /* Set COUNT to co-occurrence count.
                 Set WEIGHT to probabilistic sampling of document,
                 then word. */
              bow_wi2dvf_add_wi_di_count_weight
                (&wicoo, wv->entry[wvi1].wi, wv->entry[wvi2].wi,
                 wv->entry[wvi2].count,
                 wv->entry[wvi2].count / num_words_in_wv);
            }
          dv = bow_wi2dvf_dv (wicoo, wv->entry[wvi1].wi);
          /* This relies on IDF being initialized to zero in bow_dv_new() */
          dv->idf += wv->entry[wvi1].count / num_words_in_wv;
        }
    }

  /* Normalize the IDF's so they are equal to Pr(w) in the corpus. */
  {
    int wi;
    double idf_total = 0;

    for (wi = 0; wi < wicoo->size; wi++)
      {
        dv = bow_wi2dvf_dv (wicoo, wi);
        if (dv)
          idf_total += dv->idf;
      }
    /* Skip the division when nothing was accumulated, so that no IDF
       is turned into NaN by a 0/0. */
    if (idf_total > 0)
      {
        for (wi = 0; wi < wicoo->size; wi++)
          {
            dv = bow_wi2dvf_dv (wicoo, wi);
            if (dv)
              dv->idf /= idf_total;
          }
      }
  }

  bow_verbosify (bow_progress, "\n");
  return wicoo;
}

/* Placeholder: the body is empty, so WICOO, WI1 and WI2 are unused and
   nothing is computed or returned.  */
void
bow_wicoo_pr_w_w (bow_wi2dvf *wicoo, int wi1, int wi2)
{
}

/* Print, for word WI, its m-estimate-smoothed co-occurrence
   probability with every word index that has an entry in WICOO,
   followed by the entropy of that distribution.

   The smoothing prior of each word is the IDF field of its vector and
   the prior weight is WICOO->num_words / 100 (integer division).
   Returns without printing anything when WI has no entry in WICOO.
   Asserts that the printed probabilities sum to approximately one.  */
void
bow_wicoo_print_word_entropy (bow_wi2dvf *wicoo, int wi)
{
  bow_dv *coov;
  float total_num_coo_words;
  float pr_w_w;
  float total_pr_w_w;
  int coovi;
  float entropy;
  int wi2, max_wi;
  int m_est_m;
  float m_est_p;
  bow_dv *dv2;

  coov = bow_wi2dvf_dv (wicoo, wi);
  if (!coov)
    return;

  total_num_coo_words = 0;
  for (coovi = 0; coovi < coov->length; coovi++)
    total_num_coo_words += coov->entry[coovi].weight;

  entropy = 0;
  max_wi = bow_num_words ();
  m_est_m = wicoo->num_words / 100;
  total_pr_w_w = 0;
  for (wi2 = 0, coovi = 0; wi2 < max_wi; wi2++)
    {
      dv2 = bow_wi2dvf_dv (wicoo, wi2);
      if (!dv2)
        continue;
      m_est_p = dv2->idf;

      /* Advance the cursor into COOV up to WI2.  The bound is tested
         before the entry is read so that entry[length] is never
         touched once the cursor has run off the end. */
      while (coovi < coov->length && coov->entry[coovi].di < wi2)
        coovi++;

      if (coovi < coov->length && coov->entry[coovi].di == wi2)
        {
          /* Found word WI2 in vector. */
          pr_w_w = (((float)coov->entry[coovi].weight + m_est_m * m_est_p)
                    / (total_num_coo_words + m_est_m));
        }
      else
        {
          /* Word WI2 does not co-occur with WI. */
          pr_w_w = ((m_est_m * m_est_p)
                    / (total_num_coo_words + m_est_m));
        }
#if 1
      printf ("%-30s %12.7f %s\n",
              bow_int2word (wi), pr_w_w, bow_int2word (wi2));
#endif
      total_pr_w_w += pr_w_w;
      /* A zero probability contributes nothing to the entropy;
         evaluating 0 * log (0) would instead produce NaN.  This can
         happen when M_EST_M is zero (fewer than 100 words). */
      if (pr_w_w > 0)
        entropy -= pr_w_w * log (pr_w_w);
    }
  assert (total_pr_w_w > 0.99 && total_pr_w_w < 1.01);
  printf ("%-15.7f %s\n", entropy, bow_int2word (wi));
}

/* Shrink the weights of WV toward documents in BARREL, according to
   their distance to WV.
   NOTE: currently a stub; it returns immediately and leaves both
   arguments untouched.  */
void
bow_barrel_shrink_wv (bow_barrel *barrel, bow_wv *wv)
{
  return;
}

?? 文件大小 2 K

?? 上傳用戶 god_dog

?? 所屬分類 Linux/Unix編程

相關標籤

#詞典 #源代碼 #家 #帶來

?? 快捷鍵說明

複製代碼 Ctrl + C

搜索代碼 Ctrl + F

全屏模式 F11

切換主題 Ctrl + Shift + D

顯示快捷鍵 ?

增大字號 Ctrl + =

減小字號 Ctrl + -

亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? wicoo.c

?? 快捷鍵說明