svm_learn.c
/* Learns an SVM regression model based on the training data in
   docs/label. The resulting model is returned in the structure
   model. */

void svm_learn_regression(DOC *docs, double *value, long int totdoc,
                          long int totwords, LEARN_PARM *learn_parm,
                          KERNEL_PARM *kernel_parm,
                          KERNEL_CACHE *kernel_cache, MODEL *model)
     /* docs:         Training vectors (x-part) */
     /* value:        Training value (y-part) */
     /* totdoc:       Number of examples in docs/label */
     /* totwords:     Number of features (i.e. highest feature index) */
     /* learn_parm:   Learning parameters */
     /* kernel_parm:  Kernel parameters */
     /* kernel_cache: Initialized Cache of size 2*totdoc */
     /* model:        Returns learning result (assumed empty before called) */
{
  long *inconsistent,i,j;
  long inconsistentnum;
  long upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations;
  long *unlabeled;
  double r_delta_sq=0,r_delta,r_delta_avg;
  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;
  DOC *docs_org;
  long *label;

  /* set up regression problem in standard form */
  docs_org=docs;
  docs = (DOC *)my_malloc(sizeof(DOC)*2*totdoc);
  label = (long *)my_malloc(sizeof(long)*2*totdoc);
  c = (double *)my_malloc(sizeof(double)*2*totdoc);
  for(i=0;i<totdoc;i++) {
    docs[i]=docs_org[i];
    docs[i].docnum=i;
    label[i]=+1;
    c[i]=value[i];
    docs[2*totdoc-1-i]=docs_org[i];
    docs[2*totdoc-1-i].docnum=2*totdoc-1-i;
    label[2*totdoc-1-i]=-1;
    c[2*totdoc-1-i]=value[i];
  }
  totdoc*=2;

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)20000);

  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  inconsistentnum=0;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }

  for(i=0;i<totdoc;i++) {  /* various inits */
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    unlabeled[i]=0;
    if(label[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio;
    }
    else if(label[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c;
    }
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                     kernel_parm,kernel_cache,&shrink_state,
                                     model,inconsistent,unlabeled,a,lin,c,
                                     &timing_profile,&maxdiff,(long)-1,
                                     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
             ((float)runtime_end-(float)runtime_start)/100.0,
             (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
             (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
        if(inconsistent[i])
          inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
             model->sv_num-1,inconsistentnum);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
        if((lin[i]-model->b)*(double)label[i]
           < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
          loss+=-learn_parm->eps+(double)label[i]*c[i]
                -(lin[i]-model->b)*(double)label[i];
        model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
              length_of_longest_document_vector(docs,totdoc,kernel_parm));
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  /* this makes sure the model we return does not contain pointers to the
     temporary documents */
  for(i=1;i<model->sv_num;i++) {
    j=model->supvec[i]->docnum;
    if(j >= (totdoc/2)) {
      j=totdoc-j-1;
    }
    model->supvec[i]=&(docs_org[j]);
  }

  shrink_state_cleanup(&shrink_state);
  free(docs);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}

long optimize_to_convergence(DOC *docs, long int *label, long int totdoc,
                             long int totwords, LEARN_PARM *learn_parm,
                             KERNEL_PARM *kernel_parm,
                             KERNEL_CACHE *kernel_cache,
                             SHRINK_STATE *shrink_state, MODEL *model,
                             long int *inconsistent, long int *unlabeled,
                             double *a, double *lin, double *c,
                             TIMING *timing_profile, double *maxdiff,
                             long int heldout, long int retrain)
     /* docs:          Training vectors (x-part) */
     /* label:         Training labels/value (y-part, zero if test example
                       for transduction) */
     /* totdoc:        Number of examples in docs/label */
     /* totwords:      Number of features (i.e. highest feature index) */
     /* learn_parm:    Learning parameters */
     /* kernel_parm:   Kernel parameters */
     /* kernel_cache:  Initialized/partly filled Cache */
     /* shrink_state:  State of active variables */
     /* model:         Returns learning result */
     /* inconsistent:  examples thrown out as inconsistent */
     /* unlabeled:     test examples for transduction */
     /* a:             alphas */
     /* lin:           linear component of gradient */
     /* c:             upper bounds on alphas */
     /* maxdiff:       returns maximum violation of KT-conditions */
     /* heldout:       marks held-out example for leave-one-out (or -1) */
     /* retrain:       selects training mode (1=regular / 2=holdout) */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
  long inconsistentnum,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam;
  long activenum;
  double criterion,eq;
  double *a_old;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  long transductcycle;
  long transduction;
  double epsilon_crit_org;
  double *selcrit;  /* buffer for sorting */
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  double *weights;  /* buffer for weight vector in linear case */
  QP qp;            /* buffer for one quadratic program */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel_parm->kernel_type == LINEAR) {
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;  /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
                                 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights = (double *)my_malloc(sizeof(double)*(totwords+1));

  choosenum=0;
  inconsistentnum=0;  /* must be initialized before counting below */
  transductcycle=0;
  transduction=0;
  if(!retrain) retrain=1;
  iteration=1;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {  /* various inits */
    chosen[i]=0;
    a_old[i]=a[i];
    last_suboptimal_at[i]=1;
    if(inconsistent[i])
      inconsistentnum++;
    if(unlabeled[i]) {
      transduction=1;
    }
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* repeat this loop until we have convergence */
  for(;retrain;iteration++) {

    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf("Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }

    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    i=0;
    for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
      if((chosen[j]>=(learn_parm->svm_maxqpsize/
                      minl(learn_parm->svm_maxqpsize,
                           learn_parm->svm_newvarsinqp)))
         || (inconsistent[j])
         || (j == heldout)) {
        chosen[j]=0;
        choosenum--;
      }
      else {
        chosen[j]++;
        working2dnum[i++]=j;
      }
    }
    working2dnum[i]=-1;

    if(retrain == 2) {
      choosenum=0;
      for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
        chosen[j]=0;
      }
      clear_index(working2dnum);
      for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
        if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
          chosen[i]=99999;
          choosenum++;
          a[i]=0;
        }
      }
      if(learn_parm->biased_hyperplane) {
        eq=0;
        for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
          eq+=a[i]*label[i];
        }
        for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
          if((eq*label[i] > 0) && (a[i] > 0)) {
            chosen[i]=88888;
            choosenum++;
            if((eq*label[i]) > a[i]) {
              eq-=(a[i]*label[i]);
              a[i]=0;
            }
            else {
              a[i]-=(eq*label[i]);
              eq=0;
            }
          }
        }
      }
      compute_index(chosen,totdoc,working2dnum);
    }
    else { /* select working set according to steepest gradient */
      if(iteration % 101) {
        already_chosen=0;
        if((minl(learn_parm->svm_newvarsinqp,
                 learn_parm->svm_maxqpsize-choosenum)>=4)
           && (kernel_parm->kernel_type != LINEAR)) {
          /* select part of the working set from cache */
          already_chosen=select_next_qp_subproblem_grad_cache(
                             label,unlabeled,a,lin,c,totdoc,
                             (long)(minl(learn_parm->svm_maxqpsize-choosenum,
                                         learn_parm->svm_newvarsinqp)
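The set-up block at the top of svm_learn_regression() is the usual reduction of epsilon-insensitive regression to a classification-style problem, where epsilon is learn_parm->eps and the target value y_i is carried in c[]. Each training pair (x_i, y_i) is duplicated: the copy with label +1 keeps the regression function above the lower edge of the tube, and the copy with label -1 keeps it below the upper edge,

\[ w \cdot x_i + b \;\ge\; y_i - \varepsilon - \xi_i   \qquad \text{(label $+1$ copy)} \]
\[ w \cdot x_i + b \;\le\; y_i + \varepsilon + \xi_i^* \qquad \text{(label $-1$ copy)} \]

with the slacks penalized at cost C. This is exactly the condition tested when the L1 loss is accumulated above: (lin[i]-model->b)*label[i] must not fall below label[i]*c[i] - learn_parm->eps.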
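For orientation, here is a minimal, hypothetical driver for this routine. It is a sketch against this listing only: train_regression_example and the parameter values are invented for illustration, the LEARN_PARM/KERNEL_PARM fields set below are the ones this file actually reads, and all other fields are assumed to be safe when zero-initialized. Because svm_learn_regression() drops the kernel cache for the linear kernel anyway, NULL is passed in place of a cache of size 2*totdoc.

#include <string.h>
#include "svm_common.h"
#include "svm_learn.h"

/* Hypothetical usage sketch: docs (DOC array) and y (target values)
   are assumed to have been built by the caller. */
void train_regression_example(DOC *docs, double *y, long n, long nwords)
{
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL model;

  memset(&learn_parm,0,sizeof(learn_parm));
  memset(&kernel_parm,0,sizeof(kernel_parm));
  memset(&model,0,sizeof(model));       /* "assumed empty before called" */

  learn_parm.svm_c=0.0;           /* 0 lets the code pick its default C */
  learn_parm.eps=0.1;             /* width of the epsilon-insensitive tube */
  learn_parm.svm_costratio=1.0;   /* symmetric cost for the +1/-1 copies */
  learn_parm.svm_maxqpsize=10;    /* size of the QP subproblems */
  learn_parm.svm_newvarsinqp=0;   /* out of range, so reset to maxqpsize */
  learn_parm.epsilon_crit=0.001;  /* tolerance on the KT conditions */
  learn_parm.epsilon_a=1e-12;
  learn_parm.biased_hyperplane=1;
  kernel_parm.kernel_type=LINEAR; /* linear kernel: no cache is needed */

  svm_learn_regression(docs,y,n,nwords,&learn_parm,&kernel_parm,
                       NULL,&model);
}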