?? naive_synthesis.c
字號:
*//************************************************************************//* bw - bandwidth m - order data - a pointer to double array of size (2*bw) containing a synthesized function. result - a pointer to double array of size (bw-m) and containing the computed Legendre coefficients, starting with the Pmm coefficient. workspace - a pointer to double array of size (32*bw) timing - 1 to turn on timing info runtime - a double slot for writing time loops - number of timing loops*/void Naive_Analysis_Timing(double *data, int bw, int m, double *result, int timing, double *runtime, int loops, double *workspace){ double *prev, *prevprev, *temp1, *temp2, *temp3, *temp4, *x_i, *eval_args; double *wdata; const double *weight_vec; int i, j, l; double total_time, tstart = 0.0, tstop; prevprev = workspace; prev = prevprev + (2*bw); temp1 = prev + (2*bw); temp2 = temp1 + (2*bw); temp3 = temp2 + (2*bw); temp4 = temp3 + (2*bw); x_i = temp4 + (2*bw); eval_args = x_i + (2*bw); wdata = eval_args + (2*bw); /* get the evaluation nodes */ ns_EvalPts(2*bw,x_i); ns_ArcCosEvalPts(2*bw,eval_args); /* start the timer if (timing) gettimeofday(&ftstart,0); */ if (timing) tstart = csecond(); /* main timing loop */ for (l=0; l< loops; l++) { /* set initial values of first two Pmls */ for (i=0; i<2*bw; i++) prevprev[i] = 0.0; if (m == 0) { for (i=0; i<2*bw; i++) { prev[i] = 0.5; } } else ns_Pmm_L2(m, eval_args, 2*bw, prev); /* make sure result is zeroed out */ for (i=0; i<(bw-m); i++) result[i] = 0.0; /* apply quadrature weights */ weight_vec = get_weights(bw); for (i=0; i<(2*bw); i++) wdata[i] = data[i] * weight_vec[i]; /* compute Pmm coefficient */ for (j=0; j<(2*bw); j++) result[0] += wdata[j] * prev[j]; /* now generate remaining pmls while computing coefficients */ for (i=0; i<bw-m-1; i++) { ns_vec_mul(ns_L2_cn(m,m+i),prevprev,temp1,2*bw); ns_vec_pt_mul(prev, x_i, temp2, 2*bw); ns_vec_mul(ns_L2_an(m,m+i), temp2, temp3, 2*bw); ns_vec_add(temp3, temp1, temp4, 2*bw); /* temp4 now contains P(m,m+i+1) */ /* compute this coefficient */ for (j=0; j<(2*bw); j++) result[i+1] += wdata[j] * temp4[j]; /* now update Pi and P(i+1) */ memcpy(prevprev, prev, sizeof(double) * 2 * bw); memcpy(prev, temp4, sizeof(double) * 2 * bw); } } /* closes main timing loop */ if (timing) { tstop = csecond(); total_time = tstop - tstart; *runtime = total_time; fprintf(stdout,"\n"); fprintf(stdout,"Program: Naive Legendre Transform \n"); fprintf(stdout,"m = %d\n", m); fprintf(stdout,"Bandwidth = %d\n", bw);#ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time: %f seconds.\n\n", total_time); #else fprintf(stdout,"Total elapsed wall time: %f seconds.\n\n", total_time); #endif } else *runtime = 0.0;}/************************************************************************//* bw - bandwidth m - order data - a pointer to double array of size (2*bw) containing a synthesized function. result - a pointer to double array of size (bw-m) and containing the computed Legendre coefficients, starting with the Pmm coefficient. workspace - a pointer to double array of size (32*bw) timing - 1 to turn on timing info runtime - a double slot for writing time loops - number of timing loops Just like the above routine EXCEPT I precompute the legendre functions before turning on the stopwatch.*/void Naive_Analysis_TimingX(double *data, int bw, int m, double *result, int timing, double *runtime, int loops, double *workspace){ double *prev, *prevprev, *temp1, *temp2, *temp3, *temp4, *x_i, *eval_args; double *wdata; const double *weight_vec; int i, j, l; double *storeplm, *storeplm_ptr; double result0, result1, result2, result3; double total_time, tstart = 0, tstop; prevprev = workspace; prev = prevprev + (2*bw); temp1 = prev + (2*bw); temp2 = temp1 + (2*bw); temp3 = temp2 + (2*bw); temp4 = temp3 + (2*bw); x_i = temp4 + (2*bw); eval_args = x_i + (2*bw); wdata = eval_args + (2*bw); storeplm = (double *) malloc(sizeof(double) * 2 * bw * (bw - m)); storeplm_ptr = storeplm; /* get the evaluation nodes */ ns_EvalPts(2*bw,x_i); ns_ArcCosEvalPts(2*bw,eval_args); /* set initial values of first two Pmls */ for (i=0; i<2*bw; i++) prevprev[i] = 0.0; if (m == 0) for (i=0; i<2*bw; i++) prev[i] = 0.5; else ns_Pmm_L2(m, eval_args, 2*bw, prev); memcpy(storeplm, prev, (size_t) sizeof(double) * 2 * bw); for(i = 0; i < bw - m - 1; i++) { ns_vec_mul(ns_L2_cn(m,m+i),prevprev,temp1,2*bw); ns_vec_pt_mul(prev, x_i, temp2, 2*bw); ns_vec_mul(ns_L2_an(m,m+i), temp2, temp3, 2*bw); ns_vec_add(temp3, temp1, temp4, 2*bw); /* temp4 now contains P(m,m+i+1) */ storeplm += (2 * bw); memcpy(storeplm, temp4, (size_t) sizeof(double) * 2 * bw); memcpy(prevprev, prev, (size_t) sizeof(double) * 2 * bw); memcpy(prev, temp4, (size_t) sizeof(double) * 2 * bw); } storeplm = storeplm_ptr; /* start the timer */ if (timing) tstart = csecond(); /* main timing loop */ for (l=0; l< loops; l++) { /* make sure result is zeroed out */ for (i=0; i<(bw-m); i++) result[i] = 0.0; /* apply quadrature weights */ weight_vec = get_weights(bw); ns_vec_pt_mul(data, weight_vec, wdata, 2 * bw); storeplm = storeplm_ptr; for (i = 0; i < bw - m; i++) { result0 = 0.0; result1 = 0.0; result2 = 0.0; result3 = 0.0; for(j = 0; j < (2 * bw) % 4; ++j) result0 += wdata[j] * storeplm[j]; for( ; j < (2 * bw); j += 4) { result0 += wdata[j] * storeplm[j]; result1 += wdata[j + 1] * storeplm[j + 1]; result2 += wdata[j + 2] * storeplm[j + 2]; result3 += wdata[j + 3] * storeplm[j + 3]; } result[i] = result0 + result1 + result2 + result3; storeplm += (2 * bw); } } /* closes main timing loop */ if (timing) { tstop = csecond(); total_time = tstop - tstart; *runtime = total_time; fprintf(stdout,"\n"); fprintf(stdout,"Program: Naive Legendre Transform \n"); fprintf(stdout,"m = %d\n", m); fprintf(stdout,"Bandwidth = %d\n", bw);#ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time: %f seconds.\n\n", total_time); #else fprintf(stdout,"Total elapsed wall time: %f seconds.\n\n", total_time); #endif } else *runtime = 0.0; storeplm = storeplm_ptr; free(storeplm);}/************************************************************************//* bw - bandwidth m - order data - a pointer to double array of size (2*bw) containing a synthesized function. plmtable - a pointer to a double array of size (2*bw*(bw-m)); contains the precomputed plms result - a pointer to double array of size (bw-m) and containing the computed Legendre coefficients, starting with the Pmm coefficient. workspace - array of size 2 * bw; A minimal, hacked version of the above routine, except that as input ones of the arguments is the precomputed table of necessary associated Legendre functions.*/void Naive_AnalysisX(double *data, int bw, int m, double *result, double *plmtable, double *workspace){ int i, j; const double *weight_vec; double result0, result1, result2, result3; register double *wdata; wdata = workspace; /* make sure result is zeroed out */ for (i=0; i<(bw-m); i++) result[i] = 0.0; /* apply quadrature weights */ weight_vec = get_weights(bw); /* ns_vec_pt_mul(data, weight_vec, wdata, 2 * bw); */ for(i = 0; i < 2 * bw; i++) wdata[i] = data[i] * weight_vec[i]; for (i = 0; i < bw - m; i++) { result0 = 0.0; result1 = 0.0; result2 = 0.0; result3 = 0.0; for(j = 0; j < (2 * bw) % 4; ++j) result0 += wdata[j] * plmtable[j]; for( ; j < (2 * bw); j += 4) { result0 += wdata[j] * plmtable[j]; result1 += wdata[j + 1] * plmtable[j + 1]; result2 += wdata[j + 2] * plmtable[j + 2]; result3 += wdata[j + 3] * plmtable[j + 3]; } result[i] = result0 + result1 + result2 + result3; plmtable += (2 * bw); }}/************************************************************************//* This is the procedure that synthesizes a function from a list of coefficients of a Legendre series. Function is synthesized at the (2*bw) Chebyshev nodes. Associated Legendre functions are assumed to be precomputed. bw - bandwidth m - order plmtable - precomputed associated Legendre functions coeffs - a pointer to double array of size (bw-m). First coefficient is coefficient for Pmm result - a pointer to double array of size (2*bw) and containing the synthesized function*/void Naive_SynthesizeX(double *coeffs, int bw, int m, double *result, double *plmtable){ int i, j; double tmpcoef; /* make sure result is zeroed out for (i=0; i<(2*bw); i++) result[i] = 0.0; */ /* add in Pmm contribution */ tmpcoef = coeffs[0]; if (tmpcoef != 0.0) { if (m == 0) for (j=0; j<(2*bw); j++) result[j] = tmpcoef; else for (j=0; j<(2*bw); j++) result[j] = tmpcoef * plmtable[j]; } plmtable += ( 2 * bw ); /* now generate remaining pmls while synthesizing function */ if (m == 0) { for (i=0; i<bw-m-1; i++) { /* add in contribution */ tmpcoef = coeffs[i+1] * 2.0; if (tmpcoef != 0.0) { for (j=0; j<(2*bw); j++) result[j] += (tmpcoef * plmtable[j]); plmtable += (2 * bw); } } } else { for (i=0; i<bw-m-1; i++) { /* add in contribution */ tmpcoef = coeffs[i+1]; if (tmpcoef != 0.0) { for (j=0; j<(2*bw); j++) result[j] += (tmpcoef * plmtable[j]); plmtable += (2 * bw); } } }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -