?? sphere.cpp
字號:
//// spectral toolkit // copyright (c) 2005 university corporation for atmospheric research// licensed under the gnu general public license//#include "alloc.h"#include "sphere.h"#include "decompose.h"#include "legendre.h"#include "alf.h"using namespace spectral;/// Constructor for spherical harmonic transform.////// \param n number of latitude points between the equator and pole/// \param k number of fields to transformsphere::sphere(int n,int k){ Nodes=2; G=new group(Nodes); T=new thread*[Nodes]; T[0]=new thread(2*n,k,2*n,0,k/2,0,G); T[1]=new thread(2*n,k,2*n-1,1,(k+1)/2,k/2,G); }/// Constructor for multiple thread spherical harmonic transform.////// \param N number of latitude points between the equator and pole/// \param K number of fields to transform/// \param threads number of threads (>2) used for transformsphere::sphere(int N,int K,int threads){ Nodes=max(threads,2); G=new group(Nodes); T=new thread*[Nodes]; int KL,K0,ML,M0; int M=2*N; for(int n=0;n<Nodes;n++) { decompose(K,Nodes,n,KL,K0); if(n%2) { decompose(M/2,Nodes/2,n/2,ML,M0); T[n]=new thread(M,K,2*ML-1,M0*2+1,KL,K0,G); } else { decompose((M+1)/2,(Nodes+1)/2,n/2,ML,M0); T[n]=new thread(M,K,ML*2,M0*2,KL,K0,G); } }}/// Destructor for spherical harmonic transform.sphere::~sphere(){ for(int i=0;i<Nodes;i++) delete T[i]; delete [] T; delete G;}/// Analysis method for multiple instance spherical harmonic transform. /// Computes the spectral coefficients/// \f[ \xi_n^m = \sum_{j=0}^{2N-1} \hat{\xi}_j^m \overline{P_n^m}(\mu_j) w_j \f]/// where \f$ \mu_j \f$ are the Gaussian latitude points, \f$ w_j \f$ the Gaussian/// weights, and/// \f[ \hat{\xi}_j^m = \frac{1}{4N} \sum_{k=0}^{4N-1} g_{j,k} e^{-2 \pi i k m/4N} \f]/// the Fourier transform of the initial data along a longitude. The \f$ \xi_n^m \f$ are/// computed for \f$ m=0, \ldots, 2N-1 \f$ and \f$ n(m)=m, \ldots, 2N-1 \f$ for each \f$ m \f$./// The input array g is of size g[K][2N][4N] and the spectral coefficients are stored in the output array /// sc as \f$ \xi_n^m \f$ = sc[k][m][n]. The triangular array formed with alloct may be used to store /// sc, but a rectangular array will work as well.void sphere::analysis(real ***g,complex ***sc){ for(int i=0;i<Nodes;i++) T[i]->analysis(g,sc); for(int i=0;i<Nodes;i++) T[i]->wait();}/// Synthesis method for multiple instance spherical harmonic transform./// Assembles the 3-d real array in physical space from the spectral/// coeffcients by/// \f[ g_{j,k} = \sum_{m=0}^{4N-1} \sum_{n=m}^{2N} \xi_n^m \overline{P_n^m}(j) e^{2 \pi i k m/4N} \f]/// for each instance.void sphere::synthesis(complex ***sc,real ***g){ for(int i=0;i<Nodes;i++) T[i]->synthesis(sc,g); for(int i=0;i<Nodes;i++) T[i]->wait();}/// Internal thread analysis driver.void sphere::thread::analysis(real ***g_,complex ***sc_){ g=g_; sc=sc_; Method=ANALYSIS; spawn();}/// Internal thread synthesis driver.void sphere::thread::synthesis(complex ***sc_,real ***g_){ g=g_; sc=sc_; Method=SYNTHESIS; spawn();}/// Internal thread method started when thread is spawned.void sphere::thread::start(){ switch(Method) { case ANALYSIS: Analysis(); break; case SYNTHESIS: Synthesis(); break; }}/// Internal thread join method.void sphere::thread::wait(){ join();}/// Internal thread constructor. Each thread works over a specified range of M=2N/// for a specified parity, computing every other m value./// \param nlat number of latitudes/// \param ninst number of instances/// \param ml length of m-block/// \param m0 start of m-block/// \param kl length of k-block/// \param k0 start of k-block/// \param GROUP group object for syncsphere::thread::thread(int nlat,int ninst,int ml,int m0,int kl,int k0,group *GROUP){ M=nlat; K=ninst; N=(M+1)/2; M0=m0; ML=ml; K0=k0; KL=kl; Group=GROUP; w=alloc<real>(M); RFFT = new rfft(2*M); SC=alloc<complex>(2,K,N); G=alloc<complex>(2,K,N); legendre Gaussian(M); for(int i=0;i<M;i++) w[i]=Gaussian.weight(i)/(real)(2*M); M0=m0; N=(M+1)/2; gen=alloct<generator>(M); seed =alloc<real>(2,N,N); P = alloc<real>(2,N,N); Pmn = alloc<real>(2,2,N,N); for(int n=M0;n<M;n++) { alf A(n,M0); for(int i=0;i<N;i++) seed[(M0+n)%2][n/2][i]=A(Gaussian.point(i)); } for(int m=0;m<M;m++) { for(int n=m;n<M;n++) { if(m<2) { gen[m][n].c0 = 0.0; gen[m][n].c1 = 0.0; gen[m][n].c2 = 0.0; } else { real A=(real)n; real B=(real)m; gen[m][n].c0 = std::sqrt(((2.0*A+1.0)*(A+B-2.0)*(A+B-3.0))/((2.0*A-3.0)*(A+B)*(A+B-1))); gen[m][n].c1 = std::sqrt(((2.0*A+1.0)*(A-B)*(A-B-1.0))/((2.0*A-3.0)*(A+B)*(A+B-1.0))); gen[m][n].c2 =-std::sqrt(((A-B+2.0)*(A-B+1.0))/((A+B)*(A+B-1.0))); } } }}/// Internal thread desctuctor.sphere::thread::~thread(){ delete RFFT; dealloc<real>(w); dealloc<complex>(SC); dealloc<complex>(G); dealloc<real>(Pmn); dealloc<real>(P); dealloc<real>(seed); dealloc<generator>(gen); }/// Internal thread analysis engine. Does forward recursion over the Pnm's on each pass.void sphere::thread::Analysis(){ for(int k=K0;k<KL+K0;k++) { RFFT->analysis(g[k],M); for(int i=0;i<N-M%2;i++) { for(int j=0;j<2*M;j++) { real t1=g[k][i ][j]; real t2=g[k][M-i-1][j]; g[k][i ][j] = w[i]*(t1+t2); g[k][M-i-1][j] = w[i]*(t1-t2); } } if(M%2) { for(int j=0;j<2*M;j++) g[k][N-1][j]= w[N-1]*g[k][N-1][j]; } } sync(Group); int mx=M0; for(int m=M0;m<ML+M0;m+=2) { if(m==0) { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { G[0][k][n].re = g[k][n ][0]; G[0][k][n].im = 0.0; G[1][k][n].re = g[k][M-n-1][0]; G[1][k][n].im = 0.0; } } } else if((M%2==0)&&(m==M-1)) { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { G[0][k][n].re = g[k][n ][m*2-1]; G[0][k][n].im = 0.0; G[1][k][n].re = g[k][M-n-1][m*2-1]; G[1][k][n].im = 0.0; } } } else { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { G[0][k][n].re = g[k][n ][m*2-1]; G[0][k][n].im = g[k][n ][m*2]; G[1][k][n].re = g[k][M-n-1][m*2-1]; G[1][k][n].im = g[k][M-n-1][m*2]; } } } int p=(mx/2+mx%2)%2; for(int n=mx;n<M;n++) { int ip=(n+mx)%2; if(mx==M0) { for(int j=0;j<N;j++) { P[ip][n/2-(mx+ip)/2][j] = Pmn[p][ip][n/2][j] = seed[ip][n/2][j]; } } else { for(int j=0;j<N;j++) P[ip][n/2-(mx+ip)/2][j] = Pmn[p][ip][n/2][j] = Pmn[1-p][ip][n/2-1][j]*gen[mx][n].c0 + Pmn[1-p][ip][n/2][j]*gen[mx][n].c2 + Pmn[p][ip][n/2-1][j]*gen[mx][n].c1; } } multiply(G[0],P[0],SC[0],K,M/2-(mx )/2,N); multiply(G[1],P[1],SC[1],K,M/2-(mx+1)/2,N); mx+=2; for(int k=0;k<K;k++) for(int n=m;n<M;n++) sc[k][m][n]=SC[(m+n)%2][k][(n-m)/2]; }}/// Internal thread synthesis engine. Does backard recursion over the Pnm's on each pass. void sphere::thread::Synthesis(){ int mx=M0; for(int m=M0;m<ML+M0;m+=2) { for(int k=0;k<K;k++) for(int n=m;n<M;n++) SC[(n+m)%2][k][(n-m)/2]=sc[k][m][n]; int p=(mx/2+mx%2)%2; for(int n=mx;n<M;n++) { int ip=(n+mx)%2; if(mx==M0) { for(int j=0;j<N;j++) P[ip][j][n/2-(mx+ip)/2] = Pmn[p][ip][n/2][j] = seed[ip][n/2][j]; } else { for(int j=0;j<N;j++) P[ip][j][n/2-(mx+ip)/2] = Pmn[p][ip][n/2][j] = Pmn[1-p][ip][n/2-1][j]*gen[mx][n].c0 + Pmn[1-p][ip][n/2][j]*gen[mx][n].c2 + Pmn[p][ip][n/2-1][j]*gen[mx][n].c1; } } multiply(SC[0],P[0],G[0],K,N,M/2-(mx )/2); multiply(SC[1],P[1],G[1],K,N,M/2-(mx+1)/2); mx+=2; if(m==0) { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { real t1 = G[0][k][n].re; real t2 = G[1][k][n].re; g[k][n][m] = t1+t2; g[k][M-n-1][m] = t1-t2; g[k][n][2*M-1] = 0.0; g[k][M-n-1][2*M-1] = 0.0; } } } else if((M%2==0)&&(m==M-1)) { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { real t1 = G[0][k][n].re; real t2 = G[1][k][n].re; g[k][n][m*2-1] = t1 + t2; g[k][M-n-1][m*2-1] = t1-t2; g[k][n][m*2] = 0.0; g[k][M-n-1][m*2] = 0.0; } } } else { for(int k=0;k<K;k++) { for(int n=0;n<N;n++) { real t1 = G[0][k][n].re; real t2 = G[0][k][n].im; real t3 = G[1][k][n].re; real t4 = G[1][k][n].im; g[k][n][m*2-1] = t1 + t3; g[k][n][m*2] = t2 + t4; g[k][M-n-1][m*2-1] = t1 - t3; g[k][M-n-1][m*2] = t2 - t4; } } } } sync(Group); for(int k=K0;k<KL+K0;k++) RFFT->synthesis(g[k],M);}/// Internal thread multiplication. Performs matrix multiply for sum assembly.void sphere::thread::multiply(complex **A,real **B,complex **C,int M1,int N1,int K1){ complex c00,c01,c10,c11; const int BlockM1=32; const int BlockN1=32; for(int mb=0;mb<M1;mb+=BlockM1) { int M1B=min(mb+BlockM1,M1); for(int nb=0;nb<N1;nb+=BlockN1) { int N1B=min(nb+BlockN1,N1); for(int m=mb;m<M1B-1;m+=2) { for(int n=nb;n<N1B-1;n+=2) { c00.re=c00.im=0.0; c01.re=c01.im=0.0; c10.re=c10.im=0.0; c11.re=c11.im=0.0; for(int k=0;k<K1;k++) { c00.re+=A[m][k].re*B[n][k]; c00.im+=A[m][k].im*B[n][k]; c01.re+=A[m][k].re*B[n+1][k]; c01.im+=A[m][k].im*B[n+1][k]; c10.re+=A[m+1][k].re*B[n][k]; c10.im+=A[m+1][k].im*B[n][k]; c11.re+=A[m+1][k].re*B[n+1][k]; c11.im+=A[m+1][k].im*B[n+1][k]; } C[m ][n ].re=c00.re; C[m ][n ].im=c00.im; C[m ][n+1].re=c01.re; C[m ][n+1].im=c01.im; C[m+1][n ].re=c10.re; C[m+1][n ].im=c10.im; C[m+1][n+1].re=c11.re; C[m+1][n+1].im=c11.im; } } if((M1B-mb)%2==1) { for(int n=nb;n<N1B;n++) { c00.re=c00.im=0.0; for(int k=0;k<K1;k++) { c00.re+=A[M1B-1][k].re*B[n][k]; c00.im+=A[M1B-1][k].im*B[n][k]; } C[M1B-1][n].re=c00.re; C[M1B-1][n].im=c00.im; } } if((N1B-nb)%2==1) { for(int m=mb;m<M1B;m++) { c00.re=c00.im=0.0; for(int k=0;k<K1;k++) { c00.re+=A[m][k].re*B[N1B-1][k]; c00.im+=A[m][k].im*B[N1B-1][k]; } C[m][N1B-1].re=c00.re; C[m][N1B-1].im=c00.im; } } } }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -