?? tm2d.br
字號:
//-----------------------------------------
// TM2D.br
// Simple 2D TM FDTD GPU Implementation
// With CPML Boundaries
// Matthew J. Inman and Atef Z. Elsherbeni
// July 2007
//-----------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// GPU Function For H Fields
kernel void process_field_H( float Hx[][], float Chxh[][], float Chxe[][],
float Hy[][], float Chyh[][], float Chye[][],
float Ez[][], float Khx[], float Khy[],
float Bhx[], float Bhy[], float Chx[], float Chy[],
float psiHxy[][], float psiHyx[][],
float dx, float dy,
iter float2 it<>,
out float o_Hx<>, out float o_Hy<>, out float o_psiHxy<>, out float o_psiHyx<> )
{
float2 t0 = float2(0.0f, 1.0f);
float2 t1 = float2(1.0f, 0.0f);
float pxy, pyx;
pxy = (Bhy[it.y] * psiHxy[it]) + (Chy[it.y] * (Ez[it+t0]-Ez[it]));
pyx = (Bhx[it.x] * psiHyx[it]) + (Chx[it.x] * (Ez[it+t1]-Ez[it]));
o_psiHxy=pxy;
o_psiHyx=pyx;
o_Hx = (Chxh[it] * Hx[it] ) + (Khy[it.y] * Chxe[it] / dy * (Ez[it+t0]-Ez[it])) + (Chxe[it] * pxy);
o_Hy = (Chyh[it] * Hy[it] ) + (Khx[it.x] * Chye[it] / dx * (Ez[it+t1]-Ez[it])) + (Chye[it] * pyx);
}
// GPU Function For E Fields
kernel void process_field_Ez( float Ez[][], float Ceze[][], float Cezh[][],
float Hx[][], float Hy[][],
float Cs[][], float Kex[], float Key[],
float Bex[], float Bey[], float Cex[], float Cey[],
float psiEzx[][], float psiEzy[][],
float dx, float dy,
float gauss, iter float2 it<>,
out float o_Ez<>, out float o_psiEzx<>, out float o_psiEzy<> )
{
float2 t0 = float2(0.0f, -1.0f);
float2 t1 = float2(-1.0f, 0.0f);
float pzx, pzy;
pzx = (Bex[it.x] * psiEzx[it]) + (Cex[it.x] * (Hy[it]-Hy[it+t1]));
pzy = (Bey[it.y] * psiEzy[it]) + (Cey[it.y] * (Hx[it]-Hx[it+t0]));
o_psiEzx=pzx;
o_psiEzy=pzy;
o_Ez = (Ceze[it] * Ez[it]) + (Kex[it.x] * (Cezh[it] / dx) * (Hy[it]-Hy[it+t1])) +
(Key[it.y] * (Cezh[it] / dy) * (Hx[it]-Hx[it+t0])) - (Cs[it]*gauss) +
(Cezh[it] * pzx) + (Cezh[it] * pzy);
}
// Copy Kernel To Extract Single Points From a Stream
kernel void Copy( float input<>, out float output<> ) { output = input; }
int main(int argc, char* argv[]) {
// Initialize Variables
int i, j, N, nx, ny, xsize, ysize, insizex, insizey;
int iPML, kappa, m;
float sigmax, sigmay, amax;
float sig1, sig2, a1, a2, k1, k2, ii;
float eps0, mu0, c, pi, dx, dy, dx2, dy2, dfactor, dt, Nc, tau, t0,fmax, ce;
float* t=NULL;
float* gauss=NULL;
float* outputPort=NULL;
float* aHx=NULL;
float* aHy=NULL;
float* aEz=NULL;
float* aCeze=NULL;
float* aChxh=NULL;
float* aChyh=NULL;
float* aCezh=NULL;
float* aChxe=NULL;
float* aChye=NULL;
float* aCs=NULL;
float* outputEz = NULL;
float* outputHx = NULL;
float* outputHy = NULL;
float* aKex=NULL;
float* aKey=NULL;
float* aKhx=NULL;
float* aKhy=NULL;
float* abex=NULL;
float* abey=NULL;
float* acex=NULL;
float* acey=NULL;
float* abhx=NULL;
float* abhy=NULL;
float* achx=NULL;
float* achy=NULL;
float* apsi=NULL;
FILE* pFile;
FILE* pFile2;
FILE* pFile3;
// Define Constants
pi=3.14159265;
c=2.998e8;
// Broken Down because of floating point
// problems with certain compliers
eps0=8.854;
eps0=eps0*1e-6;
eps0=eps0*1e-6;
mu0= 4*pi;
mu0= mu0*1e-4;
mu0= mu0*1e-3;
// Define dx & dy
dx=1e-3;
dy=1e-3;
dx2=(1/dx)*(1/dx);
dy2=(1/dy)*(1/dy);
// Define timesteps and domain size
// N,xsize,ysize are all defined
// By runtime arguments
N = atoi(argv[1]);
xsize = atoi(argv[2]);
ysize = atoi(argv[3]);
nx=(int) xsize;
ny=(int) ysize;
dfactor=.9;
dt=(1/( c* sqrt( dx2 + dy2)))*dfactor;
ce=dt/(2*eps0);
// Define source waveform constants
Nc=25;
tau=(Nc*dt)/(2*sqrt(2.0));
t0=4.5*tau;
fmax=1/(tau);
// Define PML Constants
iPML=10;
kappa=8;
m=4;
sigmax = (0.8*m+1) / (150*pi*dx);
sigmay = (0.8*m+1) / (150*pi*dy);
amax = (fmax/2.1)*2*pi*(eps0/10);
// Print out Constants for Checking
pFile = fopen ("InputData.txt","wt");
fprintf (pFile,"pi=%f eps0=%e mu0=%e c=%f \n",pi,eps0,mu0,c);
fprintf (pFile,"dx=%e dy=%e dx2=%e dy2=%e \n",dx,dy,dx2,dy2);
fprintf (pFile,"dt=%e tau=%e t0=%e \n",dt,tau,t0);
// Dynamically Allocate Arrays to Allow matrix size to be
// Set at Runtime
gauss= (float*)malloc(N*sizeof(float));
t= (float*)malloc(N*sizeof(float));
outputPort= (float*)malloc(1*sizeof(float));
aHx= (float*)malloc(xsize*ysize*sizeof(float));
aHy= (float*)malloc(xsize*ysize*sizeof(float));
aEz= (float*)malloc(xsize*ysize*sizeof(float));
aCeze= (float*)malloc(xsize*ysize*sizeof(float));
aChxh= (float*)malloc(xsize*ysize*sizeof(float));
aChyh= (float*)malloc(xsize*ysize*sizeof(float));
aCezh= (float*)malloc(xsize*ysize*sizeof(float));
aChxe= (float*)malloc(xsize*ysize*sizeof(float));
aChye= (float*)malloc(xsize*ysize*sizeof(float));
aCs= (float*)malloc(xsize*ysize*sizeof(float));
outputEz= (float*)malloc(xsize*ysize*sizeof(float));
outputHx= (float*)malloc(xsize*ysize*sizeof(float));
outputHy= (float*)malloc(xsize*ysize*sizeof(float));
// PML Arrays
aKex = (float*)malloc(xsize*sizeof(float));
aKhx = (float*)malloc(xsize*sizeof(float));
aKey = (float*)malloc(ysize*sizeof(float));
aKhy = (float*)malloc(ysize*sizeof(float));
abex = (float*)malloc(xsize*sizeof(float));
abhx = (float*)malloc(xsize*sizeof(float));
abey = (float*)malloc(ysize*sizeof(float));
abhy = (float*)malloc(ysize*sizeof(float));
acex = (float*)malloc(xsize*sizeof(float));
achx = (float*)malloc(xsize*sizeof(float));
acey = (float*)malloc(ysize*sizeof(float));
achy = (float*)malloc(ysize*sizeof(float));
apsi = (float*)malloc(xsize*ysize*sizeof(float));
// Precalcute input pulse
t[0]=dt;
gauss[0]=0;
for (i=1; i<N; i++) {
t[i]=t[i-1]+dt;
gauss[i]= exp(-( ((t[i]-t0)*(t[i]-t0))/(tau*tau)));
}
// Define Exterior Size
insizex = xsize;
insizey = ysize;
// Initialize our C field and constant arrays
// 1D X Arrays
for (i=0; i<nx; i++) {
aKex[i]=1;
aKhx[i]=1;
abex[i]=0;
abhx[i]=0;
acex[i]=0;
achx[i]=0;
}
// 1D Y Arrays
for (i=0; i<ny; i++) {
aKey[i]=1;
aKhy[i]=1;
abey[i]=0;
abhy[i]=0;
acey[i]=0;
achy[i]=0;
}
// 2D Arrays
for (j=0; j<ny; j++) {
for (i=0; i<nx; i++) {
aHy[nx*j+i]=0;
aHx[nx*j+i]=0;
aEz[nx*j+i]=0;
aCs[nx*j+i]=0;
aCeze[nx*j+i]=1.0;
aChxh[nx*j+i]=1.0;
aChyh[nx*j+i]=1.0;
aCezh[nx*j+i]=(dt/eps0);
aChxe[nx*j+i]=(dt/mu0);
aChye[nx*j+i]=(dt/mu0);
apsi[nx*j+i]=0;
}
}
// Set PEC Boundary (Easy way, fill with PEC then fill internal with air)
for (j=0; j<ny; j++) {
for (i=0; i<nx; i++) {
aCeze[nx*j+i]=(1-(ce*1e30))/(1+(ce*1e30));
aCezh[nx*j+i]=((2*ce)/(1+(ce*1e30)));
}
}
for (j=2; j<ny-2; j++) {
for (i=2; i<nx-2; i++) {
aCeze[nx*j+i]=1.0;
aCezh[nx*j+i]=(dt/eps0);
}
}
// Initialize PML Boundaries
for (i=1; i<1+iPML; i++) {
ii=(float) i;
ii=(1+iPML)-ii;
sig1=pow(((ii-0.5)/iPML),m)*sigmax;
sig2=(mu0/eps0)*pow(((ii)/iPML),m)*sigmax;
a1=pow(((iPML-(ii-1+.5))/iPML),m)*amax;
a2=(mu0/eps0)*pow(((iPML-(ii-1))/iPML),m)*amax;
k1=1+(kappa-1)*pow(((ii-0.5)/iPML),m);
k2=1+(kappa-1)*pow(((ii)/iPML),m);
aKex[i+1]=1/k1;
aKhx[i]=1/k2;
aKex[nx-i-1]=1/k1;
aKhx[nx-i-1]=1/k2;
abex[i+1]=exp((-dt/eps0)*((sig1/k1)+a1));
acex[i+1]=((sig1/dx)/((sig1*k1)+(a1*k1*k1)))*(exp((-dt/eps0)*((sig1/k1)+a1))-1);
abex[nx-i-1]=exp((-dt/eps0)*((sig1/k1)+a1));
acex[nx-i-1]=((sig1/dx)/((sig1*k1)+(a1*k1*k1)))*(exp((-dt/eps0)*((sig1/k1)+a1))-1);
abhx[i]=exp((-dt/mu0)*((sig2/k2)+a2));
achx[i]=((sig2/dx)/((sig2*k2)+(a2*k2*k2)))*(exp((-dt/mu0)*((sig2/k2)+a2))-1);
abhx[nx-i-1]=exp((-dt/mu0)*((sig2/k2)+a2));
achx[nx-i-1]=((sig2/dx)/((sig2*k2)+(a2*k2*k2)))*(exp((-dt/mu0)*((sig2/k2)+a2))-1);
aKey[i+1]=1/k1;
aKhy[i]=1/k2;
aKey[ny-i-1]=1/k1;
aKhy[ny-i-1]=1/k2;
abey[i+1]=exp((-dt/eps0)*((sig1/k1)+a1));
acey[i+1]=((sig1/dx)/((sig1*k1)+(a1*k1*k1)))*(exp((-dt/eps0)*((sig1/k1)+a1))-1);
abey[ny-i-1]=exp((-dt/eps0)*((sig1/k1)+a1));
acey[ny-i-1]=((sig1/dx)/((sig1*k1)+(a1*k1*k1)))*(exp((-dt/eps0)*((sig1/k1)+a1))-1);
abhy[i]=exp((-dt/mu0)*((sig2/k2)+a2));
achy[i]=((sig2/dx)/((sig2*k2)+(a2*k2*k2)))*(exp((-dt/mu0)*((sig2/k2)+a2))-1);
abhy[ny-i-1]=exp((-dt/mu0)*((sig2/k2)+a2));
achy[ny-i-1]=((sig2/dx)/((sig2*k2)+(a2*k2*k2)))*(exp((-dt/mu0)*((sig2/k2)+a2))-1);
}
// Place Our Point Source (Placed in the approximate center)
aCs[nx*(ysize/2)+(xsize/2)]=1/dx;
// Place Sample Scattering Object Er=10.2
// This places a rectangular scattering
// Dielectric box 10 cells from the point source
// That is 16 cells wide and 10 Cells Deep
for (j=((ysize/2)-8); j<((ysize/2)+9); j++) {
for (i=((xsize/2)+10); i<((xsize/2)+20); i++) {
aCeze[nx*j+i]=1;
aCezh[nx*j+i]=(2*ce)/(10.2);
}
}
fprintf (pFile,"Cezh=%e Chxe=%e\n",aCezh[10],aChxe[10]);
// Begin Code to Initialize and Use GPU
{
// Set the limits of the calculations to the interior points
iter float2 it<insizey,insizex> = iter ( float2(0.0f, 0.0f), float2((float)insizex, (float)insizey));
// Initialize the streams
float Obs<1,1>;
float Hy<insizey,insizex>, Hx<insizey,insizex>;
float o_Ez<insizey,insizex>, o_Hy<insizey,insizex>, o_Hx<insizey,insizex>;
float Ceze<insizey,insizex>, Chxh<insizey,insizex>, Chyh<insizey,insizex>;
float Cezh<insizey,insizex>, Chxe<insizey,insizex>, Chye<insizey,insizex>, Cs<insizey,insizex>;
float Ez<insizey,insizex>;
float Kex<insizex>,Khx<insizex>,Bex<insizex>,Bhx<insizex>,Cex<insizex>,Chx<insizex>;
float Key<insizey>,Khy<insizey>,Bey<insizey>,Bhy<insizey>,Cey<insizey>,Chy<insizey>;
float psiEzx<insizey,insizex>, o_psiEzx<insizey,insizex>;
float psiEzy<insizey,insizex>, o_psiEzy<insizey,insizex>;
float psiHxy<insizey,insizex>, o_psiHxy<insizey,insizex>;
float psiHyx<insizey,insizex>, o_psiHyx<insizey,insizex>;
// Copy our C arrays into the Streams so the GPU can process them
streamRead(Ez, aEz);
streamRead(Hy, aHy);
streamRead(Hx, aHx);
streamRead(o_Ez, aEz);
streamRead(o_Hy, aHy);
streamRead(o_Hx, aHx);
streamRead(Ceze, aCeze);
streamRead(Chxh, aChxh);
streamRead(Chyh, aChyh);
streamRead(Cezh, aCezh);
streamRead(Chxe, aChxe);
streamRead(Chye, aChye);
streamRead(Cs, aCs);
streamRead(Kex, aKex);
streamRead(Khx, aKhx);
streamRead(Bex, abex);
streamRead(Bhx, abhx);
streamRead(Cex, acex);
streamRead(Chx, achx);
streamRead(Key, aKey);
streamRead(Khy, aKhy);
streamRead(Bey, abey);
streamRead(Bhy, abhy);
streamRead(Cey, acey);
streamRead(Chy, achy);
streamRead(psiEzx, apsi);
streamRead(o_psiEzx, apsi);
streamRead(psiEzy, apsi);
streamRead(o_psiEzy, apsi);
streamRead(psiHxy, apsi);
streamRead(o_psiHxy, apsi);
streamRead(psiHyx, apsi);
streamRead(o_psiHyx, apsi);
// Do the requested number of iterations
pFile2 = fopen("Port.txt","wt");
for(i=0; i<N; i++){
// We use to 2 sets of field streams to have a "New" and "Old" set
if(i%2==0) {
process_field_H(Hx, Chxh, Chxe, Hy, Chyh, Chye, Ez, Khx, Khy, Bhx, Bhy,
Chx, Chy, psiHxy, psiHyx, dx, dy, it, o_Hx, o_Hy, o_psiHxy, o_psiHyx);
process_field_Ez(Ez, Ceze, Cezh, o_Hx, o_Hy, Cs, Kex, Key, Bex, Bey,
Cex, Cey, psiEzx, psiEzy, dx, dy, gauss[i], it, o_Ez, o_psiEzx, o_psiEzy);
// Save a single Ez point for use later
// Ez point is here is 10 cells from center
Copy(o_Ez.domain( int2( (xsize/2)-10 , ysize/2 ), int2( (xsize/2)-10 , ysize/2 ) ), Obs);
streamWrite(Obs,outputPort);
fprintf (pFile2,"%e\n",outputPort[0]);
} else {
process_field_H(o_Hx, Chxh, Chxe, o_Hy, Chyh, Chye, o_Ez, Khx, Khy, Bhx,
Bhy, Chx, Chy, o_psiHxy, o_psiHyx, dx, dy, it, Hx, Hy, psiHxy, psiHyx);
process_field_Ez(o_Ez, Ceze, Cezh, Hx, Hy, Cs, Kex, Key, Bex, Bey,
Cex, Cey, o_psiEzx, o_psiEzy, dx, dy, gauss[i], it, Ez, psiEzx, psiEzy);
// Save a single Ez point for use later
// Ez point is here is 10 cells from center
Copy(Ez.domain( int2( (xsize/2)-10 , ysize/2 ), int2( (xsize/2)-10 , ysize/2 ) ), Obs);
streamWrite(Obs,outputPort);
fprintf (pFile2,"%e\n",outputPort[0]);
}
fprintf (pFile,"%e %e\n",gauss[i],t[i]);
}
// Copy the proper streams back into C arrays so we can output them
if(i%2==0) {
streamWrite(Ez, outputEz);
streamWrite(Hx, outputHx);
streamWrite(Hy, outputHy);
} else {
streamWrite(o_Ez, outputEz);
streamWrite(o_Hx, outputHx);
streamWrite(o_Hy, outputHy);
}
// Output the field states into a text file
fclose (pFile);
fclose (pFile2);
pFile = fopen ("Ezdata.txt","wt");
pFile2 = fopen ("Hxdata.txt","wt");
pFile3 = fopen ("Hydata.txt","wt");
for (j=0; j<ny; j++) {
for (i=0; i<nx; i++) {
fprintf (pFile,"%e ",outputEz[nx*j+i]);
fprintf (pFile2,"%e ",outputHx[nx*j+i]);
fprintf (pFile3,"%e ",outputHy[nx*j+i]);
}
fprintf (pFile,"\n");
fprintf (pFile2,"\n");
fprintf (pFile3,"\n");
}
fclose (pFile);
fclose (pFile2);
fclose (pFile3);
}
printf("Program complete\n");
return 0;
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -