?? cuda_matrix.cu
字號:
/*
今天CUDA技術群里meteor兄提了個問題如下
引用
x x x x y y y y
x x x x y y y y
x x x x y y y y
x x x x y y y y
z z z z a a a a
z z z z a a a a
z z z z a a a a
z z z z a a a a
比如這個矩陣
我想對x那些數加一,對y那些數加二
對z那些數加三,對a那些數加四
鑒于meteor兄是新手,本著互相學習的精神,下面是我寫的一段很簡單的程序,完成上述操作,希望對meteor兄有所幫助
*/
[quote]
C/C++ code
#include <stdio.h>
/*
 * Adds a per-quadrant constant, in place, to a square int matrix stored as
 * d_A[row * size + col].
 *
 * Parameters:
 *   d_A  - device pointer to the size x size matrix.
 *   size - number of rows (== number of columns), in elements, not bytes.
 *
 * Launch layout: 2D grid of 2D blocks. The x dimension of the launch selects
 * the row and the y dimension selects the column. Only the four blocks with
 * blockIdx.x, blockIdx.y in {0, 1} modify data:
 *   block (0,0): += 1    block (0,1): += 2
 *   block (1,0): += 3    block (1,1): += 4
 * All other blocks, and any thread that maps outside the matrix, do nothing.
 *
 * No shared memory is used and no synchronization is required.
 */
__global__ void testkernel(int *d_A, size_t size)
{
    // Compute indices in size_t so the product cannot overflow a 32-bit int.
    const size_t row = (size_t)blockDim.x * blockIdx.x + threadIdx.x;
    const size_t col = (size_t)blockDim.y * blockIdx.y + threadIdx.y;

    // Guard against launch shapes that do not tile the matrix exactly;
    // without it an oversized block would write past the end of d_A.
    if (row >= size || col >= size)
        return;

    // Only the 2x2 group of blocks at the grid origin carries an increment.
    if (blockIdx.x > 1 || blockIdx.y > 1)
        return;

    // (0,0)->1, (0,1)->2, (1,0)->3, (1,1)->4
    const int increment = 1 + 2 * (int)blockIdx.x + (int)blockIdx.y;

    // NOTE: threadIdx.x selects the row, so threads adjacent in x touch
    // addresses `size` elements apart.
    d_A[row * size + col] += increment;
}
/* Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess. */
static bool cudaCallSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Demo driver: uploads an 8x8 matrix made of four 4x4 quadrants, runs
 * testkernel with a 2x2 grid of 4x4 blocks (one block per quadrant), copies
 * the result back and prints it row by row.
 *
 * Returns 0 on success and 1 if the host allocation or any CUDA call fails.
 */
int main( int argc, char** argv)
{
    (void)argc;
    (void)argv;

    int h_A[8][8] = {{1,1,1,1,2,2,2,2},
                     {1,1,1,1,2,2,2,2},
                     {1,1,1,1,2,2,2,2},
                     {1,1,1,1,2,2,2,2},
                     {3,3,3,3,4,4,4,4},
                     {3,3,3,3,4,4,4,4},
                     {3,3,3,3,4,4,4,4},
                     {3,3,3,3,4,4,4,4}};
    const size_t rsize = 8;                     // rows == columns, in elements
    const size_t size = 8 * 8 * sizeof(int);    // matrix size in bytes

    dim3 dimgrid(2,2);      // one block per quadrant
    dim3 dimblock(4,4);     // one thread per element of a quadrant

    int *d_A = NULL;
    int *h_B = (int*)malloc(size);
    if (h_B == NULL)
    {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)size);
        return 1;
    }

    // Each step runs only if every previous step succeeded.
    bool ok = cudaCallSucceeded(cudaMalloc( (void **) &d_A, size ), "cudaMalloc");
    ok = ok && cudaCallSucceeded(cudaMemcpy( d_A, h_A, size, cudaMemcpyHostToDevice ),
                                 "cudaMemcpy (host to device)");
    if (ok)
    {
        testkernel<<<dimgrid,dimblock>>>(d_A,rsize);
        // A kernel launch returns nothing; launch errors must be queried explicitly.
        ok = cudaCallSucceeded(cudaGetLastError(), "testkernel launch");
    }
    // This blocking copy also waits for the kernel, so execution errors show up here.
    ok = ok && cudaCallSucceeded(cudaMemcpy( h_B, d_A, size, cudaMemcpyDeviceToHost ),
                                 "cudaMemcpy (device to host)");

    if (ok)
    {
        for(int i = 0; i < 8; i++)
        {
            for(int j = 0;j < 8; j++)
                printf("%2d ",h_B[i*rsize+j]);
            printf("\n");
        }
    }

    // Single cleanup path; cudaFree on a null pointer performs no operation.
    cudaFree(d_A);
    free(h_B);
    return ok ? 0 : 1;
}
[/quote]
鑒于meteor兄不理解blockDim.x和threadIdx.x,下面借上面這個例子解釋,具體的請參見CUDA C Programming Guide
blockDim就是指block的維度,這里每個block是4*4的,所以blockDim.x=4 blockDim.y = 4
threadIdx就是指block里的線程的索引號,這里每個block是4*4的,每個block里有16個thread,每個thread的threadIdx.x從0到3,threadIdx.y從0到3,和數組下標一樣,這樣解釋行嗎?
以上程序測試通過。。。。
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -