确定2Dkernel 的thread 全局ID
假如我configure 了一个kernel:
1 2 3 4
int row;
int col;
// 12 x 12 = 144 threads per block.
dim3 block(12, 12);
// Ceiling division: round the block count up in each dimension so the grid
// still covers every element when row / col are not multiples of the block size.
dim3 grid((row + block.x - 1) / block.x,
          (col + block.y - 1) / block.y);
|
那么在 `__global__` 函数中，全局 thread ID 用如下方法确定:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
// 2D-launch kernel: every thread whose (ix, iy) lies inside the row x col
// extent processes exactly one element of the flattened arrays.
//
//   dev_a    input points, read at the thread's linear index
//   dev_b    output points; receives the dev_a point offset by p1 and is then
//            handed to Line() and getValue()
//   p1, p2   points passed by value; p1 is the offset added to x and y, and
//            both are forwarded to Line()
//   dev_c    float array; the thread's element is passed to getValue()
//   row, col extents along the x and y launch dimensions respectively
__global__ void func(struct Points* dev_a, struct Points* dev_b, struct Points p1, struct Points p2, float* dev_c, const int row, const int col)
{
    // Global coordinates of this thread along each launch dimension.
    const int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const int iy = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the row x col extent have no element to work on.
    if (ix >= row || iy >= col) {
        return;
    }

    // Linear global ID: ix selects a run of col elements, iy the offset in it.
    const int tid = ix * col + iy;

    dev_b[tid].x = dev_a[tid].x + p1.x;
    dev_b[tid].y = dev_a[tid].y + p1.y;

    Line(dev_b[tid], p1, p2);
    getValue(dev_b[tid], dev_c[tid]);
}
|
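其中 `int tid = ix * col + iy;` 用 x 和 y 两个方向的分量来确定 thread 的全局 ID:ix 是 x 方向的全局下标(行号),iy 是 y 方向的全局下标(列号),每一行有 col 个元素,所以该 thread 对应一维数组中的第 `ix * col + iy` 个元素。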
同理,在求矩阵转置时的kernel是如下实现的:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Writes the transpose of the N x N matrix m into mt, one element per thread.
// N is not a parameter of the kernel; it has to be defined elsewhere in the
// translation unit.
__global__ void transpose(int *m, int *mt)
{
    // Global coordinates of this thread along each launch dimension.
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const int idy = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the N x N extent have nothing to copy.
    if (idx >= N || idy >= N) {
        return;
    }

    const int tidM = idx * N + idy;  // element (idx, idy) in the source matrix
    const int tidT = idy * N + idx;  // element (idy, idx) in the transposed matrix

    mt[tidT] = m[tidM];
}
|
其中 `tidM = idx * N + idy;` 是该 thread 在原矩阵中对应元素的一维下标,`tidT = idy * N + idx;` 是该元素在转置后矩阵中的一维下标(行、列互换)。