资源介绍
几个cuda源代码示例
1.cuda矩阵例子.cu
2.实现矩阵的乘法.cu
3....
/*****************************************************************************************/
// 核心代码部分,实现了矩阵的乘法运算
/*****************************************************************************************/
// Matrix multiplication kernel - thread specification.
//
// Computes one element of P = M * N per thread: thread (tx, ty) accumulates
// the product of row ty of M with column tx of N and stores the result at
// row ty, column tx of P.
//
// Parameters:
//   M, N - input matrices (read through .elements, .pitch and, for M, .width)
//   P    - output matrix (written through .elements, using .pitch)
//
// Launch notes:
//   * Only threadIdx is used for indexing (no blockIdx), so every block of a
//     launch computes the same elements.
//     NOTE(review): presumably intended for a single-block launch whose
//     blockDim.x / blockDim.y match P's dimensions - confirm with the host code.
//   * There is no bounds check on tx / ty; the launch configuration must not
//     exceed the matrix dimensions.
//   * `pitch` is used as the row stride counted in elements of `elements`,
//     since it is applied directly to the array index.
__global__ void MatrixMulKernel(Matrix M, Matrix N, Matrix P)
{
    // 2D thread ID: tx selects the output column, ty the output row.
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Pvalue accumulates the element of the result matrix
    // that is computed by this thread.
    float Pvalue = 0.0f;

    // This is where the parallelism comes from: many threads run this loop
    // concurrently, each one multiplying one row of M by one column of N.
    for (int k = 0; k < M.width; ++k)
    {
        float Melement = M.elements[ty * M.pitch + k];
        // Fixed: the original read `Nd.elements`, but no `Nd` exists in this
        // kernel; the right-hand operand is the parameter `N`.
        float Nelement = N.elements[k * N.pitch + tx];
        Pvalue += Melement * Nelement;
    }

    // Write the result to device memory;
    // each thread writes exactly one element.
    P.elements[ty * P.pitch + tx] = Pvalue;
}