In this post, we will see how to write a CUDA program that adds two matrices. (Tags: CUDA matrix addition, CUDA program for matrices addition, CUDA programming, CUDA programming tutorial, CUDA programming C++, CUDA programming model, CUDA programming tutorial for beginners, CUDA programming for beginners, CUDA programming NVIDIA, CUDA programming Linux)
Watch following Video:
Watch on YouTube: https://www.youtube.com/watch?v=jTAxCbcxwJA
Here, two cases are considered.
1. Two dimensional blocks and one thread per block.
2. One block and two dimensional threads in that block.
1. Two dimensional blocks and one thread per block
Watch following Video:
Watch on YouTube: https://www.youtube.com/watch?v=jTAxCbcxwJA
Here, two cases are considered.
1. Two dimensional blocks and one thread per block.
2. One block and two dimensional threads in that block.
1. Two dimensional blocks and one thread per block
#include<stdio.h>
#include<cuda.h>
/*
 * Element-wise integer matrix addition: n = l + m.
 *
 * Launch layout: a 2D grid with exactly one block per matrix element and a
 * single thread per block (grid.x = columns, grid.y = rows). The kernel has
 * no bounds check, so the grid must match the matrix shape exactly.
 * All three pointers are indexed as flat row-major arrays.
 */
__global__ void matadd(int *l,int *m, int *n)
{
    /* Row-major offset: (block row * blocks per row) + block column. */
    const int offset = blockIdx.y * gridDim.x + blockIdx.x;
    n[offset] = m[offset] + l[offset];
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Reads two 2 x 3 integer matrices from stdin, adds them on the GPU using
 * one block per element, and prints the sum.
 * Returns 0 on success, 1 on invalid input or any CUDA failure.
 */
int main()
{
    enum { ROWS = 2, COLS = 3 };
    const size_t bytes = ROWS * COLS * sizeof(int);
    int a[ROWS][COLS];
    int b[ROWS][COLS];
    int c[ROWS][COLS];
    /* Start as NULL so the cudaFree calls below are safe even if an allocation failed. */
    int *d = NULL, *e = NULL, *f = NULL;
    int i, j;
    int ok;

    printf("\n Enter elements of first matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            /* Reject malformed input instead of sending uninitialised values to the GPU. */
            if (scanf("%d", &a[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for first matrix\n");
                return 1;
            }
        }
    }
    printf("\n Enter elements of second matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            if (scanf("%d", &b[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for second matrix\n");
                return 1;
            }
        }
    }

    /* Short-circuit evaluation skips every later step once one call has failed. */
    ok = cudaOk(cudaMalloc((void **)&d, bytes), "cudaMalloc (first matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&e, bytes), "cudaMalloc (second matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&f, bytes), "cudaMalloc (result matrix)");
    ok = ok && cudaOk(cudaMemcpy(d, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (first matrix to device)");
    ok = ok && cudaOk(cudaMemcpy(e, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (second matrix to device)");

    if (ok)
    {
        /* Two dimensional grid (collection of blocks). Syntax is dim3 grid(no. of columns, no. of rows). */
        dim3 grid(COLS, ROWS);
        matadd<<<grid,1>>>(d, e, f);
        /* A kernel launch returns nothing; launch errors must be fetched explicitly. */
        ok = cudaOk(cudaGetLastError(), "matadd launch");
    }

    /* This blocking copy also surfaces any error raised while the kernel was running. */
    ok = ok && cudaOk(cudaMemcpy(c, f, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (result to host)");

    if (ok)
    {
        printf("\nSum of two matrices:\n ");
        for (i = 0; i < ROWS; i++)
        {
            for (j = 0; j < COLS; j++)
            {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    cudaFree(d);
    cudaFree(e);
    cudaFree(f);
    return ok ? 0 : 1;
}
#include<cuda.h>
/*
 * Adds two integer matrices element by element and stores the sum in n.
 *
 * Expected launch: a 2D grid with one single-thread block per element
 * (grid.x = columns, grid.y = rows). There is no bounds check, so the grid
 * dimensions must equal the matrix dimensions. The buffers are addressed as
 * flat row-major arrays.
 */
__global__ void matadd(int *l,int *m, int *n)
{
    const int col = blockIdx.x;
    const int row = blockIdx.y;
    /* gridDim.x is the number of blocks per row, i.e. the row width. */
    const int cell = gridDim.x * row + col;
    n[cell] = l[cell] + m[cell];
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Reads two 2 x 3 integer matrices from stdin, adds them on the GPU using
 * one block per element, and prints the sum.
 * Returns 0 on success, 1 on invalid input or any CUDA failure.
 */
int main()
{
    enum { ROWS = 2, COLS = 3 };
    const size_t bytes = ROWS * COLS * sizeof(int);
    int a[ROWS][COLS];
    int b[ROWS][COLS];
    int c[ROWS][COLS];
    /* Start as NULL so the cudaFree calls below are safe even if an allocation failed. */
    int *d = NULL, *e = NULL, *f = NULL;
    int i, j;
    int ok;

    printf("\n Enter elements of first matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            /* Reject malformed input instead of sending uninitialised values to the GPU. */
            if (scanf("%d", &a[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for first matrix\n");
                return 1;
            }
        }
    }
    printf("\n Enter elements of second matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            if (scanf("%d", &b[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for second matrix\n");
                return 1;
            }
        }
    }

    /* Short-circuit evaluation skips every later step once one call has failed. */
    ok = cudaOk(cudaMalloc((void **)&d, bytes), "cudaMalloc (first matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&e, bytes), "cudaMalloc (second matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&f, bytes), "cudaMalloc (result matrix)");
    ok = ok && cudaOk(cudaMemcpy(d, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (first matrix to device)");
    ok = ok && cudaOk(cudaMemcpy(e, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (second matrix to device)");

    if (ok)
    {
        /* Two dimensional grid (collection of blocks). Syntax is dim3 grid(no. of columns, no. of rows). */
        dim3 grid(COLS, ROWS);
        matadd<<<grid,1>>>(d, e, f);
        /* A kernel launch returns nothing; launch errors must be fetched explicitly. */
        ok = cudaOk(cudaGetLastError(), "matadd launch");
    }

    /* This blocking copy also surfaces any error raised while the kernel was running. */
    ok = ok && cudaOk(cudaMemcpy(c, f, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (result to host)");

    if (ok)
    {
        printf("\nSum of two matrices:\n ");
        for (i = 0; i < ROWS; i++)
        {
            for (j = 0; j < COLS; j++)
            {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    cudaFree(d);
    cudaFree(e);
    cudaFree(f);
    return ok ? 0 : 1;
}
Output:
Enter elements of first matrix of size 2 * 3
1 2 3 4 5 6
Enter elements of second matrix of size 2 * 3
2 3 4 5 6 7
Sum of two matrices:
3 5 7
9 11 13
1 2 3 4 5 6
Enter elements of second matrix of size 2 * 3
2 3 4 5 6 7
Sum of two matrices:
3 5 7
9 11 13
#include<stdio.h>
#include<cuda.h>
/*
 * Element-wise integer matrix addition: n = l + m.
 *
 * Launch layout: a single block whose 2D thread layout has exactly one
 * thread per matrix element (block.x = columns, block.y = rows). Only
 * threadIdx is used for indexing, so launching more than one block would
 * make every block write the same elements. There is no bounds check, so
 * the block shape must match the matrix shape exactly.
 * All three pointers are indexed as flat row-major arrays.
 */
__global__ void matadd(int *l,int *m, int *n)
{
    /* Row-major offset: (thread row * threads per row) + thread column. */
    const int offset = threadIdx.y * blockDim.x + threadIdx.x;
    n[offset] = m[offset] + l[offset];
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Reads two 2 x 3 integer matrices from stdin, adds them on the GPU using
 * a single block with one thread per element, and prints the sum.
 * Returns 0 on success, 1 on invalid input or any CUDA failure.
 */
int main()
{
    enum { ROWS = 2, COLS = 3 };
    const size_t bytes = ROWS * COLS * sizeof(int);
    int a[ROWS][COLS];
    int b[ROWS][COLS];
    int c[ROWS][COLS];
    /* Start as NULL so the cudaFree calls below are safe even if an allocation failed. */
    int *d = NULL, *e = NULL, *f = NULL;
    int i, j;
    int ok;

    printf("\n Enter elements of first matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            /* Reject malformed input instead of sending uninitialised values to the GPU. */
            if (scanf("%d", &a[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for first matrix\n");
                return 1;
            }
        }
    }
    printf("\n Enter elements of second matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            if (scanf("%d", &b[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for second matrix\n");
                return 1;
            }
        }
    }

    /* Short-circuit evaluation skips every later step once one call has failed. */
    ok = cudaOk(cudaMalloc((void **)&d, bytes), "cudaMalloc (first matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&e, bytes), "cudaMalloc (second matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&f, bytes), "cudaMalloc (result matrix)");
    ok = ok && cudaOk(cudaMemcpy(d, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (first matrix to device)");
    ok = ok && cudaOk(cudaMemcpy(e, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (second matrix to device)");

    if (ok)
    {
        /* Two dimensional block (collection of threads). Syntax is dim3 threadBlock(no. of columns, no. of rows). */
        dim3 threadBlock(COLS, ROWS);
        matadd<<<1,threadBlock>>>(d, e, f);
        /* A kernel launch returns nothing; launch errors must be fetched explicitly. */
        ok = cudaOk(cudaGetLastError(), "matadd launch");
    }

    /* This blocking copy also surfaces any error raised while the kernel was running. */
    ok = ok && cudaOk(cudaMemcpy(c, f, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (result to host)");

    if (ok)
    {
        printf("\nSum of two matrices:\n ");
        for (i = 0; i < ROWS; i++)
        {
            for (j = 0; j < COLS; j++)
            {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    cudaFree(d);
    cudaFree(e);
    cudaFree(f);
    return ok ? 0 : 1;
}
#include<cuda.h>
/*
 * Adds two integer matrices element by element and stores the sum in n.
 *
 * Expected launch: one block with a 2D thread layout holding one thread per
 * element (block.x = columns, block.y = rows). Indexing uses threadIdx only
 * and there is no bounds check, so the block shape must equal the matrix
 * shape. The buffers are addressed as flat row-major arrays.
 */
__global__ void matadd(int *l,int *m, int *n)
{
    const int col = threadIdx.x;
    const int row = threadIdx.y;
    /* blockDim.x is the number of threads per row, i.e. the row width. */
    const int cell = blockDim.x * row + col;
    n[cell] = l[cell] + m[cell];
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 1;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Reads two 2 x 3 integer matrices from stdin, adds them on the GPU using
 * a single block with one thread per element, and prints the sum.
 * Returns 0 on success, 1 on invalid input or any CUDA failure.
 */
int main()
{
    enum { ROWS = 2, COLS = 3 };
    const size_t bytes = ROWS * COLS * sizeof(int);
    int a[ROWS][COLS];
    int b[ROWS][COLS];
    int c[ROWS][COLS];
    /* Start as NULL so the cudaFree calls below are safe even if an allocation failed. */
    int *d = NULL, *e = NULL, *f = NULL;
    int i, j;
    int ok;

    printf("\n Enter elements of first matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            /* Reject malformed input instead of sending uninitialised values to the GPU. */
            if (scanf("%d", &a[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for first matrix\n");
                return 1;
            }
        }
    }
    printf("\n Enter elements of second matrix of size 2 * 3\n");
    for (i = 0; i < ROWS; i++)
    {
        for (j = 0; j < COLS; j++)
        {
            if (scanf("%d", &b[i][j]) != 1)
            {
                fprintf(stderr, "Invalid input for second matrix\n");
                return 1;
            }
        }
    }

    /* Short-circuit evaluation skips every later step once one call has failed. */
    ok = cudaOk(cudaMalloc((void **)&d, bytes), "cudaMalloc (first matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&e, bytes), "cudaMalloc (second matrix)");
    ok = ok && cudaOk(cudaMalloc((void **)&f, bytes), "cudaMalloc (result matrix)");
    ok = ok && cudaOk(cudaMemcpy(d, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (first matrix to device)");
    ok = ok && cudaOk(cudaMemcpy(e, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (second matrix to device)");

    if (ok)
    {
        /* Two dimensional block (collection of threads). Syntax is dim3 threadBlock(no. of columns, no. of rows). */
        dim3 threadBlock(COLS, ROWS);
        matadd<<<1,threadBlock>>>(d, e, f);
        /* A kernel launch returns nothing; launch errors must be fetched explicitly. */
        ok = cudaOk(cudaGetLastError(), "matadd launch");
    }

    /* This blocking copy also surfaces any error raised while the kernel was running. */
    ok = ok && cudaOk(cudaMemcpy(c, f, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (result to host)");

    if (ok)
    {
        printf("\nSum of two matrices:\n ");
        for (i = 0; i < ROWS; i++)
        {
            for (j = 0; j < COLS; j++)
            {
                printf("%d\t", c[i][j]);
            }
            printf("\n");
        }
    }

    cudaFree(d);
    cudaFree(e);
    cudaFree(f);
    return ok ? 0 : 1;
}
Output:
Enter elements of first matrix of size 2 * 3
1 2 3 4 5 6
Enter elements of second matrix of size 2 * 3
2 3 4 5 6 7
Sum of two matrices:
3 5 7
9 11 13
1 2 3 4 5 6
Enter elements of second matrix of size 2 * 3
2 3 4 5 6 7
Sum of two matrices:
3 5 7
9 11 13
Nice Post for beginners!
Take a look at this other good article on vector operations in CUDA:
http://www.cstechera.in/2015/07/vector-operations-in-cuda.html