# **LAB 6 PPL**

## Add two vectors using N threads (single block)



In [29]:
%%writefile add.cu
#include<stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Element-wise vector addition: c[k] = a[k] + b[k] for every k in [0, n).
// The element index is taken from threadIdx.x alone, so each thread handles
// the element matching its position inside its block; threads whose index is
// n or larger return without touching memory.
__global__ void add(int *a, int *b,int *c, int n)
{
  const int tid=threadIdx.x;

  // Surplus threads have no element to process.
  if(tid>=n)
    return;

  c[tid]=a[tid]+b[tid];
}

/* Reports a failed CUDA runtime call on stderr; returns true when the call succeeded. */
static bool checkCuda(cudaError_t status, const char *what)
{
    if(status==cudaSuccess)
      return true;

    fprintf(stderr,"CUDA error in %s : %s\n",what,cudaGetErrorString(status));
    return false;
}

/*
 * Reads a vector length n and two integer vectors from stdin, adds them on
 * the GPU with a single block of n threads and prints the element-wise sums.
 *
 * Returns 0 on success and 1 on invalid input or on any CUDA failure.
 */
int main()
{
    int n;
    printf("Enter a number : ");
    if(scanf("%d",&n)!=1 || n<=0)
    {
      fprintf(stderr,"Vector size must be a positive integer\n");
      return 1;
    }

    // The kernel is launched as one block of n threads, so n may not exceed
    // the per-block thread limit of the current device.
    int device=0,maxThreads=0;
    if(!checkCuda(cudaGetDevice(&device),"cudaGetDevice") ||
       !checkCuda(cudaDeviceGetAttribute(&maxThreads,cudaDevAttrMaxThreadsPerBlock,device),
                  "cudaDeviceGetAttribute"))
      return 1;
    if(n>maxThreads)
    {
      fprintf(stderr,"Vector size %d exceeds the %d threads allowed per block\n",n,maxThreads);
      return 1;
    }

    // Heap buffers instead of variable-length stack arrays (non-standard in C++).
    const size_t bytes=n*sizeof(int);
    int *arr1=new int[n];
    int *arr2=new int[n];
    int *res=new int[n];
    int *d_A1=NULL,*d_A2=NULL,*d_res=NULL;

    bool ok=true;
    printf("Enter %d elements of first array : ",n);
    for(int i=0;ok && i<n;i++)
      ok=(scanf("%d",&arr1[i])==1);

    if(ok)
    {
      printf("Enter %d elements of second array : ",n);
      for(int i=0;ok && i<n;i++)
        ok=(scanf("%d",&arr2[i])==1);
    }
    if(!ok)
      fprintf(stderr,"Invalid array element\n");

    // Short-circuit chain: the first failing step stops all later ones.
    ok=ok
      && checkCuda(cudaMalloc((void**)&d_A1,bytes),"cudaMalloc(d_A1)")
      && checkCuda(cudaMalloc((void**)&d_A2,bytes),"cudaMalloc(d_A2)")
      && checkCuda(cudaMalloc((void**)&d_res,bytes),"cudaMalloc(d_res)")
      && checkCuda(cudaMemcpy(d_A1,arr1,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A1)")
      && checkCuda(cudaMemcpy(d_A2,arr2,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A2)");

    if(ok)
    {
      add<<<1,n>>>(d_A1,d_A2,d_res,n);

      // A launch returns no status itself; query it explicitly. The blocking
      // copy that follows waits for the kernel and reports execution faults.
      ok=checkCuda(cudaGetLastError(),"add kernel launch")
        && checkCuda(cudaMemcpy(res,d_res,bytes,cudaMemcpyDeviceToHost),"cudaMemcpy(res)");
    }

    if(ok)
    {
      printf("Result : ");
      for(int i=0;i<n;i++)
        printf("%d ",res[i]);
    }

    // cudaFree and delete[] both accept null pointers, so cleanup is unconditional.
    cudaFree(d_A1);
    cudaFree(d_A2);
    cudaFree(d_res);
    delete[] arr1;
    delete[] arr2;
    delete[] res;

    return ok?0:1;
}

Overwriting add.cu


In [30]:
!nvcc add.cu -o add
!./add

Enter a number : 2
Enter 2 elements of first array : 1
2
Enter 2 elements of second array : 4
5
Result : 5 7 

## Add two vectors with more than 256 elements (multiple blocks)

In [4]:
%%writefile addBlocks.cu
#include<stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Element-wise vector addition over a one-dimensional grid:
// c[k] = a[k] + b[k] for every k in [0, n).
// Each thread derives one global element index from its block and thread
// coordinates; threads that land at index n or beyond do nothing, so the
// grid may be larger than the vectors.
__global__ void add(int *a, int *b,int *c, int n)
{
  const int gid=threadIdx.x+blockDim.x*blockIdx.x;

  // Threads past the end of the vectors have no element to process.
  if(gid>=n)
    return;

  c[gid]=a[gid]+b[gid];
}

/* Reports a failed CUDA runtime call on stderr; returns true when the call succeeded. */
static bool checkCuda(cudaError_t status, const char *what)
{
    if(status==cudaSuccess)
      return true;

    fprintf(stderr,"CUDA error in %s : %s\n",what,cudaGetErrorString(status));
    return false;
}

/*
 * Reads a vector length n from stdin, fills two vectors with
 * arr1[i]=i+1 and arr2[i]=i+10, adds them on the GPU using blocks of
 * 256 threads and prints the element-wise sums.
 *
 * Returns 0 on success and 1 on invalid input or on any CUDA failure.
 */
int main()
{
    int n;
    printf("Enter a number (>256) : ");
    if(scanf("%d",&n)!=1 || n<=0)
    {
      fprintf(stderr,"Vector size must be a positive integer\n");
      return 1;
    }

    // Heap buffers: large n would overflow the stack with variable-length arrays.
    const size_t bytes=n*sizeof(int);
    int *arr1=new int[n];
    int *arr2=new int[n];
    int *res=new int[n];
    for(int i=0;i<n;i++)
    {
      arr1[i]=i+1;
      arr2[i]=i+10;
    }

    int *d_A1=NULL,*d_A2=NULL,*d_res=NULL;

    // Short-circuit chain: the first failing step stops all later ones.
    bool ok=checkCuda(cudaMalloc((void**)&d_A1,bytes),"cudaMalloc(d_A1)")
      && checkCuda(cudaMalloc((void**)&d_A2,bytes),"cudaMalloc(d_A2)")
      && checkCuda(cudaMalloc((void**)&d_res,bytes),"cudaMalloc(d_res)")
      && checkCuda(cudaMemcpy(d_A1,arr1,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A1)")
      && checkCuda(cudaMemcpy(d_A2,arr2,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A2)");

    if(ok)
    {
      // Integer ceiling division; written as (n-1)/t+1 so it cannot overflow
      // for any positive n, unlike (n+t-1)/t.
      const int threads=256;
      const int blocks=(n-1)/threads+1;

      add<<<blocks,threads>>>(d_A1,d_A2,d_res,n);

      // A launch returns no status itself; query it explicitly. The blocking
      // copy that follows waits for the kernel and reports execution faults.
      ok=checkCuda(cudaGetLastError(),"add kernel launch")
        && checkCuda(cudaMemcpy(res,d_res,bytes,cudaMemcpyDeviceToHost),"cudaMemcpy(res)");
    }

    if(ok)
    {
      printf("Result : ");
      for(int i=0;i<n;i++)
        printf("%d ",res[i]);
    }

    // cudaFree and delete[] both accept null pointers, so cleanup is unconditional.
    cudaFree(d_A1);
    cudaFree(d_A2);
    cudaFree(d_res);
    delete[] arr1;
    delete[] arr2;
    delete[] res;

    return ok?0:1;
}

Overwriting addBlocks.cu


In [5]:
!nvcc addBlocks.cu -o addB
!./addB

Enter a number (>256) : 200
Result : 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79 81 83 85 87 89 91 93 95 97 99 101 103 105 107 109 111 113 115 117 119 121 123 125 127 129 131 133 135 137 139 141 143 145 147 149 151 153 155 157 159 161 163 165 167 169 171 173 175 177 179 181 183 185 187 189 191 193 195 197 199 201 203 205 207 209 211 213 215 217 219 221 223 225 227 229 231 233 235 237 239 241 243 245 247 249 251 253 255 257 259 261 263 265 267 269 271 273 275 277 279 281 283 285 287 289 291 293 295 297 299 301 303 305 307 309 311 313 315 317 319 321 323 325 327 329 331 333 335 337 339 341 343 345 347 349 351 353 355 357 359 361 363 365 367 369 371 373 375 377 379 381 383 385 387 389 391 393 395 397 399 401 403 405 407 409 

## 1D Convolution

In [12]:
%%writefile convolution.cu
#include<stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// 1D convolution with zero padding at the borders.
//
//   arr  : input array of n elements
//   mask : m mask weights; mask[j] is applied to arr[i - m/2 + j]
//   res  : output array of n elements
//   m    : mask width
//   n    : array length
//
// Each thread computes the output element selected by its threadIdx.x, so
// the element index is relative to the thread's own block. Window positions
// that fall outside [0, n) contribute nothing to the sum.
__global__ void convolution(int *arr, int *mask, int *res, int m,int n)
{
  int i=threadIdx.x;

  // Threads beyond the end of the array have no output element.
  if(i>=n)
    return;

  int start=i-(m/2);
  int val=0;
  for(int j=0;j<m;j++)
  {
    int k=start+j;
    if(k>=0 && k<n)
      val+=arr[k]*mask[j];
  }

  // Store once after the loop: avoids a redundant global write per
  // iteration and still defines res[i] (as 0) when m is 0.
  res[i]=val;
}

/* Reports a failed CUDA runtime call on stderr; returns true when the call succeeded. */
static bool checkCuda(cudaError_t status, const char *what)
{
  if(status==cudaSuccess)
    return true;

  fprintf(stderr,"CUDA error in %s : %s\n",what,cudaGetErrorString(status));
  return false;
}

/*
 * Reads an array of n integers and a mask of m integers from stdin, runs the
 * convolution kernel with a single block of n threads and prints the n
 * resulting values.
 *
 * Returns 0 on success and 1 on invalid input or on any CUDA failure.
 */
int main()
{
  int n,m;

  printf("Enter array size and convolution width(odd) : ");
  if(scanf("%d%d",&n,&m)!=2 || n<=0 || m<=0)
  {
    fprintf(stderr,"Array size and convolution width must be positive integers\n");
    return 1;
  }

  // The kernel is launched as one block of n threads, so n may not exceed
  // the per-block thread limit of the current device.
  int device=0,maxThreads=0;
  if(!checkCuda(cudaGetDevice(&device),"cudaGetDevice") ||
     !checkCuda(cudaDeviceGetAttribute(&maxThreads,cudaDevAttrMaxThreadsPerBlock,device),
                "cudaDeviceGetAttribute"))
    return 1;
  if(n>maxThreads)
  {
    fprintf(stderr,"Array size %d exceeds the %d threads allowed per block\n",n,maxThreads);
    return 1;
  }

  // Heap buffers instead of variable-length stack arrays (non-standard in C++).
  const size_t arrBytes=n*sizeof(int);
  const size_t maskBytes=m*sizeof(int);
  int *arr=new int[n];
  int *mask=new int[m];
  int *res=new int[n];
  int *d_A=NULL,*d_M=NULL,*d_res=NULL;

  bool ok=true;
  printf("Enter %d elements : ",n);
  for(int i=0;ok && i<n;i++)
    ok=(scanf("%d",&arr[i])==1);

  if(ok)
  {
    printf("Enter %d mask elements : ",m);
    for(int i=0;ok && i<m;i++)
      ok=(scanf("%d",&mask[i])==1);
  }
  if(!ok)
    fprintf(stderr,"Invalid input element\n");

  // Short-circuit chain: the first failing step stops all later ones.
  ok=ok
    && checkCuda(cudaMalloc((void**)&d_A,arrBytes),"cudaMalloc(d_A)")
    && checkCuda(cudaMalloc((void**)&d_M,maskBytes),"cudaMalloc(d_M)")
    && checkCuda(cudaMalloc((void**)&d_res,arrBytes),"cudaMalloc(d_res)")
    && checkCuda(cudaMemcpy(d_A,arr,arrBytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A)")
    && checkCuda(cudaMemcpy(d_M,mask,maskBytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_M)");

  if(ok)
  {
    convolution<<<1,n>>>(d_A,d_M,d_res,m,n);

    // A launch returns no status itself; query it explicitly. The blocking
    // copy that follows waits for the kernel and reports execution faults.
    ok=checkCuda(cudaGetLastError(),"convolution kernel launch")
      && checkCuda(cudaMemcpy(res,d_res,arrBytes,cudaMemcpyDeviceToHost),"cudaMemcpy(res)");
  }

  if(ok)
  {
    for(int i=0;i<n;i++)
      printf("%d ",res[i]);
  }

  // Release device and host buffers; both cudaFree and delete[] accept null.
  cudaFree(d_A);
  cudaFree(d_M);
  cudaFree(d_res);
  delete[] arr;
  delete[] mask;
  delete[] res;

  // Exit status 0 signals success (the previous unconditional 1 signalled failure).
  return ok?0:1;
}

Overwriting convolution.cu


In [13]:
!nvcc convolution.cu -o conv
!./conv

Enter array size and convolution width(odd) : 7
5
Enter 7 elements : 1
2
3
4
5
6
7
Enter 5 mask elements : 1
2
3
4
5
26 40 55 70 85 60 38 

## Sine function

In [22]:
%%writefile sine.cu
#include<stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Writes the sine of each input angle to the matching output slot:
// res[k] = sin(arr[k]), where k is the thread's threadIdx.x.
// There is no bounds check, so the launch must not use more threads than
// there are elements in arr and res.
__global__ void calculate(float *arr, float *res)
{
  const int tid=threadIdx.x;
  const float angle=arr[tid];

  res[tid]=sin(angle);
}

/* Reports a failed CUDA runtime call on stderr; returns true when the call succeeded. */
static bool checkCuda(cudaError_t status, const char *what)
{
  if(status==cudaSuccess)
    return true;

  fprintf(stderr,"CUDA error in %s : %s\n",what,cudaGetErrorString(status));
  return false;
}

/*
 * Reads n angles (in radians) from stdin, computes their sines on the GPU
 * with a single block of n threads and prints one "Sine of x = y" line per
 * angle.
 *
 * Returns 0 on success and 1 on invalid input or on any CUDA failure.
 */
int main()
{
  int n;
  printf("Enter array size : ");
  if(scanf("%d",&n)!=1 || n<=0)
  {
    fprintf(stderr,"Array size must be a positive integer\n");
    return 1;
  }

  // The kernel has no bounds check and is launched as one block of n threads,
  // so n may not exceed the per-block thread limit of the current device.
  int device=0,maxThreads=0;
  if(!checkCuda(cudaGetDevice(&device),"cudaGetDevice") ||
     !checkCuda(cudaDeviceGetAttribute(&maxThreads,cudaDevAttrMaxThreadsPerBlock,device),
                "cudaDeviceGetAttribute"))
    return 1;
  if(n>maxThreads)
  {
    fprintf(stderr,"Array size %d exceeds the %d threads allowed per block\n",n,maxThreads);
    return 1;
  }

  // Heap buffers instead of variable-length stack arrays (non-standard in C++).
  const size_t bytes=n*sizeof(float);
  float *arr=new float[n];
  float *res=new float[n];
  float *d_A=NULL,*d_res=NULL;

  bool ok=true;
  printf("Enter %d angles in radians : ",n);
  for(int i=0;ok && i<n;i++)
    ok=(scanf("%f",&arr[i])==1);
  if(!ok)
    fprintf(stderr,"Invalid angle\n");

  // Short-circuit chain: the first failing step stops all later ones.
  ok=ok
    && checkCuda(cudaMalloc((void**)&d_A,bytes),"cudaMalloc(d_A)")
    && checkCuda(cudaMalloc((void**)&d_res,bytes),"cudaMalloc(d_res)")
    && checkCuda(cudaMemcpy(d_A,arr,bytes,cudaMemcpyHostToDevice),"cudaMemcpy(d_A)");

  if(ok)
  {
    calculate<<<1,n>>>(d_A,d_res);

    // A launch returns no status itself; query it explicitly. The blocking
    // copy that follows waits for the kernel and reports execution faults.
    ok=checkCuda(cudaGetLastError(),"calculate kernel launch")
      && checkCuda(cudaMemcpy(res,d_res,bytes,cudaMemcpyDeviceToHost),"cudaMemcpy(res)");
  }

  if(ok)
  {
    for(int i=0;i<n;i++)
      printf("Sine of %f = %f\n",arr[i],res[i]);
  }

  // Release device and host buffers; both cudaFree and delete[] accept null.
  cudaFree(d_A);
  cudaFree(d_res);
  delete[] arr;
  delete[] res;

  // Exit status 0 signals success (the previous unconditional 1 signalled failure).
  return ok?0:1;
}

Overwriting sine.cu


In [23]:
!nvcc sine.cu -o sine
!./sine

Enter array size : 3
Enter 3 angles in radians : 1.57
3.14
4.71
Sine of 1.570000 = 1.000000
Sine of 3.140000 = 0.001593
Sine of 4.710000 = -0.999997
