Permalink
Browse files

lab8

  • Loading branch information...
1 parent 51fafe0 commit 3d14334fb40c283e387bdb357887c90c3cc3da70 @yurigorokhov committed Apr 17, 2012
Showing with 105 additions and 0 deletions.
  1. +13 −0 src/lab8/Makefile
  2. +53 −0 src/lab8/lab8.cu
  3. +39 −0 src/lab8/run_all.sh
View
@@ -0,0 +1,13 @@
+# Build rules for lab 8. `make` (or `make lab8`) produces lab8.exe.
+CC=nvcc
+CFLAGS=-c -g
+LDFLAGS=-L/usr/lib/nvidia-current
+
+# Neither target names a file that the rules create, so mark them phony.
+.PHONY: lab8 clean
+
+# `lab8` stays the default goal but is now only an alias: the link rule is
+# attached to the file it really writes, so an up-to-date lab8.exe is not
+# relinked on every invocation.
+lab8: lab8.exe
+
+lab8.exe: lab8.o
+	$(CC) $(LDFLAGS) lab8.o -o $@
+
+# lab8.cu includes ../include/cuda_util.h, so rebuild when that changes too.
+lab8.o: lab8.cu ../include/cuda_util.h
+	$(CC) $(CFLAGS) lab8.cu
+
+clean:
+	rm -rf lab8 lab8.o *.exe
+
View
@@ -0,0 +1,53 @@
+/**
+ * Yuri Gorokhov
+ * lab 8 - grid configurations continued
+ */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <math.h>
+
+#include "../include/cuda_util.h"
+
+// Processor count echoed in the program's report; run_all.sh also passes it
+// as a GRID_Y value (-DGRID_Y=PROX).
+#define PROX 48
+// Reference quantity that ARRAY_SIZE is expressed as a fraction of.
+// NOTE(review): presumably counted in ints rather than bytes -- confirm.
+#define SHARED_MEM_PER_BLOCK 4000
+// Total thread budget; main divides it by GRID_Y to get threads per block.
+#define THREADS 2048
+
+// Grid height in blocks. Override with -DGRID_Y=... on the nvcc command line.
+#ifndef GRID_Y
+#define GRID_Y 4
+#endif
+
+// Element count of the kernel's shared array. Override with -DARRAY_SIZE=...
+// The default is parenthesized so it expands as a single value wherever the
+// macro is used inside a larger expression.
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE (3 * SHARED_MEM_PER_BLOCK / 4)
+#endif
+
+__global__ void sum_kernel();
+
+/**
+ * Reports a failed CUDA runtime call on stderr.
+ *
+ * @param status  value returned by the CUDA call
+ * @param what    short label for the call, used in the message
+ * @return 0 when status is cudaSuccess, 1 otherwise
+ */
+static int lab8_cuda_failed(cudaError_t status, const char *what) {
+    if (status == cudaSuccess) {
+        return 0;
+    }
+    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
+    return 1;
+}
+
+/**
+ * Launches sum_kernel on a 1 x GRID_Y grid with THREADS / GRID_Y threads per
+ * block, times the launch with CUDA events and prints the configuration
+ * together with the elapsed time.
+ *
+ * @return 0 on success, 1 if any CUDA call or the kernel launch failed
+ */
+int main(void) {
+    // GRID_Y can arrive from the command line as a bare expression such as
+    // 2*PROX. Evaluate it on its own first: THREADS / 2*PROX would otherwise
+    // parse as (THREADS / 2) * PROX and request far too many threads.
+    const int gridY = (GRID_Y);
+    // Integer division: any remainder of THREADS / gridY is dropped.
+    const int threadsPerBlock = THREADS / gridY;
+    cudaEvent_t start, stop;
+    float elapsedTime = 0.0f;
+
+    if (lab8_cuda_failed(cudaEventCreate(&start), "cudaEventCreate(start)")) {
+        return 1;
+    }
+    if (lab8_cuda_failed(cudaEventCreate(&stop), "cudaEventCreate(stop)")) {
+        cudaEventDestroy(start);
+        return 1;
+    }
+
+    dim3 grid(1, gridY);
+    int failed = lab8_cuda_failed(cudaEventRecord(start, 0), "cudaEventRecord(start)");
+    if (!failed) {
+        sum_kernel<<<grid, threadsPerBlock>>>();
+        // A launch does not return a status; an invalid configuration is
+        // only visible through cudaGetLastError().
+        failed = lab8_cuda_failed(cudaGetLastError(), "sum_kernel launch");
+    }
+    if (!failed) {
+        failed = lab8_cuda_failed(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
+    }
+    if (!failed) {
+        failed = lab8_cuda_failed(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
+    }
+    if (!failed) {
+        failed = lab8_cuda_failed(cudaEventElapsedTime(&elapsedTime, start, stop),
+                                  "cudaEventElapsedTime");
+    }
+
+    if (!failed) {
+        printf("\nProcessors: %i\nShared mem per block: %i", PROX, SHARED_MEM_PER_BLOCK);
+        printf("\nGrid: 1x%i array of blocks, %i threads per block, S=%i -> %f\n", gridY, threadsPerBlock, (ARRAY_SIZE), elapsedTime);
+    }
+
+    cudaEventDestroy(start);
+    cudaEventDestroy(stop);
+    return failed;
+}
+
+/**
+ * Workload kernel for the grid-configuration timing runs.
+ *
+ * Declares a __shared__ int array of ARRAY_SIZE elements, has each thread
+ * zero the element at index threadIdx.x % 16, adds the integers 1..1000 into
+ * a thread-local total that is never read afterwards, and finally waits at a
+ * block-wide barrier. Takes no arguments and writes nothing to global memory.
+ */
+__global__ void sum_kernel() {
+    __shared__ int filler[ARRAY_SIZE];
+
+    // Only the first 16 elements are ever written.
+    filler[threadIdx.x % 16] = 0;
+
+    int total = 0;
+    int term = 1;
+    while (term <= 1000) {
+        total += term;
+        ++term;
+    }
+
+    __syncthreads();
+}
+
View
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+# Program 1
+nvcc lab8.cu -o lab8-1.exe -L/usr/lib/nvidia-current -DGRID_Y=4 -DARRAY_SIZE=3*SHARED_MEM_PER_BLOCK/4
+
+# Program 2
+nvcc lab8.cu -o lab8-2.exe -L/usr/lib/nvidia-current -DGRID_Y=PROX -DARRAY_SIZE=3*SHARED_MEM_PER_BLOCK/4
+
+# Program 3
+nvcc lab8.cu -o lab8-3.exe -L/usr/lib/nvidia-current -DGRID_Y=2*PROX -DARRAY_SIZE=3*SHARED_MEM_PER_BLOCK/4
+
+# Program 4
+nvcc lab8.cu -o lab8-4.exe -L/usr/lib/nvidia-current -DGRID_Y=4 -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/2
+
+# Program 5
+nvcc lab8.cu -o lab8-5.exe -L/usr/lib/nvidia-current -DGRID_Y=PROX -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/2
+
+# Program 6
+nvcc lab8.cu -o lab8-6.exe -L/usr/lib/nvidia-current -DGRID_Y=2*PROX -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/2
+
+# Program 7
+nvcc lab8.cu -o lab8-7.exe -L/usr/lib/nvidia-current -DGRID_Y=4 -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/4
+
+# Program 8
+nvcc lab8.cu -o lab8-8.exe -L/usr/lib/nvidia-current -DGRID_Y=PROX -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/4
+
+# Program 9
+nvcc lab8.cu -o lab8-9.exe -L/usr/lib/nvidia-current -DGRID_Y=2*PROX -DARRAY_SIZE=SHARED_MEM_PER_BLOCK/4
+
+./lab8-1.exe
+./lab8-2.exe
+./lab8-3.exe
+./lab8-4.exe
+./lab8-5.exe
+./lab8-6.exe
+./lab8-7.exe
+./lab8-8.exe
+./lab8-9.exe
+

0 comments on commit 3d14334

Please sign in to comment.