In [0]:
import numpy as np
import cupy as cp
import time

In [0]:
def matprint(mat, fmt="g"):
    col_maxes = [max([len(("{:"+fmt+"}").format(x)) for x in col]) for col in mat.T]
    for x in mat:
        for i, y in enumerate(x):
            print(("{:"+str(col_maxes[i])+fmt+"}").format(y), end="  ")
        print("")
    print()

In [3]:
m = np.ones((3, 3))
v = np.ones((3, 1))
p = np.dot(m,v)
matprint(m)
matprint(v)
matprint(p)



1  1  1  
1  1  1  
1  1  1  

1  
1  
1  

3  
3  
3  



In [22]:
N = 50000000
# Vector + Vector
############CPU###############

s = time.time()

a_h = np.ones((N))
b_h = np.ones((N))
c_h = np.ones((N))

c_h = a_h + b_h

e = time.time()
cpu_time = (e - s) * 1000.0
print(f"CPU Time: {cpu_time} msec")

############GPU###############
stream = cp.cuda.Stream.null
start = stream.record()

a_d = cp.ones((N))
b_d = cp.ones((N))
c_d = cp.ones((N))

c_d = a_d + b_d

end = stream.record()
end.synchronize()
gpu_time = cp.cuda.get_elapsed_time(start, end)
print(f"GPU Time: {gpu_time} msec")

############Speedup###############
print(f"Speedup = {cpu_time/gpu_time}")
# print(c_d[:10])

CPU Time: 390.06853103637695 msec
GPU Time: 17.771711349487305 msec
Speedup = 21.948844619718066


In [5]:
N = 10000000
# Vector * Scalar (Size: 10m)
############CPU###############
s = time.time()

v_cpu = np.ones((N))
p_cpu = v_cpu * 5

e = time.time()
cpu_time = (e - s) * 1000.0
print(f"CPU Time: {cpu_time} msec")

############GPU###############
stream = cp.cuda.Stream.null
start = stream.record()

v_gpu = cp.ones((N))
p_gpu = v_gpu * 5

end = stream.record()
end.synchronize()
gpu_time = cp.cuda.get_elapsed_time(start, end)
print(f"GPU Time: {gpu_time} msec")

############Speedup###############
print(f"Speedup = {cpu_time/gpu_time}")

CPU Time: 59.157609939575195 msec
GPU Time: 218.20953369140625 msec
Speedup = 0.27110460729564817


In [6]:
N = 20000
# Matrix * Vector
############CPU###############
s = time.time()

m_cpu = np.ones((N, N))
v_cpu = np.ones((N, 1))
p_cpu = np.dot(m_cpu, v_cpu)

e = time.time()
cpu_time = (e - s) * 1000.0
print(f"CPU Time: {cpu_time} msec")

############GPU###############
stream = cp.cuda.Stream.null
start = stream.record()

m_gpu = cp.ones((N, N))
v_gpu = cp.ones((N, 1))
p_gpu = cp.dot(m_gpu, v_gpu)

end = stream.record()
end.synchronize()
gpu_time = cp.cuda.get_elapsed_time(start, end)
print(f"GPU Time: {gpu_time} msec")

############Speedup###############
print(f"Speedup = {cpu_time/gpu_time}")

CPU Time: 1054.3713569641113 msec
GPU Time: 790.5429077148438 msec
Speedup = 1.3337307142655854


In [7]:
N = 5000
# Matrix * Matrix
############CPU###############
s = time.time()

m1_cpu = np.ones((N, N))
m2_cpu = np.ones((N, N))
p_cpu = np.dot(m1_cpu, m2_cpu)

e = time.time()
cpu_time = (e - s) * 1000.0
print(f"CPU Time: {cpu_time} msec")

############GPU###############
stream = cp.cuda.Stream.null
start = stream.record()

x_gpu = cp.ones((N, N))
v_gpu = cp.ones((N, N))
p_gpu = cp.dot(x_gpu, v_gpu)

end = stream.record()
end.synchronize()
gpu_time = cp.cuda.get_elapsed_time(start, end)
print(f"GPU Time: {gpu_time} msec")

############Speedup#############
print(f"Speedup = {cpu_time/gpu_time}")

CPU Time: 3770.7877159118652 msec
GPU Time: 291.92529296875 msec
Speedup = 12.916961314193218
