# Benchmarking CPU vs. GPU

In [38]:
import numpy as np
import matplotlib.pyplot as plt
from time import time
import torch
import pandas

In [39]:
# Create some functions to test, they will be run on the CPU and GPU
# They should be vectorized to take advantage of the GPU

def scalar_multiplication(x, y):
    return x * y

def matrix_multiplication(x, y):
    return x @ y

### Scalar Multiplication

In [40]:
# Make numpy vectors
x, y = np.random.rand(int(1e8)), np.random.rand(int(1e8))

In [41]:
tic = time()
output = scalar_multiplication(x, y)
toc = time()

print(f"Time taken: {toc - tic}")

Time taken: 0.3863370418548584


In [42]:
# Make numpy vectors
x, y = torch.rand(int(1e8)), torch.rand(int(1e8))

In [43]:
tic = time()
output = scalar_multiplication(x, y)
toc = time()

print(f"Time taken: {toc - tic}")

Time taken: 0.16155266761779785


In [44]:
# Make numpy vectors
x, y = torch.rand(int(1e8)).cuda(), torch.rand(int(1e8)).cuda()

Naive timing

In [45]:
tic = time()
output = scalar_multiplication(x, y)
toc = time()

print(f"Time taken: {toc - tic}")

Time taken: 0.0379030704498291


Proper synchronized timing

In [46]:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
output = scalar_multiplication(x, y)
end.record()

torch.cuda.synchronize()

print(f"Time taken: {start.elapsed_time(end)}")

Time taken: 2.328864097595215


## Matrix Multiplication

In [47]:
# Make numpy vectors
x, y = np.random.rand(int(1e4), int(1e4)), np.random.rand(int(1e4), int(1e4))

In [48]:
tic = time()
output = matrix_multiplication(x, y)
toc = time()

print(f"Time taken: {toc - tic}")

Time taken: 20.937828302383423


In [49]:
# Make numpy vectors
x, y = torch.rand(int(1e4), int(1e4)), torch.rand(int(1e4), int(1e4))

In [50]:
tic = time()
output = matrix_multiplication(x, y)
toc = time()

print(f"Time taken: {toc - tic}")

Time taken: 11.135852336883545


In [51]:
# Make numpy vectors
x, y = torch.rand(int(1e4)).cuda(), torch.rand(int(1e4)).cuda()

In [56]:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
output = matrix_multiplication(x, y)
end.record()

torch.cuda.synchronize()

print(f"Time taken: {start.elapsed_time(end)}")

Time taken: 0.8213120102882385
