2021 Takahiro Shinozaki @ Tokyo Tech

A quick introduction to using the GPU and CPU with PyTorch

References: 
*   https://pytorch.org/tutorials/

In [None]:
import torch
import torch.nn as nn
import time

To use a GPU, your computer must have one.

If you are using Google Colab, you can request a machine with a GPU from the top menu: Runtime -> Change runtime type -> GPU

# Check if a GPU is available

In [None]:
# Report whether PyTorch can see a CUDA-capable GPU on this machine.
gpu_message = (
    'CUDA（GPU）is available'
    if torch.cuda.is_available()
    else 'CUDA（GPU）is not available'
)
print(gpu_message)

CUDA（GPU）is available


In [None]:
# Check the GPU status.
# The leading "!" makes the notebook run the rest of the line as a shell
# command. nvidia-smi reports the driver and CUDA versions, the GPU model,
# and its current memory usage and utilization.
! nvidia-smi

Wed Nov 17 04:11:57 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P8    29W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Switch the device of a tensor

In [None]:
# Create the same 2x2 tensor on different devices, then move it between
# devices with Tensor.to(). Each step prints the tensor and where it lives.
data = [[1.0, 2.0], [3.0, 4.0]]

# No device argument: the tensor is placed on the default device.
x = torch.tensor(data)
print(x, x.device, sep='\n')

# Explicitly request the CPU.
x = torch.tensor(data, device='cpu')
print(x, x.device, sep='\n')

# Explicitly request the GPU (requires CUDA to be available).
x = torch.tensor(data, device='cuda')
print(x, x.device, sep='\n')

# Copy the GPU tensor back to the CPU ...
y = x.to('cpu')
print(y, y.device, sep='\n')

# ... and copy the CPU tensor to the GPU again.
z = y.to('cuda')
print(z, z.device, sep='\n')

tensor([[1., 2.],
        [3., 4.]])
cpu
tensor([[1., 2.],
        [3., 4.]])
cpu
tensor([[1., 2.],
        [3., 4.]], device='cuda:0')
cuda:0
tensor([[1., 2.],
        [3., 4.]])
cpu
tensor([[1., 2.],
        [3., 4.]], device='cuda:0')
cuda:0


The speed-up gained by using a GPU depends on the type and size of the computation.

Since using a GPU involves some overhead, a GPU is usually more advantageous when handling larger problems.

# Compare the computation speed of the CPU and the GPU

## Matrix product

In [None]:
# Time a dim x dim matrix product on the CPU and on the GPU, for a small and
# a large problem size.
#
# CUDA operations are launched asynchronously: torch.mm on a 'cuda' tensor
# returns as soon as the work has been queued, not when it has finished.
# torch.cuda.synchronize() is therefore called before starting and before
# stopping the GPU timer, so the measured interval covers the computation
# itself rather than only the kernel launch.
#
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring short durations.
for dim in (50, 5000):
    print('dim= ', dim)

    xcpu = torch.rand(dim, dim, device='cpu')
    start_time = time.perf_counter()
    y = torch.mm(xcpu, xcpu)
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (CPU) =', elapsed_time, 'sec')

    xgpu = torch.rand(dim, dim, device='cuda')
    torch.cuda.synchronize()  # make sure the random fill is not timed
    start_time = time.perf_counter()
    y = torch.mm(xgpu, xgpu)
    torch.cuda.synchronize()  # wait until the product has actually finished
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (GPU) =', elapsed_time, 'sec')

dim=  50
elapsed_time (CPU) = 0.0003695487976074219 sec
elapsed_time (GPU) = 0.0005223751068115234 sec
dim=  5000
elapsed_time (CPU) = 3.503896713256836 sec
elapsed_time (GPU) = 0.0044498443603515625 sec


## Matrix determinant

In [None]:
# Time the determinant of a dim x dim matrix on the CPU and on the GPU, for a
# small and a large problem size.
#
# CUDA operations can run asynchronously with respect to the Python code, so
# torch.cuda.synchronize() is called before starting and before stopping the
# GPU timer. Some linear-algebra routines may already block until their result
# is ready, but synchronizing explicitly keeps the measurement correct without
# relying on that.
#
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring short durations.
for dim in (50, 5000):
    print('dim= ', dim)

    xcpu = torch.rand(dim, dim, device='cpu')
    start_time = time.perf_counter()
    y = torch.linalg.det(xcpu)
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (CPU) =', elapsed_time, 'sec')

    xgpu = torch.rand(dim, dim, device='cuda')
    torch.cuda.synchronize()  # make sure the random fill is not timed
    start_time = time.perf_counter()
    y = torch.linalg.det(xgpu)
    torch.cuda.synchronize()  # wait until the determinant has been computed
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (GPU) =', elapsed_time, 'sec')


dim=  50
elapsed_time (CPU) = 0.05368828773498535 sec
elapsed_time (GPU) = 0.07685184478759766 sec
dim=  5000
elapsed_time (CPU) = 1.45981764793396 sec
elapsed_time (GPU) = 0.3555586338043213 sec


## Eigenvalues

In [None]:
# Small worked example: for a diagonal matrix the eigenvalues are the diagonal
# entries. torch.linalg.eig returns the eigenvalues and the eigenvectors, both
# as complex tensors.
x = torch.tensor([[1.0,0.0],[0.0,3.0]])
e,v = torch.linalg.eig(x)
print(e)
print(v)

# Time the eigendecomposition of a dim x dim matrix on the CPU and on the GPU,
# for a small and a large problem size.
#
# CUDA operations can run asynchronously with respect to the Python code, so
# torch.cuda.synchronize() is called before starting and before stopping the
# GPU timer. Some linear-algebra routines may already block until their result
# is ready, but synchronizing explicitly keeps the measurement correct without
# relying on that.
#
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring short durations.
for dim in (50, 5000):
    print('dim= ', dim)

    xcpu = torch.rand(dim, dim, device='cpu')
    start_time = time.perf_counter()
    e, v = torch.linalg.eig(xcpu)
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (CPU) =', elapsed_time, 'sec')

    xgpu = torch.rand(dim, dim, device='cuda')
    torch.cuda.synchronize()  # make sure the random fill is not timed
    start_time = time.perf_counter()
    e, v = torch.linalg.eig(xgpu)
    torch.cuda.synchronize()  # wait until the decomposition has finished
    elapsed_time = time.perf_counter() - start_time
    print('elapsed_time (GPU) =', elapsed_time, 'sec')

tensor([1.+0.j, 3.+0.j])
tensor([[1.+0.j, 0.+0.j],
        [0.+0.j, 1.+0.j]])
dim=  50
elapsed_time (CPU) = 0.028529882431030273 sec
elapsed_time (GPU) = 0.002012014389038086 sec
dim=  5000
elapsed_time (CPU) = 43.454078912734985 sec
elapsed_time (GPU) = 23.43065309524536 sec
