In [1]:
# Show the GPUs visible to the NVIDIA driver and their current memory usage
# (shell command executed through IPython's `!` escape).
!nvidia-smi

Sun Jan  9 04:34:55 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   38C    P0    26W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import torch
from torch import nn

In [3]:
# Number of CUDA devices PyTorch can see; the helper functions defined next
# use this count to decide which GPU indices are valid.
torch.cuda.device_count()

1

In [4]:
def try_gpu(i=0):
    """Return the ``i``-th CUDA device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the desired GPU.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``0 <= i < torch.cuda.device_count()``,
        else ``torch.device('cpu')``.
    """
    # A negative index can never name a GPU. Without the lower bound the count
    # check passes for i < 0 and torch.device('cuda:-1') raises instead of
    # falling back to the CPU.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    """List every CUDA device visible to PyTorch, or ``[cpu]`` if there is none."""
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        # No GPU available: return the CPU so callers always get a non-empty list.
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

In [5]:
# Exercise both helpers: the default index, an index that does not exist on
# this machine (falls back to the CPU), and the full device list.
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

### Moving data between CPU and GPU

In [6]:
# No device argument is given here; inspect where the tensor ends up.
x = torch.tensor([1,2,3])
x.device

device(type='cpu')

In [7]:
# Passing device= creates the tensor directly on that device; try_gpu() picks
# cuda:0 when a GPU is present and the CPU otherwise.
X = torch.ones(2, 3, device=try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [8]:
# Check GPU memory again now that X has been allocated on the device; compare
# the Memory-Usage column with the first nvidia-smi call.
!nvidia-smi

Sun Jan  9 04:35:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:1E.0 Off |                    0 |
| N/A   39C    P0    26W /  70W |   1038MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [22]:
# A second tensor created without a device argument; used in the following
# cells to demonstrate copying from CPU to GPU.
Y = torch.rand(2, 3)
Y

tensor([[0.3768, 0.8501, 0.3747],
        [0.7386, 0.4954, 0.6061]])

In [23]:
# Y was created without a device argument; confirm where it lives.
Y.device

device(type='cpu')

In [24]:
# .cuda(0) returns a copy of Y in the memory of GPU 0.
# NOTE(review): the GPU index is hard-coded, so this cell fails on a CPU-only
# machine; Y.to(try_gpu()) would fall back to the CPU instead.
Z = Y.cuda(0)
Z

tensor([[0.3768, 0.8501, 0.3747],
        [0.7386, 0.4954, 0.6061]], device='cuda:0')

In [25]:
# Confirm that the tensor returned by .cuda(0) lives on GPU 0.
Z.device

device(type='cuda', index=0)

In [38]:
# Z is already on cuda:0, so .cuda(0) performs no copy and returns Z itself.
Z.cuda(0) is Z

True

In [39]:
# Careful: Y lives on the CPU, so Y.cuda(0) creates a fresh GPU copy on every
# call. The values equal Z's, but it is a different tensor object.
Y.cuda(0) is Z

False

### Running a model on the GPU

In [41]:
# X was allocated on the GPU earlier; it serves as the model input below.
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [42]:
# Build a one-layer network (3 inputs -> 1 output) and move its parameters to
# the device chosen by try_gpu() in a single expression.
model = nn.Sequential(nn.Linear(3, 1)).to(device=try_gpu())

In [43]:
# Forward pass: the model and X must be on the same device for this to work.
model(X)

tensor([[0.1924],
        [0.1924]], device='cuda:0', grad_fn=<AddmmBackward>)

In [44]:
# The layer's weight was moved together with the model; check its device.
model[0].weight.device

device(type='cuda', index=0)