In [1]:
import torch
from torch import nn

In [3]:
# NOTE(review): this only constructs a torch.cuda.device object and displays
# its repr; it is never entered with `with`, so presumably it does not change
# the current device -- confirm whether this cell is needed at all.
torch.cuda.device('cuda')

<torch.cuda.device at 0x232ef63e4e0>

In [2]:
# Number of CUDA devices PyTorch can see on this machine.
torch.cuda.device_count()

1

In [6]:
def try_gpu(i=0):
    """Return the ``i``-th CUDA device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the GPU to request. Defaults to 0.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``0 <= i < torch.cuda.device_count()``,
        ``torch.device('cpu')`` in every other case.
    """
    # The lower bound matters: with only an upper-bound check, any negative
    # index satisfies "enough GPUs" and the function would try to build a
    # device such as 'cuda:-1' instead of falling back to the CPU.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    """Return a list of all visible CUDA devices, or ``[cpu]`` if there are none.

    Returns:
        A list with one ``torch.device('cuda:<k>')`` per GPU reported by
        ``torch.cuda.device_count()``, in index order. When no GPU is
        reported, a single-element list holding ``torch.device('cpu')``.
    """
    found = []
    for idx in range(torch.cuda.device_count()):
        found.append(torch.device(f'cuda:{idx}'))
    if not found:
        # No GPU available: fall back to the CPU so callers always get a device.
        return [torch.device('cpu')]
    return found

# Exercise the helpers: the default GPU, an index that does not exist on this
# machine (falls back to the CPU), and the list of all available devices.
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

In [7]:
# Tensors are stored on the CPU by default
x = torch.tensor([1,2,3])
x.device

device(type='cpu')

In [8]:
# Create the tensor directly on the GPU by passing the target device
X = torch.ones((2,3), device=try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [9]:
# No device argument is given, so Y is created on the default device (CPU)
Y = torch.ones((2,3))
Y

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [10]:
# Two tensors must be on the same device to be combined in an operation.
# In principle, tensors on different devices could be operated on,
# but that would require copying data back and forth and is hard to debug,
# so the framework simply raises an error instead.
# This cell intentionally triggers that error: X is on the GPU, Y is on the CPU.
X + Y

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [12]:
# Copy Y from the CPU to the first GPU (index 0)
Z = Y.cuda(0)
Z.device

device(type='cuda', index=0)

In [13]:
# Both operands now live on cuda:0, so the addition succeeds
Z + X

tensor([[2., 2., 2.],
        [2., 2., 2.]], device='cuda:0')

## 在GPU上创建网络

In [14]:
# Build a linear layer with 3 input features and 1 output feature
net = nn.Linear(3,1)
# Move the layer's parameters to the GPU (or the CPU if no GPU is available)
net = net.to(device=try_gpu())
# X was created on the same device, so the forward pass can run there
net(X)

tensor([[0.7377],
        [0.7377]], device='cuda:0', grad_fn=<AddmmBackward>)

In [15]:
# Check which device the layer's weight tensor is stored on
net.weight.data.device

device(type='cuda', index=0)