<a href="https://colab.research.google.com/github/veager/StudyNotes/blob/new/Codes/PyTorch-Tutorial/PyTorch-GPU%E5%8A%A0%E9%80%9F.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PyTorch GPU 加速

参考资料

- 博客：PyTorch GPU 加速, [site](https://www.cnblogs.com/veager/articles/16298631.html)

- Github：Codes/PyTorch-Tutorial/PyTorch-GPU加速.ipynb

# 1 GPU 加速

In [None]:
# Show information about the attached GPU(s) by shelling out to the NVIDIA CLI
# (driver / CUDA version, memory usage, utilization).
!nvidia-smi

Thu May 26 04:50:16 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## 1.1 常用的 device 相关函数

In [None]:
import torch
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# Remember whether a CUDA-capable GPU can be used, and show the answer.
use_gpu = torch.cuda.is_available()
print(use_gpu)

# A torch.device can be built from several specifications:
#   'cpu'    -> the CPU device
#   0        -> way 1 to get the GPU device: by index
#   'cuda'   -> way 2 to get the GPU device: by type (no explicit index)
#   'cuda:0' -> way 3 to get the GPU device: by type and index
for device_spec in ('cpu', 0, 'cuda', 'cuda:0'):
    print(torch.device(device_spec))

# Inspect a device object: the device itself and its type string.
gpu_device = torch.device('cuda')
print(gpu_device, gpu_device.type)

# The torch.cuda queries below are only issued when a GPU is actually usable.
if use_gpu:
    print(torch.cuda.device_count())      # number of GPUs
    print(torch.cuda.current_device())    # index of the currently selected GPU
    print(torch.cuda.device(0))           # a torch.cuda.device object for GPU 0
    print(torch.cuda.get_device_name(0))  # name of GPU 0

True
cpu
cuda:0
cuda
cuda:0
cuda cuda
1
0
<torch.cuda.device object at 0x7f115f9d7610>
Tesla T4


## 1.2 `Tensor` 数据与 device 相关的属性和方法

In [None]:
import torch

# Define the device handle. This only describes the target device, so it is
# kept outside the availability guard below.
gpu_device = torch.device("cuda")

# Everything below allocates memory on the GPU, so it is only run when CUDA is
# usable. Without this guard the cell raises on a CPU-only machine.
if torch.cuda.is_available():
    # Method 1 (recommended): create the tensor directly on the GPU.
    # The target may be given as an index, a torch.device, or a string.
    tensor_X1 = torch.tensor([1, 2], device = 0)
    tensor_X2 = torch.tensor([1, 2], device = gpu_device)
    tensor_X3 = torch.tensor([1, 2], device = "cuda:0")
    print(tensor_X1.device, tensor_X2.device, tensor_X3.device)
    # Output:  cuda:0  cuda:0  cuda:0

    # Method 2: create the tensor on the CPU first, then copy it to the GPU.
    tensor_X1 = torch.tensor([1, 2])       # created on the CPU
    tensor_X2 = tensor_X1.to(device = gpu_device)
    print(tensor_X1.device, tensor_X2.device)
    # Output:  cpu  cuda:0
    tensor_X3 = tensor_X1.cuda()           # default CUDA device
    tensor_X4 = tensor_X1.cuda(0)          # other argument forms give the same result
    tensor_X5 = tensor_X1.cuda(gpu_device)
    tensor_X6 = tensor_X1.cuda("cuda:0")
    print(tensor_X3.device, tensor_X4.device, tensor_X5.device, tensor_X6.device)
    # Output:  cuda:0  cuda:0  cuda:0  cuda:0
else:
    print("CUDA is not available; skipping the GPU tensor examples.")

cuda:0 cuda:0 cuda:0
cpu cuda:0
cuda:0 cuda:0 cuda:0 cuda:0


将 GPU 上的 tensor 复制到 CPU 上

In [None]:
cpu_device = torch.device("cpu")

# Create the source tensor on the GPU when CUDA is usable. On a CPU-only
# machine fall back to a CPU tensor so the cell still runs; the three calls
# below then simply yield CPU tensors.
source_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tensor_X1 = torch.tensor([1, 2], device=source_device)

# Three equivalent ways to bring the tensor's data to the CPU.
tensor_X2 = tensor_X1.to(cpu_device)   # by torch.device object
tensor_X3 = tensor_X1.to("cpu")        # by device string
tensor_X4 = tensor_X1.cpu()            # dedicated shortcut

print(tensor_X1.device, tensor_X2.device, tensor_X3.device, tensor_X4.device)

cuda:0 cpu cpu cpu


## 1.3 使用 GPU 训练

## 1.4 使用 `Dataset()` 类和 `DataLoader()` 类加载数据

In [None]:
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.dataloader import default_collate

# Toy dataset: 10 samples, each a single tensor with 2 features (all ones).
mydataset = TensorDataset(torch.ones((10,2)))

# Device the batches are moved to: the GPU when CUDA is usable, otherwise the
# CPU, so the cell also runs on machines without a GPU.
loader_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def collate_to_device(samples):
    """Collate ``samples`` into a batch and move every field to ``loader_device``.

    Args:
        samples: the list of per-item tuples handed over by the DataLoader.

    Returns:
        A tuple with one batched tensor per dataset field, each placed on
        ``loader_device``.
    """
    return tuple(field.to(loader_device) for field in default_collate(samples))


# A named function is used instead of a lambda so that it can carry a
# docstring and be pickled.
dataloader = DataLoader(mydataset, collate_fn = collate_to_device)

for i, (x, ) in enumerate(dataloader):
    print(i, x, x.device)

0 tensor([[1., 1.]], device='cuda:0') cuda:0
1 tensor([[1., 1.]], device='cuda:0') cuda:0
2 tensor([[1., 1.]], device='cuda:0') cuda:0
3 tensor([[1., 1.]], device='cuda:0') cuda:0
4 tensor([[1., 1.]], device='cuda:0') cuda:0
5 tensor([[1., 1.]], device='cuda:0') cuda:0
6 tensor([[1., 1.]], device='cuda:0') cuda:0
7 tensor([[1., 1.]], device='cuda:0') cuda:0
8 tensor([[1., 1.]], device='cuda:0') cuda:0
9 tensor([[1., 1.]], device='cuda:0') cuda:0


# 2 Colab TPU 加速