<h3>5.7 GPU加速</h3>

5.7.1 单GPU加速

In [1]:
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# for batch_idx, (img, label) in enumerate(train_loader):
#     img=img.to(device)
#     label=label.to(device)


In [2]:
# 实例化网络
# model = Net()
# model.to(device)   #使用序号为0的GPU
# 或model.to(device1) #使用序号为1的GPU

5.7.2 多GPU加速

In [3]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy  as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

boston = load_boston()
X,y = (boston.data, boston.target)
dim = X.shape[1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
#组合训练数据及标签
myset = list(zip(X_train,y_train))


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

In [4]:
#对训练数据进行标准化
mean=X_train.mean(axis=0)
std=X_train.std(axis=0)
X_train-=mean
X_train/=std

X_test-=mean
X_test/=std

In [5]:
#实例化模型
device_ids =[0,1,2,3]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.FloatTensor
#train_data=nn.DataParallel(torch.from_numpy(X_train).type(dtype))
train_data=torch.from_numpy(X_train).type(dtype).to(device)
train_target=torch.from_numpy(y_train).type(dtype).to(device)
test_data=torch.from_numpy(X_test).type(dtype).to(device)
test_target=torch.from_numpy(y_test).type(dtype).to(device)

In [6]:
from torch.utils import data
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.FloatTensor
train_loader = data.DataLoader(myset,batch_size=128,shuffle=True)

构建网络

In [7]:
class Net1(nn.Module):
    """
    使用sequential构建网络，Sequential()函数的功能是将网络的层组合到一起
    """
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Net1, self).__init__()
        self.layer1 = torch.nn.Sequential(nn.Linear(in_dim, n_hidden_1))
        self.layer2 = torch.nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2))
        self.layer3 = torch.nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        x1 = F.relu(self.layer1(x))
        x1 = F.relu(self.layer2(x1))
        x2 = self.layer3(x1)
        #显示每个GPU分配的数据大小
        print("\tIn Model: input size", x.size(),"output size", x2.size())
        return x2

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#实例化网络
model = Net1(13, 16, 32, 1)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs")
    # dim = 0 [64, xxx] -> [32, ...], [32, ...] on 2GPUs
    model = nn.DataParallel(model)

model.to(device)


Net1(
  (layer1): Sequential(
    (0): Linear(in_features=13, out_features=16, bias=True)
  )
  (layer2): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
  )
  (layer3): Sequential(
    (0): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [9]:
optimizer_orig = torch.optim.Adam(model.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

模型训练，并可视化损失值

In [10]:
from tensorboardX import SummaryWriter
writer = SummaryWriter(log_dir='logs')
for epoch in range(100):        
    model.train()
    for data,label in train_loader:
        input = data.type(dtype).to(device)
        label = label.type(dtype).to(device)
        output = model(input)       
        loss = loss_func(output, label)
        # 反向传播
        optimizer_orig.zero_grad()
        loss.backward()
        optimizer_orig.step()
        print("Outside: input size", input.size() ,"output_size", output.size())
    writer.add_scalar('train_loss_paral',loss, epoch)


	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input size torch.Size([128, 13]) output_size torch.Size([128, 1])
	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input size torch.Size([128, 13]) output_size torch.Size([128, 1])
	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input size torch.Size([128, 13]) output_size torch.Size([128, 1])
	In Model: input size torch.Size([20, 13]) output size torch.Size([20, 1])
Outside: input size torch.Size([20, 13]) output_size torch.Size([20, 1])
	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input size torch.Size([128, 13]) output_size torch.Size([128, 1])
	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input size torch.Size([128, 13]) output_size torch.Size([128, 1])
	In Model: input size torch.Size([128, 13]) output size torch.Size([128, 1])
Outside: input 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
