In [7]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import csv
import numpy as np
import pandas as pd

### 优化
判断模型是否包含正确的结果
判断优化器(optimization)是否能找到正确结果

#### optimization



创建Dataset

In [8]:
class MyDataset(Dataset):
    def __init__(self,x,y=None) -> None:
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)
        self.dim = self.x.size()[1]
    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)


创建Dataloader

In [9]:
# dataset = MyDataset('./covid.train.csv')
data =pd.read_csv('./covid.train.csv').values
data_test =pd.read_csv('./covid.test.csv').values
data_train,data_valid = random_split(data, [int(len(data)*0.8), len(data)-int(len(data)*0.8)], generator=torch.Generator().manual_seed(42)) # 随机分配
x_train, x_valid,x_test = np.array(data_train)[:,:-1],np.array(data_valid)[:,:-1],np.array(data_test) # 划分x和y
y_train, y_valid = np.array(data_train)[:,-1],np.array(data_valid)[:,-1]

dataset_train,dataset_valid,dataset_test = MyDataset(x_train,y_train), MyDataset(x_valid,y_valid), MyDataset(x_test)

# dataset_test = MyDataset('./covid.test.csv',mode='test')
data_loader_train = DataLoader(dataset_train,256,shuffle=True,drop_last=False,num_workers=0,pin_memory=True )
data_loader_valid = DataLoader(dataset_valid,256,shuffle=True, drop_last=False,num_workers=0,pin_memory=True )
data_loader_test = DataLoader(dataset_test,256,shuffle=True, drop_last=False,num_workers=0,pin_memory=True )

创建Neural Network

In [10]:
class MyModule(nn.Module):
    def __init__(self, dim) -> None:
        super(MyModule,self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )
        self.loss = nn.MSELoss(reduction='mean')
    def forward(self, x):
        # print(1,x.size())
        x = self.net(x)
        # print(2,x.size())
        x = x.squeeze(1) # (B, 1) -> (B)
        return x
    def calc_loss(self, pre, tar): # 计算loss
        return self.loss(pre, tar)

创建model和Optimizer

In [11]:
import math
# model = MyModule(data_loader_test.dataset.dim).to('cuda')
model = MyModule(data_loader_test.dataset.dim)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5,momentum= 0.9)



训练

In [14]:
def train():
    best_loss = math.inf
    stop = 0
    for i in range(3000):
        model.train()
        losses = []
        for x,y in data_loader_train:
            optimizer.zero_grad()
            # x,y = x.to('cuda'),y.to('cuda')
            pre = model(x)
            loss = model.calc_loss(pre,y)
            # print(loss.detach().item())
            losses.append(loss.detach().item())
            loss.backward()
            optimizer.step()
        mean_loss_train = sum(losses)/len(losses)
        model.eval()
        losses = []
        # print('valid')
        for x,y in data_loader_valid:
            # x,y = x.to('cuda'),y.to('cuda')
            with torch.no_grad():
                pre = model(x) # 删除了数据集的索引就不报错了
                loss = model.calc_loss(pre,y)
                losses.append(loss.detach().item())
        mean_loss_valid = sum(losses)/len(losses)
        print(f' Train loss: {mean_loss_train:.4f}, Valid loss: {mean_loss_valid:.4f}')
        if mean_loss_valid <best_loss:
            best_loss = mean_loss_valid
            torch.save(model.state_dict(), 'model.ckpt') # 导出模型
            stop = 0
        else:
            stop+=1
        if stop>400:
            return
        
        

In [15]:
train()

 Train loss: 148.7262, Valid loss: 76.8678
 Train loss: 81.5647, Valid loss: 73.5081
 Train loss: 67.9641, Valid loss: 49.9745
 Train loss: 52.8826, Valid loss: 46.0838
 Train loss: 47.4801, Valid loss: 42.1899
 Train loss: 42.6923, Valid loss: 44.7932
 Train loss: 39.8269, Valid loss: 36.0895
 Train loss: 37.3231, Valid loss: 37.7718
 Train loss: 34.3001, Valid loss: 29.6582
 Train loss: 31.3001, Valid loss: 33.3422
 Train loss: 27.6055, Valid loss: 26.7052
 Train loss: 24.1515, Valid loss: 23.4886
 Train loss: 20.5608, Valid loss: 22.7611
 Train loss: 16.7945, Valid loss: 17.2526
 Train loss: 13.9721, Valid loss: 14.7282
 Train loss: 12.1991, Valid loss: 10.0148
 Train loss: 10.6915, Valid loss: 8.2423
 Train loss: 9.9530, Valid loss: 8.5539
 Train loss: 9.3381, Valid loss: 9.3192
 Train loss: 8.9960, Valid loss: 8.7788
 Train loss: 8.6306, Valid loss: 6.5263
 Train loss: 8.4146, Valid loss: 7.1716
 Train loss: 8.1939, Valid loss: 7.6247
 Train loss: 8.1386, Valid loss: 6.6109
 Train

In [18]:
def predict():
    model.load_state_dict(torch.load('model.ckpt')) # 加载本地模型
    model.eval()
    preds = []
    for i in data_loader_test:
        # i = i.to('cuda')
        with torch.no_grad():
            pred = model(i)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds,dim=0).numpy()
    print(preds)

In [19]:
predict()

[ 5.2590537 11.131734  33.010853  17.148214  20.878134   4.236643
  5.8868484  5.859329  14.62074   26.17326    8.878503  11.0845585
 12.60321   11.07266   24.362867  23.23411   18.346348  15.167547
 16.587662   3.8312    14.701917  18.433968  36.35982   25.545593
 18.416737  22.20093   17.76818   10.798759  17.670137   6.129708
 14.28406    9.586671  12.44957   11.936432  12.988687  14.743255
 23.16463   17.87775   20.43757    4.387268  10.276024  16.742222
  6.6830683  9.482897  38.604996   8.776642   5.4218087 23.288706
  2.8719277 10.66676    8.605672   8.030998   6.7649403 19.951702
 20.215288  34.60758   26.024807  12.93356   19.901543  19.85
 11.024064  17.040083  15.672958  17.229134   7.984905  19.609497
 16.896896   5.7404146  2.8768568 16.154026   9.342786  18.898943
 15.215969  16.25014   30.546555  20.925379  25.460207  27.628939
  9.142287  15.454375   6.960889  10.987697  23.196444   6.680127
 12.426985  26.287195  22.744328  11.26139    4.888595  20.43844
 18.359438  17