In [281]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import csv
import numpy as np
import pandas as pd

创建Dataset

In [282]:
class MyDataset(Dataset):
    def __init__(self,x,y=None) -> None:
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)
        self.dim = self.x.size()[1]
    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)


创建Dataloader

In [283]:
# dataset = MyDataset('./covid.train.csv')
data =pd.read_csv('./covid.train.csv').values
data_test =pd.read_csv('./covid.test.csv').values
data_train,data_valid = random_split(data, [int(len(data)*0.8), len(data)-int(len(data)*0.8)], generator=torch.Generator().manual_seed(42)) # 随机分配
x_train, x_valid,x_test = np.array(data_train)[:,:-1],np.array(data_valid)[:,:-1],np.array(data_test) # 划分x和y
y_train, y_valid = np.array(data_train)[:,-1],np.array(data_valid)[:,-1]

dataset_train,dataset_valid,dataset_test = MyDataset(x_train,y_train), MyDataset(x_valid,y_valid), MyDataset(x_test)

# dataset_test = MyDataset('./covid.test.csv',mode='test')
data_loader_train = DataLoader(dataset_train,256,shuffle=True,drop_last=False,num_workers=0,pin_memory=True )
data_loader_valid = DataLoader(dataset_valid,256,shuffle=True, drop_last=False,num_workers=0,pin_memory=True )
data_loader_test = DataLoader(dataset_test,256,shuffle=True, drop_last=False,num_workers=0,pin_memory=True )

创建Neural Network

In [284]:
class MyModule(nn.Module):
    def __init__(self, dim) -> None:
        super(MyModule,self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )
        self.loss = nn.MSELoss(reduction='mean')
    def forward(self, x):
        # print(1,x.size())
        x = self.net(x)
        # print(2,x.size())
        x = x.squeeze(1) # (B, 1) -> (B)
        return x
    def calc_loss(self, pre, tar): # 计算loss
        return self.loss(pre, tar)

创建model和Optimizer

In [285]:
import math
model = MyModule(data_loader_test.dataset.dim).to('cuda')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5,momentum= 0.9)



训练

In [286]:
def train():
    best_loss = math.inf
    stop = 0
    for i in range(3000):
        model.train()
        losses = []
        for x,y in data_loader_train:
            optimizer.zero_grad()
            x,y = x.to('cuda'),y.to('cuda')
            pre = model(x)
            loss = model.calc_loss(pre,y)
            # print(loss.detach().item())
            losses.append(loss.detach().item())
            loss.backward()
            optimizer.step()
        mean_loss_train = sum(losses)/len(losses)
        model.eval()
        losses = []
        # print('valid')
        for x,y in data_loader_valid:
            x,y = x.to('cuda'),y.to('cuda')
            with torch.no_grad():
                pre = model(x) # 删除了数据集的索引就不报错了
                loss = model.calc_loss(pre,y)
                losses.append(loss.detach().item())
        mean_loss_valid = sum(losses)/len(losses)
        print(f' Train loss: {mean_loss_train:.4f}, Valid loss: {mean_loss_valid:.4f}')
        if mean_loss_valid <best_loss:
            best_loss = mean_loss_valid
            torch.save(model.state_dict(), 'model.ckpt') # 导出模型
            stop = 0
        else:
            stop+=1
        if stop>400:
            return
        
        

In [287]:
train()

 Train loss: 320.0723, Valid loss: 311.6259
 Train loss: 222.0527, Valid loss: 84.9897
 Train loss: 81.0102, Valid loss: 44.9567
 Train loss: 61.3975, Valid loss: 52.0367
 Train loss: 48.8686, Valid loss: 41.4939
 Train loss: 42.8685, Valid loss: 36.5668
 Train loss: 38.1583, Valid loss: 38.6131
 Train loss: 33.1415, Valid loss: 31.0916
 Train loss: 29.0016, Valid loss: 26.5694
 Train loss: 24.6346, Valid loss: 25.3837
 Train loss: 20.0187, Valid loss: 18.4103
 Train loss: 15.7005, Valid loss: 12.1610
 Train loss: 12.4349, Valid loss: 11.6179
 Train loss: 10.2858, Valid loss: 9.4892
 Train loss: 9.3244, Valid loss: 7.7330
 Train loss: 8.9317, Valid loss: 8.6584
 Train loss: 8.6438, Valid loss: 8.0821
 Train loss: 8.4725, Valid loss: 6.4677
 Train loss: 8.2339, Valid loss: 7.1247
 Train loss: 8.1949, Valid loss: 6.4748
 Train loss: 8.0107, Valid loss: 8.1309
 Train loss: 7.6767, Valid loss: 6.4404
 Train loss: 7.4946, Valid loss: 6.1157
 Train loss: 7.4668, Valid loss: 6.3884
 Train los

In [276]:
def predict():
    model.load_state_dict(torch.load('model.ckpt')) # 加载本地模型
    model.eval()
    preds = []
    for i in data_loader_test:
        i = i.to('cuda')
        with torch.no_grad():
            pred = model(i)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds,dim=0).numpy()
    print(preds)

In [277]:
predict()

[19.045124  12.806736   5.0945525 14.367683  11.094612  20.212677
  5.780754  31.223135  32.93761   11.226249  11.815798  23.636429
 12.686301  17.69925   10.374793  15.883711  24.771757   5.072011
 14.870366  24.563065  22.562166  20.669008  20.334013   9.558588
 21.436989  24.68518   21.991175  24.931004  17.566477  25.284407
 16.511826  28.647865  21.588219  19.489206  18.725094  20.602745
 27.817377  12.535513   4.288083   7.177533  15.994921  18.730513
  5.8094325 28.940536  14.157928  18.011627  23.152994  17.064428
 16.88906   17.684113   8.812605   5.2682886  3.7998102 11.462024
 17.310291   7.421469  17.074781  23.6026    19.370867  15.160688
 19.290203   3.3408682 16.41777    5.365499   5.077559  22.80876
  5.4951344 18.974247  14.813986  23.455254  15.337715  18.917242
 26.895473   7.3750105 20.120224  30.312313  16.547485  17.408754
 12.114369   5.6543226 11.588513  12.668947   7.6156516 17.84272
 17.837044   8.902703   4.6878347 16.627256  13.800458  17.279854
  4.4874845 