In [None]:
{
  "cells": [],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.9.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}

In [None]:
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# For data preprocess
import numpy as np
import csv
import os

# For plotting
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# Seed every random number generator this notebook uses so reruns are repeatable.
myseed = 2025
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    # Only touch the CUDA generators when a GPU is actually present.
    torch.cuda.manual_seed_all(myseed)
# cuDNN: request deterministic algorithms and switch off the benchmark autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:

def get_device():
    ''' Return the device string to use: 'cuda' when CUDA is available, else 'cpu' '''
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'

def plot_learning_curve(loss_record, title=''):
    ''' Plot learning curve of your DNN (train & dev loss)

    loss_record: dict with keys 'train' and 'dev', each a sequence of loss values.
                 The train sequence is plotted one point per entry; the dev
                 sequence is spread evenly over the same x-axis.
    title:       text appended to the figure title.
    '''
    train_loss = loss_record['train']
    dev_loss = loss_record['dev']
    total_steps = len(train_loss)
    x_1 = range(total_steps)

    figure(figsize=(6, 4))
    plt.plot(x_1, train_loss, c='tab:red', label='train')
    if total_steps > 0 and len(dev_loss) > 0:
        # A stride of 0 (fewer train points than dev points) is not a valid slice
        # step, so clamp it to 1.
        stride = max(1, total_steps // len(dev_loss))
        # When the lengths do not divide evenly the strided range and the dev
        # record can differ in length; trim both to the shorter one so that
        # matplotlib receives matching x and y.
        x_2 = x_1[::stride][:len(dev_loss)]
        plt.plot(x_2, dev_loss[:len(x_2)], c='tab:cyan', label='dev')
    plt.ylim(0.0, 5.)
    plt.xlabel('Training steps')
    plt.ylabel('MSE loss')
    plt.title('Learning curve of {}'.format(title))
    plt.legend()
    plt.show()


def plot_pred(dv_set, model, device, lim=35., preds=None, targets=None):
    ''' Scatter-plot predictions against ground truth, with the y = x line for reference.

    When either `preds` or `targets` is missing, both are recomputed by running
    `model` over every (x, y) batch of `dv_set` on `device`; otherwise the given
    arrays are plotted as they are and the model is not used.
    '''
    must_run_model = preds is None or targets is None
    if must_run_model:
        model.eval()
        pred_batches = []
        target_batches = []
        with torch.no_grad():
            for x, y in dv_set:
                x, y = x.to(device), y.to(device)
                pred_batches.append(model(x).detach().cpu())
                target_batches.append(y.detach().cpu())
        preds = torch.cat(pred_batches, dim=0).numpy()
        targets = torch.cat(target_batches, dim=0).numpy()

    diagonal = [-0.2, lim]  # end points of the "perfect prediction" line
    figure(figsize=(5, 5))
    plt.scatter(targets, preds, c='r', alpha=0.5)
    plt.plot(diagonal, diagonal, c='b')
    plt.xlim(-0.2, lim)
    plt.ylim(-0.2, lim)
    plt.xlabel('ground truth value')
    plt.ylabel('predicted value')
    plt.title('Ground Truth v.s. Prediction')
    plt.show()

In [None]:
def dev(dv_set, model, device):
    ''' Return model.cal_loss averaged per sample over every batch of dv_set '''
    model.eval()                                    # evaluation mode
    weighted_sum = 0
    with torch.no_grad():                           # no gradients needed here
        for inputs, labels in dv_set:
            inputs = inputs.to(device)
            labels = labels.to(device)
            batch_loss = model.cal_loss(model(inputs), labels)
            # weight each batch by its size so a short last batch counts fairly
            weighted_sum += batch_loss.detach().cpu().item() * len(inputs)

    return weighted_sum / len(dv_set.dataset)

In [None]:
# === Minimal all-in-one cell: definitions + training + prediction ===
import os, csv, time
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# Random seed: fix every generator used below so that reruns are repeatable.
seed = 2025
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    # Only touch the CUDA generators when a GPU is actually present.
    torch.cuda.manual_seed_all(seed)
# cuDNN: request deterministic algorithms and switch off the benchmark autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Dataset
class COVID19Dataset(Dataset):
    ''' Dataset read from one of the notebook's CSV files.

    The header row and the first column of the file are dropped and everything
    else is parsed as float.

    Args:
        path:        CSV file to read.
        mode:        'train', 'dev' or 'test'. For 'test' every row is used and
                     there is no target. For any other value the last column is
                     the target; 'train' keeps the rows whose index is not a
                     multiple of 10, anything else keeps the rows whose index is.
        target_only: if True use only columns 0-39 plus columns 57 and 75 as
                     features, otherwise use columns 0-92.

    Attributes:
        data:   FloatTensor of features, one row per sample.
        target: FloatTensor of targets (not set in 'test' mode).
        dim:    number of feature columns.

    Feature columns from position 40 onward are standardized with the mean and
    std of the rows held by *this* instance, so train, dev and test instances
    are each scaled with their own statistics.
    '''
    def __init__(self, path, mode='train', target_only=False):
        self.mode = mode
        # newline='' is what the csv module asks for when it is handed a file object.
        with open(path, 'r', newline='') as fp:
            rows = list(csv.reader(fp))
        # rows[0] is the header; column 0 is dropped (presumably a row id - confirm).
        data = np.array(rows[1:])[:, 1:].astype(float)

        if target_only:
            feats = list(range(40)) + [57, 75]  # hand-picked subset of columns
        else:
            feats = list(range(93))

        if mode == 'test':
            data = data[:, feats]
            self.data = torch.FloatTensor(data)
        else:
            target = data[:, -1]
            data = data[:, feats]
            if mode == 'train':
                idx = [i for i in range(len(data)) if i % 10 != 0]
            else:  # dev
                idx = [i for i in range(len(data)) if i % 10 == 0]
            self.data = torch.FloatTensor(data[idx])
            self.target = torch.FloatTensor(target[idx])

        tail = self.data[:, 40:]
        mean = tail.mean(0, keepdim=True)
        std = tail.std(0, keepdim=True)
        # A constant column has std 0 and a single-row split has std NaN; dividing
        # by either would fill the features with NaN/inf. `~(std > 0)` catches both
        # cases (NaN > 0 is False), and dividing by 1 leaves those columns at 0.
        std[~(std > 0)] = 1.0
        self.data[:, 40:] = (tail - mean) / std
        self.dim = self.data.shape[1]
        print(f'Loaded {mode} set: {len(self.data)} samples, dim={self.dim}')

    def __getitem__(self, i):
        ''' Return (features, target) in 'train'/'dev' mode, features only otherwise. '''
        if self.mode in ['train', 'dev']:
            return self.data[i], self.target[i]
        return self.data[i]

    def __len__(self):
        ''' Number of samples held by this split. '''
        return len(self.data)

def prep_dataloader(path, mode, batch_size, n_jobs=0, target_only=False):
    ''' Build a COVID19Dataset from `path` and wrap it in a DataLoader.

    Args:
        path:        CSV file handed to COVID19Dataset.
        mode:        split name handed to COVID19Dataset; batches are shuffled
                     only when it is 'train'.
        batch_size:  number of samples per batch (the last batch may be smaller).
        n_jobs:      number of DataLoader worker processes.
        target_only: forwarded to COVID19Dataset.

    Returns:
        The DataLoader; the underlying dataset is reachable as `.dataset`.
    '''
    dataset = COVID19Dataset(path, mode=mode, target_only=target_only)
    # Page-locked host memory is only useful for host-to-GPU copies, so request
    # it only when CUDA is present instead of unconditionally.
    use_pinned_memory = torch.cuda.is_available()
    return DataLoader(dataset, batch_size, shuffle=(mode == 'train'),
                      drop_last=False, num_workers=n_jobs,
                      pin_memory=use_pinned_memory)

class NeuralNet(nn.Module):
    ''' Fully connected regressor: input_dim -> 64 -> 32 -> 16 -> 1 with ReLU in between '''
    def __init__(self, input_dim):
        super().__init__()
        widths = [input_dim, 64, 32, 16]
        layers = []
        # One Linear + ReLU pair per hidden width, then a single-output Linear head.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*layers)
        self.criterion = nn.MSELoss()

    def forward(self, x):
        ''' Run the network and drop the size-1 dimension at index 1 '''
        out = self.net(x)
        return out.squeeze(1)

    def cal_loss(self, pred, target):
        ''' MSE between pred and target plus 1e-4 times the summed 2-norms of all parameters '''
        mse = self.criterion(pred, target)
        # Regularization term added to limit overfitting: the 2-norm of every
        # parameter tensor, summed in parameter order.
        penalty = 0
        for weight in self.parameters():
            penalty = penalty + torch.norm(weight, p=2)
        return mse + 1e-4 * penalty

def dev(dv_set, model, device):
    ''' Per-sample average of model.cal_loss over dv_set.

    The value is whatever cal_loss returns, so any extra term cal_loss adds on
    top of the data loss is part of the reported number.
    '''
    model.eval()
    batch_totals = []
    with torch.no_grad():
        for features, labels in dv_set:
            features, labels = features.to(device), labels.to(device)
            batch_loss = model.cal_loss(model(features), labels)
            # Scale by batch size so a short final batch is weighted correctly.
            batch_totals.append(batch_loss.item() * len(features))
    return sum(batch_totals, 0.0) / len(dv_set.dataset)

def train(tr_set, dv_set, model, config, device):
    ''' Train `model` on tr_set, checkpointing on dev loss, with early stopping.

    Args:
        tr_set: iterable of (x, y) training batches.
        dv_set: validation loader handed to dev() after every epoch.
        model:  module exposing cal_loss(pred, target).
        config: dict with keys
                'optimizer'    - name of a class in torch.optim,
                'optim_hparas' - keyword arguments for that optimizer,
                'n_epochs'     - maximum number of epochs,
                'early_stop'   - training stops once the dev loss has failed to
                                 improve for more than this many epochs in a row,
                'save_path'    - where the best state_dict is written.
        device: device the batches are moved to.

    Returns:
        (best, record): the lowest dev loss seen, and a dict with the per-step
        training losses under 'train' and the per-epoch dev losses under 'dev'.
    '''
    optimizer_cls = getattr(torch.optim, config['optimizer'])
    opt = optimizer_cls(model.parameters(), **config['optim_hparas'])
    best = float('inf')
    record = {'train': [], 'dev': []}
    es_cnt = 0
    epochs_run = 0
    while epochs_run < config['n_epochs']:
        model.train()
        for x, y in tr_set:
            x, y = x.to(device), y.to(device)
            opt.zero_grad()
            loss = model.cal_loss(model(x), y)
            loss.backward()
            opt.step()
            record['train'].append(loss.item())
        # Count the epoch as soon as it has been trained, so the total printed
        # at the end is also right when early stopping breaks out of the loop.
        epochs_run += 1

        dv_loss = dev(dv_set, model, device)
        record['dev'].append(dv_loss)
        if dv_loss < best:
            best = dv_loss
            torch.save(model.state_dict(), config['save_path'])
            print(f'Saving model (epoch={epochs_run:4d}, loss={best:.4f})')
            es_cnt = 0
        else:
            es_cnt += 1
        if es_cnt > config['early_stop']:
            break
    print(f'Finished training after {epochs_run} epochs')
    return best, record

def test(tt_set, model, device):
    model.eval()
    preds = []
    for x in tt_set:
        x = x.to(device)
        with torch.no_grad():
            preds.append(model(x).detach().cpu())
    return torch.cat(preds, 0).numpy()

def plot_learning_curve(loss_record, title=''):
    ''' Draw the 'train' and 'dev' loss records on one set of axes.

    The train record is plotted one point per entry; the dev record is placed at
    every `stride`-th position of the same x-axis.
    '''
    train_hist, dev_hist = loss_record['train'], loss_record['dev']
    n_steps = len(train_hist)
    # Never let the stride fall to 0, which is not a valid slice step.
    stride = max(1, n_steps // len(dev_hist))
    train_x = range(n_steps)
    dev_x = train_x[::stride]

    figure(figsize=(6,4))
    plt.plot(train_x, train_hist, label='train', c='tab:red')
    plt.plot(dev_x, dev_hist, label='dev', c='tab:cyan')
    plt.xlabel('Steps')
    plt.ylabel('MSE')
    plt.title(title)
    plt.legend()
    plt.show()

def plot_pred(dv_set, model, device, lim=35.):
    ''' Scatter the model's predictions on dv_set against the targets, with the y = x line '''
    model.eval()
    pred_chunks, target_chunks = [], []
    with torch.no_grad():
        for inputs, labels in dv_set:
            pred_chunks.append(model(inputs.to(device)).cpu())
            # Targets are collected exactly as the loader yields them.
            target_chunks.append(labels)
    predicted = torch.cat(pred_chunks).numpy()
    actual = torch.cat(target_chunks).numpy()

    bounds = (-0.2, lim)  # shared axis range and end points of the diagonal
    figure(figsize=(5,5))
    plt.scatter(actual, predicted, c='r', alpha=0.5)
    plt.plot(bounds, bounds, c='b')
    plt.xlim(*bounds)
    plt.ylim(*bounds)
    plt.xlabel('GT')
    plt.ylabel('Pred')
    plt.title('GT vs Pred')
    plt.show()

# === Training configuration ===
# Input files; fail early with a clear error if either one is missing.
tr_path = 'covid_train.csv'
tt_path = 'covid_test.csv'
for p in [tr_path, tt_path]:
    if not os.path.isfile(p):
        raise FileNotFoundError(p)

# Hyper-parameters consumed by train() and prep_dataloader().
config = {
    'n_epochs': 3000,                 # upper bound on the number of training epochs
    'batch_size': 64,                 # samples per batch for all three loaders
    'optimizer': 'Adam',              # looked up by name in torch.optim
    'optim_hparas': {'lr': 0.001},    # keyword arguments passed to the optimizer
    'early_stop': 200,                # stop after more than this many epochs without a better dev loss
    'save_path': 'models/model.pth'   # where the best state_dict is written
}
target_only = True  # use the reduced feature subset selected inside COVID19Dataset
# torch.save does not create directories, so make sure the checkpoint folder exists.
os.makedirs('models', exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('[INFO] device =', device)

# Train and dev loaders read the same file; COVID19Dataset splits it by row index.
tr_set = prep_dataloader(tr_path, 'train', config['batch_size'], target_only=target_only)
dv_set = prep_dataloader(tr_path, 'dev',   config['batch_size'], target_only=target_only)
tt_set = prep_dataloader(tt_path, 'test',  config['batch_size'], target_only=target_only)

# The input width comes from the dataset, so it follows target_only automatically.
model = NeuralNet(tr_set.dataset.dim).to(device)

t0 = time.time()
best_dev, loss_rec = train(tr_set, dv_set, model, config, device)
# NOTE(review): best_dev is the value of model.cal_loss, which for NeuralNet is the
# MSE plus the 1e-4-weighted norm penalty, not the bare MSE the label suggests.
print(f'Best Dev MSE: {best_dev:.6f}; Time: {time.time()-t0:.2f}s')

plot_learning_curve(loss_rec, title='deep model')

# Rebuild the network and load the best checkpoint saved during training, rather
# than using the weights left over from the final epoch.
best_model = NeuralNet(tr_set.dataset.dim).to(device)
best_model.load_state_dict(torch.load(config['save_path'], map_location=device))
plot_pred(dv_set, best_model, device)

def save_pred(preds, file):
    ''' Write preds to `file` as CSV: a header row, then one (index, value) row per prediction '''
    header = ['id','tested_positive']
    with open(file, 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        # The row id is simply the position of the prediction in `preds`.
        writer.writerows([row_id, value] for row_id, value in enumerate(preds))
    print('Saved', file)

# Predict on the test loader with the reloaded best checkpoint and write the
# results to pred.csv.
preds = test(tt_set, best_model, device)
save_pred(preds, 'pred.csv')
print('[INFO] Done.')

NameError: name 's200' is not defined