In [1]:
#import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import torch
import torchvision
import torch.nn as nn
import torchmetrics
from torchvision import transforms

import torchvision.models as models
from torch.utils.data import DataLoader, Dataset,random_split
from pytorch_lightning import seed_everything, LightningModule, Trainer
from sklearn.metrics import classification_report


In [2]:
#测试GPU是否可用
import torch
torch.cuda.is_available()

True

In [3]:
# CPU
class OurModel(LightningModule):
    def __init__(self):
        super(OurModel,self).__init__()
        
        # model architecute  
        self.resnet18 = models.resnet18(pretrained=True)
        self.resnet18.fc=nn.Sequential(
            nn.Linear(self.resnet18.fc.in_features,256),
            nn.ReLU(),
            nn.Linear(256,1)
            )
        
        # augmentation
        self.aug=transforms.Compose([
                    transforms.Resize(size=(224,224)),
                    transforms.RandomHorizontalFlip(),
                    transforms.RandomVerticalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225]),
                    ])
        
        #parameters
        self.lr=1e-4
        self.batch_size=224
        self.numworker=4     ######################################## modify here #################################
        self.acc = torchmetrics.Accuracy(task='binary') # metric
        self.criterion=nn.BCEWithLogitsLoss() # loss function
        #list to store loss and accuracy
        self.trainacc,self.valacc=[],[]
        self.trainloss,self.valloss=[],[]
        
        ######################################## modify here #################################
        #load data        
        # self.dataset=torchvision.datasets.ImageFolder('Binary',transform=self.aug)
        dataroot = r'H:\0. videos\Bilibili\[医学AI-图像分类] COVID二分类问题-Pytorch源码\前处理\Binary'
        dataset=torchvision.datasets.ImageFolder(dataroot,transform=self.aug)
        
        #split data
        self.train_set, self.val_set =random_split(dataset,
                            [int(np.floor(len(dataset)*0.75)),int(np.ceil(len(dataset)*0.25))],
                                                  generator=torch.Generator().manual_seed(42))
    def forward(self,x):
        x=self.resnet18(x)
        return x

    
    def configure_optimizers(self):
        opt=torch.optim.AdamW(params=self.parameters(),lr=self.lr )
        return opt
    
    def train_dataloader(self):#load train 
        return DataLoader(self.train_set,shuffle=True,num_workers=self.numworker,batch_size=self.batch_size)

    def training_step(self,batch,batch_idx):
        image,label=batch
        pred = self(image)#pass images to model
        loss=self.criterion(pred.flatten(),label.float()) #calculate loss
        acc=self.acc(pred.flatten(),label)#calculate accuracy
        return {'loss':loss,'acc':acc}

    def training_epoch_end(self, outputs):
        #average loss and accuracy in all batches of train data
        loss=torch.stack([x["loss"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        acc=torch.stack([x["acc"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        self.trainacc.append(acc)
        self.trainloss.append(loss)

        
    def val_dataloader(self):
        ds=DataLoader(self.val_set,shuffle=False,num_workers=self.numworker,batch_size=self.batch_size)
        return ds

    def validation_step(self,batch,batch_idx):
        image,label=batch
        pred=self(image)
        loss=self.criterion(pred.flatten(),label.float())
        acc=self.acc(pred.flatten(),label)
        return {'loss':loss,'acc':acc}

    def validation_epoch_end(self, outputs):
        loss=torch.stack([x["loss"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        acc=torch.stack([x["acc"] for x in outputs]).mean().detach().cpu().numpy().round(2)
        self.valacc.append(acc)
        self.valloss.append(loss)
        print('validation loss accuracy ',self.current_epoch,loss, acc)


In [4]:
model = OurModel()



In [5]:
# CPU
seed_everything(0) # Global seed set to 0 提高可复现性
trainer = Trainer(max_epochs=15, 
                deterministic=True,
                gpus=0,precision=16,                                ###### modify here ###########
                accumulate_grad_batches=1,
                enable_progress_bar = True,
                num_sanity_val_steps=0,
                #limit_train_batches=20,
                #limit_val_batches=5
                )

Global seed set to 0
  rank_zero_deprecation(
  rank_zero_warn(
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


为了保证实验的「可复现性」，许多机器学习的代码都会有一个方法叫 seed everything，这个方法尝试固定随机种子以让一些随机的过程在每一次的运行中产生相同的结果。

![image.png](attachment:image.png)

## Seed Everything - 可复现的 PyTorch（一）

https://zhuanlan.zhihu.com/p/577506804

In [None]:
trainer.fit(model)


  | Name      | Type              | Params
------------------------------------------------
0 | resnet18  | ResNet            | 11.3 M
1 | acc       | BinaryAccuracy    | 0     
2 | criterion | BCEWithLogitsLoss | 0     
------------------------------------------------
11.3 M    Trainable params
0         Non-trainable params
11.3 M    Total params
45.232    Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
trainer.validate(model)

In [None]:
plt.plot(model.trainacc,label='train acc')
plt.plot(model.valacc,label='vall acc')
plt.legend()
plt.title('validation and training accuracy comparison')

In [None]:
plt.plot(model.trainloss,label='train loss')
plt.plot(model.valloss,label='vall loss')
plt.legend()
plt.title('validation and training loss comparison')