In [63]:
import torch
import torchvision
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
from tqdm import tqdm
import wandb

In [59]:
# Pick the compute device once; later cells move the model and batches to it.
# The assignment sits outside any CUDA check so that `device` is also defined
# on CPU-only machines (previously it was only bound when CUDA was available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# True only when more than one CUDA device is visible; used to decide whether
# the model gets wrapped in nn.DataParallel.
multiple_gpus = torch.cuda.is_available() and torch.cuda.device_count() > 1

In [2]:
# Load the training label table into a DataFrame. The path is relative, so it
# resolves against the notebook's working directory.
train_csv = pd.read_csv('../../data/plant_2020/train.csv')

In [4]:
# Preview the first rows to check the column layout of the label table.
train_csv.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0


In [5]:
# Derive each image's file path from its `image_id`. Vectorised string
# concatenation replaces the row-by-row `.apply(lambda ...)` and yields the
# same strings for string ids.
train_csv['updated_paths'] = '../../data/plant_2020/images/' + train_csv['image_id'] + '.jpg'

In [38]:
class CreateDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a label frame.

    Parameters
    ----------
    csv_file : pandas.DataFrame
        Must contain an 'updated_paths' column (paths to JPEG files) and the
        four label columns 'healthy', 'multiple_diseases', 'rust', 'scab'.
    img_size : int, default 64
        Each image is resized to ``(img_size, img_size)``.
    """

    def __init__(self, csv_file, img_size = 64):
        self.csv_file = csv_file
        self.paths = csv_file['updated_paths'].values.tolist()
        self.labels = self.transform_labels()
        self.img_size = img_size

    def transform_labels(self):
        """Return the four label columns as a 2-D array, one row per sample."""
        return self.csv_file[['healthy', 'multiple_diseases', 'rust', 'scab']].values

    def __len__(self):
        """Number of samples (one per image path)."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load, decode, resize and scale image ``idx``; return it with its label.

        Returns
        -------
        (torch.Tensor, torch.Tensor)
            The image divided by 255 and the label row as a float32 tensor.
        """
        raw_bytes = torchvision.io.read_file(self.paths[idx])
        # Force RGB output: with the default mode a grayscale JPEG decodes to a
        # single channel, which cannot be batched with 3-channel images nor fed
        # to the 3-channel ResNet stem.
        img = torchvision.io.decode_jpeg(raw_bytes, mode = torchvision.io.ImageReadMode.RGB)
        img = torchvision.transforms.functional.resize(img, [self.img_size, self.img_size])
        img = img / 255
        # torch.tensor always copies, so the stored label array is never aliased.
        label = torch.tensor(self.labels[idx], dtype = torch.float32)
        return img, label

In [39]:
def split_datasets(dataset, test_size = 0.01, random_state = None):
    """Split ``dataset`` into train, validation and test subsets.

    The test subset is carved out first; the validation subset is then carved
    out of the remaining training data using the same ``test_size``.

    Parameters
    ----------
    dataset : array-like or DataFrame
        Anything accepted by ``train_test_split``.
    test_size : float or int, default 0.01
        Passed to both ``train_test_split`` calls.
    random_state : int or None, default None
        Seed forwarded to both splits. Pass an int to get the same split on
        every run; the default keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(train, val, test)``.
    """
    train, test = train_test_split(dataset, test_size = test_size, random_state = random_state)
    train, val = train_test_split(train, test_size = test_size, random_state = random_state)
    return train, val, test

In [40]:
# Split the label frame into train / val / test subsets using the default
# test_size of split_datasets.
train, val, test = split_datasets(train_csv)

In [41]:
# Wrap the training subset in the dataset class and a DataLoader built with
# default arguments.
# NOTE(review): `train_loader` is rebound later in the notebook with explicit
# batch_size / shuffle / worker settings, so this loader is superseded.
train_dataset = CreateDataset(train)
train_loader = DataLoader(train_dataset)

In [49]:
def create_model(num_classes = 4):
    """Build a torchvision ResNet-50 whose final layer emits ``num_classes`` logits.

    ``torchvision.models.resnet50()`` is called without arguments, as before.
    The replacement head reads its input width from the existing ``fc`` layer
    instead of hard-coding it.

    Parameters
    ----------
    num_classes : int, default 4
        Output size of the new fully connected head.

    Returns
    -------
    torch.nn.Module
        The model with ``model.fc`` replaced.
    """
    model = torchvision.models.resnet50()
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

In [86]:
def save_model(model, epoch, optimizer, multiple_gpus, save_path):
    """Write a checkpoint for ``epoch`` to ``<save_path>/<epoch>.pth``.

    The checkpoint is a dict with the keys 'epoch', 'model_state_dict' and
    'optimizer_state_dict'.

    Parameters
    ----------
    model : torch.nn.Module
        Model to save; may be wrapped in ``nn.DataParallel``.
    epoch : int
        Stored in the checkpoint and used as the file name.
    optimizer : torch.optim.Optimizer
        Its ``state_dict()`` is stored alongside the model weights.
    multiple_gpus : bool
        Hint that ``model`` is a wrapper exposing ``.module``. It is only
        honoured when the model really has that attribute, and a
        ``nn.DataParallel`` model is unwrapped regardless of the flag.
    save_path : str
        Target directory; created if missing. A trailing slash is optional.
    """
    os.makedirs(save_path, exist_ok = True)
    # Save the underlying module's weights so the keys carry no 'module.'
    # prefix and can be loaded into an unwrapped model.
    # See: https://pytorch.org/tutorials/beginner/saving_loading_models.html
    is_wrapped = isinstance(model, nn.DataParallel) or (multiple_gpus == True and hasattr(model, 'module'))
    bare_model = model.module if is_wrapped else model
    # os.path.join keeps the file inside save_path even without a trailing slash.
    checkpoint_file = os.path.join(save_path, f'{epoch}.pth')
    torch.save({
        'epoch' : epoch,
        'model_state_dict': bare_model.state_dict(),
        'optimizer_state_dict' : optimizer.state_dict()
    }, checkpoint_file)
    print(f'Weight saved for epoch {epoch}.')

In [56]:
def loss_fn(inputs, targets):
    """Return the cross-entropy loss of ``inputs`` against ``targets``.

    A new ``nn.CrossEntropyLoss`` with default settings is constructed on each
    call and applied immediately.
    """
    return nn.CrossEntropyLoss()(inputs, targets)

def precision_recall(inputs, targets):
    """Placeholder for a precision/recall metric.

    Not implemented yet: both arguments are ignored and None is returned.
    """
    return None

In [87]:
def _run_phase(model, loader, optimizer, epoch, training):
    """Run one full pass over ``loader`` and return the mean per-batch loss.

    With ``training`` true the model is put in train mode and the optimizer is
    stepped after every batch. Otherwise the model is put in eval mode and
    gradient tracking is disabled. Batches are moved to the module-level
    ``device`` and scored with the module-level ``loss_fn``.
    """
    if training:
        model.train()
    else:
        model.eval()
    running_loss = 0.0
    with tqdm(loader, unit = 'batch') as tepoch:
        for imgs, labels in tepoch:
            tepoch.set_description(f'Epoch: {epoch}')
            imgs = imgs.to(device)
            labels = labels.to(device)
            with torch.set_grad_enabled(training):
                outputs = model(imgs)
                loss = loss_fn(outputs, labels)
                if training:
                    # Gradients only exist in the training phase, so they are
                    # cleared here rather than for every batch of both phases.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
            batch_loss = loss.item()
            running_loss += batch_loss
            tepoch.set_postfix(loss = batch_loss)
    # len(loader) is the number of batches; max() avoids dividing by zero when
    # the loader is empty.
    return running_loss / max(len(loader), 1)


def train_script(model, train_set, val_set, config_, epochs=2, load_weights = None, weights_path = '../../plant_path_2020/weights/'):
    """Train ``model`` with SGD, log losses to Weights & Biases, checkpoint every epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train. It is wrapped in ``nn.DataParallel`` when the
        module-level ``multiple_gpus`` flag is set, then moved to the
        module-level ``device``.
    train_set, val_set : iterable of (imgs, labels) batches
        Iterated once per epoch and must support ``len()``. The notebook
        passes DataLoaders.
    config_ : dict
        Passed unchanged to ``wandb.init(config=...)``.
    epochs : int, default 2
        Number of train + validation passes.
    load_weights : str or None, default None
        Optional checkpoint path; its 'model_state_dict' entry is loaded into
        the model before training.
    weights_path : str
        Directory handed to ``save_model`` after every epoch.
    """
    wandb.init(project = 'plant_pathology',
               config = config_)
    # Define data pointers
    dataloaders = {
        'train' : train_set,
        'val' : val_set
    }
    if load_weights is not None:
        # map_location='cpu' lets a checkpoint written on a GPU be restored on
        # a CPU-only machine; load_state_dict copies into the model's params.
        checkpoint = torch.load(load_weights, map_location = 'cpu')
        # Checkpoints hold unwrapped keys, so load into the inner module when
        # the caller passes an already-wrapped model.
        bare_model = model.module if isinstance(model, nn.DataParallel) else model
        bare_model.load_state_dict(checkpoint['model_state_dict'])
    # Decide on wrapping from the model itself, not from where its parameters
    # live. A model already moved to CUDA by an earlier call used to skip the
    # wrap and then crash in save_model on `model.module`.
    if multiple_gpus and not isinstance(model, nn.DataParallel):
        model = nn.DataParallel(model)
    model = model.to(device)
    is_wrapped = isinstance(model, nn.DataParallel)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    for epoch in range(epochs):
        epoch_loss = {}
        for phase in ['train', 'val']:
            epoch_loss[phase] = _run_phase(model, dataloaders[phase], optimizer, epoch, phase == 'train')
            if phase == 'train':
                print(f'Train Loss: {epoch_loss[phase]}')
            else:
                print(f'Val Loss: {epoch_loss[phase]}')
        wandb.log({
            'train_loss' : epoch_loss['train'],
            'val_loss' : epoch_loss['val']
        })
        # Tell save_model whether the model is really wrapped so it unwraps
        # exactly when needed.
        save_model(model, epoch, optimizer, is_wrapped, weights_path)

In [88]:
# Build the datasets and loaders used for training. Both loaders use batches
# of 32, 7 worker processes and prefetch_factor=4; only the training loader
# shuffles.
train_set = CreateDataset(train)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=7, prefetch_factor=4)
val_set = CreateDataset(val)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False, num_workers=7, prefetch_factor=4)

In [89]:
# Instantiate the ResNet-50 with the 4-output head defined in create_model.
model = create_model()

In [92]:
# Run metadata handed to train_script as `config_`, which passes it on to
# wandb.init(config=...).
config = {
    'arch_name' : 'ResNet50',
    'epochs' : 2,
    'batch_size' : 32,
    'weights_path' : '../../plant_path_2020_weights/res_50/'
}

In [93]:
# Read the epoch count and checkpoint directory from `config` instead of
# repeating them by hand, so the values logged to W&B are the ones actually
# used for the run.
train_script(model, train_loader, val_loader, config,
             epochs=config['epochs'], weights_path=config['weights_path'])

0,1
train_loss,█▁
val_loss,█▁

0,1
train_loss,1.28123
val_loss,1.18415


Epoch: 0: 100%|██████████| 56/56 [00:09<00:00,  6.05batch/s, loss=1.26]


Train Loss: 1.2826543429068156


Epoch: 0: 100%|██████████| 1/1 [00:00<00:00,  1.60batch/s, loss=1.45]


Val Loss: 1.4516401290893555
Weight saved for epoch 0.


Epoch: 1: 100%|██████████| 56/56 [00:08<00:00,  6.55batch/s, loss=1.33]


Train Loss: 1.2679476950849806


Epoch: 1: 100%|██████████| 1/1 [00:00<00:00,  1.58batch/s, loss=1.2]


Val Loss: 1.1987475156784058
Weight saved for epoch 1.
