In [1]:
import torch
import torchvision
import PIL
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import OneHotEncoder
import torchvision.transforms.functional as fn
from torch import nn
import torch.nn.functional as F
import wandb
import torch.optim as optim
from tqdm import tqdm
import sys
sys.path.append('../scripts/')
from save_model_every_epoch_torch import save_model
import warnings
warnings.filterwarnings("ignore")

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
multiple_gpus = False
if torch.cuda.is_available():
    if torch.cuda.device_count() > 1:
        multiple_gpus = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
csv_file = pd.read_csv('../../files/train.csv')
csv_file['updated_paths'] = csv_file['image'].apply(lambda x: '../../files/train_images/' + x)

In [4]:
def split_datasets(csv_file, test_size):
    train, test = train_test_split(csv_file, test_size=test_size)
    train, val = train_test_split(train, test_size=test_size)
    return train, val, test

In [5]:
def encoding_data(csv_data):
    labels = csv_data['species'].values.tolist()
    encoder = OneHotEncoder()
    encoder = encoder.fit(np.array(labels).reshape(-1, 1))
    return encoder

In [6]:
encoder = encoding_data(csv_file)

In [7]:
class DolphinDataset(Dataset):
    def __init__(self, csv_file, encoder):
        self.csv_file = csv_file
        self.images = csv_file['updated_paths'].values.tolist()
        self.labels = csv_file['species'].values.tolist()
        self.encoder = encoder
        self.encoded_labels = self.encoder.transform(np.array(self.labels).reshape(-1, 1)).toarray()
        self.convert_rgb = torchvision.transforms.Lambda(lambda x: x.repeat(3, 1, 1))

    def __len__(self):
        return len(self.csv_file)

    def __getitem__(self, idx):
        img = torchvision.io.read_file(self.images[idx])
        img = torchvision.io.decode_jpeg(img)
        if img.shape[0] != 3:
            img = self.convert_rgb(img)
        img = torchvision.transforms.functional.resize(img, (512, 512))
        img = img / 255.0
        return img, torch.Tensor(self.encoded_labels[idx])

In [8]:
train, val, test = split_datasets(csv_file, test_size = 0.01)

In [9]:
# incep_model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained = False)

In [10]:
def get_model():
    incep_model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained = False)
    incep_model.fc = nn.Linear(2048, 30)
    return incep_model

In [11]:
categorical_cross_entropy = nn.CrossEntropyLoss()

In [14]:
def train_model(train_dataset, val_dataset, epochs, load_weights=None):
    model = get_model()
    model = model.to(device)
    wandb.init(project='dolphin',
               config = {
                   'arch' : 'InceptionV4'
               })
    data_pointers = {
        'train' : train_dataset,
        'val' : val_dataset,
    }
    if load_weights != None:
        model.load_state_dict(torch.load(load_weights)['model_state_dict'])
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    if next(model.parameters()).is_cuda == False:
        if multiple_gpus == True:
            model = nn.DataParallel(model)
        model = model.to(device)
    for epoch in range(epochs):
        train_loss= 0.0
        val_loss = 0.0
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss, running_precision, running_recall = 0.0, 0.0, 0.0
            with tqdm(data_pointers[phase], unit='batch') as tepoch:
                for imgs, labels in tepoch:
                    tepoch.set_description(f'Epoch: {epoch}')
                    imgs = imgs.to(device)
                    labels = labels.to(device)
                    optimizer.zero_grad()
                    with torch.set_grad_enabled(phase == 'train'):
                        if phase == 'train':
                            outputs = model(imgs)[0]
                        else:
                            outputs = model(imgs)
                        loss = categorical_cross_entropy(outputs, labels)
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                    running_loss += loss.item()
                    tepoch.set_postfix(loss = loss.item())
            if phase == 'train':
                train_loss = running_loss / len(train)
                print(f'{phase} Loss: {float(train_loss)}')
            else:
                val_loss = running_loss / len(val)
                print(f'{phase} Loss: {float(val_loss)}')
        wandb.log({
            'train_loss' : train_loss,
            'val_loss' : val_loss
        })

In [16]:
batch_size = 172
train_dataset = DolphinDataset(train, encoder)
val_dataset = DolphinDataset(val, encoder)
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, num_workers = 24, prefetch_factor=2)
val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False, num_workers = 24, prefetch_factor=2)
train_model(train_dataloader, val_dataloader, 10)

Using cache found in /home/ubuntu/.cache/torch/hub/pytorch_vision_v0.10.0


0,1
train_loss,█▁
val_loss,█▁

0,1
train_loss,0.01525
val_loss,0.01554


Epoch: 0: 100%|██████████| 291/291 [04:18<00:00,  1.12batch/s, loss=2.56]


train Loss: 0.01648843092980937


Epoch: 0: 100%|██████████| 3/3 [00:13<00:00,  4.34s/batch, loss=2.74]


val Loss: 0.01605397674877182


Epoch: 1: 100%|██████████| 291/291 [04:17<00:00,  1.13batch/s, loss=2.54]


train Loss: 0.015254596382932487


Epoch: 1: 100%|██████████| 3/3 [00:12<00:00,  4.29s/batch, loss=2.63]


val Loss: 0.015494843245494978


Epoch: 2: 100%|██████████| 291/291 [04:10<00:00,  1.16batch/s, loss=2.56]


train Loss: 0.014843505602842406


Epoch: 2: 100%|██████████| 3/3 [00:12<00:00,  4.28s/batch, loss=2.57]


val Loss: 0.015097929083782694


Epoch: 3: 100%|██████████| 291/291 [04:15<00:00,  1.14batch/s, loss=2.23]


train Loss: 0.014464185752513237


Epoch: 3: 100%|██████████| 3/3 [00:13<00:00,  4.41s/batch, loss=2.53]


val Loss: 0.014819434980158749


Epoch: 4: 100%|██████████| 291/291 [04:18<00:00,  1.12batch/s, loss=2.37]


train Loss: 0.014083849858452095


Epoch: 4: 100%|██████████| 3/3 [00:13<00:00,  4.41s/batch, loss=2.45]


val Loss: 0.014420783566863169


Epoch: 5: 100%|██████████| 291/291 [04:12<00:00,  1.15batch/s, loss=2.31]


train Loss: 0.013791935900923386


Epoch: 5: 100%|██████████| 3/3 [00:13<00:00,  4.44s/batch, loss=2.42]


val Loss: 0.014234134802234032


Epoch: 6: 100%|██████████| 291/291 [04:17<00:00,  1.13batch/s, loss=2.28]


train Loss: 0.013494512699043911


Epoch: 6: 100%|██████████| 3/3 [00:13<00:00,  4.42s/batch, loss=2.37]


val Loss: 0.013889825391203989


Epoch: 7: 100%|██████████| 291/291 [04:13<00:00,  1.15batch/s, loss=2.23]


train Loss: 0.013235089422149378


Epoch: 7: 100%|██████████| 3/3 [00:13<00:00,  4.59s/batch, loss=2.33]


val Loss: 0.013749858607416567


Epoch: 8: 100%|██████████| 291/291 [04:16<00:00,  1.13batch/s, loss=2.38]


train Loss: 0.012970792824403643


Epoch: 8: 100%|██████████| 3/3 [00:12<00:00,  4.24s/batch, loss=2.31]


val Loss: 0.01348521162869902


Epoch: 9: 100%|██████████| 291/291 [04:17<00:00,  1.13batch/s, loss=2.01]


train Loss: 0.012732843045557605


Epoch: 9: 100%|██████████| 3/3 [00:10<00:00,  3.62s/batch, loss=2.22]

val Loss: 0.013087458761313215



