In [None]:
from scipy.special import softmax
import os
import numpy as np
import cv2
import torch
import torch.optim as optim
from torchvision.models.mobilenet import mobilenet_v2
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import StepLR
from torch.nn import CrossEntropyLoss
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import roc_auc_score

import torchvision.transforms as T

class DatasetPeople(Dataset):
    """Image dataset driven by a CSV index file.

    The CSV is expected to contain an ``id`` column (image file name,
    relative to ``root_path``) and a ``target_people`` column (the label).
    """

    def __init__(self, file_path, root_path, transform=None):
        """Load the CSV index.

        Args:
            file_path: Path to the CSV file listing image ids and labels.
            root_path: Directory that contains the image files.
            transform: Optional callable invoked as ``transform(image=image)``
                that returns a mapping with an ``'image'`` entry.
        """
        self.data = pd.read_csv(file_path)
        self.root_path = root_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair for row ``index``.

        The image is returned in RGB channel order, after ``transform`` has
        been applied when one was supplied.

        Raises:
            FileNotFoundError: If the image file does not exist.
            ValueError: If the file exists but OpenCV cannot decode it.
        """
        image_path = os.path.join(self.root_path, self.data.loc[index, 'id'])
        if not os.path.isfile(image_path):
            raise FileNotFoundError('Image not found: {}'.format(image_path))

        # cv2.imread reports failure by returning None rather than raising,
        # so check explicitly instead of failing later inside the transform.
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError('Could not decode image: {}'.format(image_path))

        # OpenCV decodes to BGR; ImageNet-pretrained backbones expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.data.loc[index, 'target_people']

        if self.transform is not None:
            aug = self.transform(image=image)
            image = aug['image']

        return image, label


def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Each batch is moved to ``device``, scored with cross-entropy loss and
    used for a single optimizer step. A progress line is printed on every
    10th batch (starting with the first one).

    Args:
        model: Network to optimize; switched to training mode.
        device: Device the batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer bound to the parameters of ``model``.
        epoch: Epoch number, used only in the progress output.
    """
    criterion = CrossEntropyLoss()
    report_every = 10
    samples_seen = 0
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        samples_seen += len(inputs)
        if step % report_every != 0:
            continue

        percent_done = 100. * step / len(train_loader)
        message = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, len(train_loader.dataset),
            percent_done, batch_loss.item())
        print(message)


def valid(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print the metrics.

    Prints the mean per-batch cross-entropy loss, the accuracy and the
    ROC AUC score. Assumes a binary classifier whose output column 1 holds
    the positive-class logit.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device the batches are moved to.
        test_loader: Iterable yielding ``(data, target)`` batches.
    """
    model.eval()
    loss_func = CrossEntropyLoss()
    test_loss = 0.0
    correct = 0
    y = []
    scores = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # .item() keeps the running total a plain Python float.
            test_loss += loss_func(output, target).item()
            pred = output.argmax(dim=1)  # index of the largest logit
            correct += pred.eq(target).sum().item()
            y.extend(target.tolist())
            # ROC AUC needs a continuous score; hard class labels would
            # collapse the curve to a single operating point.
            scores.extend(output.softmax(dim=1)[:, 1].tolist())

    # ROC AUC is undefined unless both classes occur in the labels.
    if len(set(y)) < 2:
        roc_auc_scr = float('nan')
    else:
        roc_auc_scr = roc_auc_score(y, scores)

    test_loss /= len(test_loader)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%), ROC AUC Score: {:.2f}\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset), roc_auc_scr))


def main():
    """Fine-tune a pretrained MobileNetV2 and write ``submission.csv``.

    Reads the train/valid/test index files from ``./input``, trains a
    two-class head for ``epochs`` epochs (validating after each one), then
    writes the predicted class-1 probability for every test image to
    ``submission.csv``.
    """
    batch_size = 100
    learning_rate = 1.0
    reduce_lr_gamma = 0.6
    epochs = 1
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Device: {} Epochs: {} Batch size: {}'.format(device, epochs, batch_size))

    kwargs = {'batch_size': batch_size}
    if device.type == 'cuda':
        kwargs.update({'num_workers': 2, 'pin_memory': True})

    transform = A.Compose(
        [
            A.Resize(256, 256),
            # The pretrained backbone was trained with ImageNet statistics,
            # so inputs are normalized with the same mean/std.
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ], p=1.0)

    dataset1 = DatasetPeople('./input/train.csv', './input/train', transform=transform)
    dataset2 = DatasetPeople('./input/valid.csv', './input/valid', transform=transform)
    dataset3 = DatasetPeople('./input/sample_submission.csv', './input/test', transform=transform)

    print('Length train: {} Length valid: {} Length test: {}'.format(len(dataset1), len(dataset2), len(dataset3)))

    train_loader = DataLoader(dataset1, shuffle=True, **kwargs)
    # Evaluation metrics do not depend on sample order, so no shuffling.
    valid_loader = DataLoader(dataset2, shuffle=False, **kwargs)
    # Must stay unshuffled: predictions are matched to ids by position.
    test_loader = DataLoader(dataset3, shuffle=False, **kwargs)
    print('Number of train batches: {} Number of valid batches: {} Number of test batches: {}'.format(len(train_loader), len(valid_loader), len(test_loader)))

    model = mobilenet_v2(pretrained=True)
    # Swap the final classifier layer for a two-class head.
    model.classifier[1] = torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=2)
    model.to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)

    # Multiply the learning rate by reduce_lr_gamma after every epoch.
    scheduler = StepLR(optimizer, step_size=1, gamma=reduce_lr_gamma)
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        valid(model, device, valid_loader)
        scheduler.step()

    # Uncomment to persist the fine-tuned weights:
    # torch.save(model.state_dict(), "trained_model.pt")

    # Final prediction
    ids = list(dataset3.data['id'])
    submission = pd.DataFrame(ids, columns=['id'])
    predictions = []
    # Do not rely on valid() having left the model in evaluation mode, and
    # skip autograd bookkeeping during inference.
    model.eval()
    with torch.no_grad():
        # The label column of the test index is loaded but ignored here.
        for data, _ in test_loader:
            output = model(data.to(device))
            predictions.extend(output.softmax(dim=1)[:, 1].tolist())
    submission['target_people'] = predictions
    submission.to_csv('submission.csv', index=False)
    print('Submission saved in: {}'.format('submission.csv'))


# Run the training/prediction pipeline only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()