In [None]:
# import glob
import os
import sys

import torch
import torchvision

import pandas as pd

import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models

from skimage import io, transform

from torch.optim import SGD
from torch.nn import CrossEntropyLoss, Dropout, Flatten

import torch.nn as nn
import torch.nn.functional as F

import PIL
from PIL import Image as img

from IPython.display import Image

# Category name -> integer class id. Empty until createGlobalDic() fills it.
GLOBAL_LABEL = {}
# Integer class id -> category name (the inverse table), also filled by
# createGlobalDic(); used to turn predicted ids back into names.
GLOBAL_LABEL_REV = {}

class CustomImageDataset(Dataset):
    """Image dataset described by a CSV file stored inside ``img_dir``.

    Column 0 of the CSV holds the image path relative to ``img_dir``.  When
    ``train`` is truthy, column 1 holds the category name, which is replaced
    by its integer id through the module-level ``GLOBAL_LABEL`` dict at
    construction time (names missing from the dict become NaN, as
    ``Series.map`` does).  When ``train`` is falsy every sample gets the
    placeholder label ``-1``.

    Args:
        annotations_file: CSV file name, relative to ``img_dir``.
        img_dir: Directory holding the CSV and the images.
        train: Whether the CSV carries labels in column 1.
        transform: Optional callable applied to the opened PIL image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, train = True, transform=None, target_transform=None):
        # os.path.join (rather than string concatenation) so ``img_dir`` works
        # with or without a trailing separator, matching __getitem__.
        self.img_labels = pd.read_csv(os.path.join(img_dir, annotations_file))
        self.img_dir = img_dir
        self.is_train = train

        # Plain truthiness, so this agrees with the check in __getitem__.
        if train:
            self.img_labels.iloc[:, 1] = self.img_labels.iloc[:, 1].map(GLOBAL_LABEL)

        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotations CSV."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``{"image": ..., "label": ...}`` for row ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        # NOTE(review): the image is passed on in whatever mode PIL opens it;
        # confirm every file is 3-channel if a 3-channel Normalize follows.
        image = PIL.Image.open(img_path)

        label = self.img_labels.iloc[idx, 1] if self.is_train else -1

        # ``transform`` defaults to None, so it must be optional here too.
        if self.transform is not None:
            image = self.transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return {"image": image, "label": label}

def createGlobalDic(trainingFile):
    """Fill the module-level label tables from the training CSV.

    Reads the ``category`` column of ``trainingFile`` and numbers its distinct
    values in order of first appearance.  ``GLOBAL_LABEL`` ends up mapping
    category name -> id and ``GLOBAL_LABEL_REV`` mapping id -> category name.

    Args:
        trainingFile: Path to a CSV file that has a ``category`` column.
    """
    categories = pd.read_csv(trainingFile)['category'].unique()

    # The dicts are mutated in place (never rebound), so no ``global``
    # statement is needed.  Clear them first so that calling this again with
    # a different file cannot leave stale entries behind.
    GLOBAL_LABEL.clear()
    GLOBAL_LABEL_REV.clear()

    for index, value in enumerate(categories):
        GLOBAL_LABEL_REV[index] = value
        GLOBAL_LABEL[value] = index

def loadData(file, batch_size=50, num_workers=20, shuffle=True):
    """Build the training DataLoader for the dataset rooted at ``file``.

    Args:
        file: Directory that contains ``training.csv`` and the images it
            references.
        batch_size: Samples per batch.
        num_workers: Upper bound on loader worker processes; capped at the
            number of CPUs reported by the OS.
        shuffle: Whether to reshuffle the training samples every epoch.

    Returns:
        A DataLoader over ``CustomImageDataset`` yielding dicts with
        ``"image"`` and ``"label"`` entries.
    """
    # Resize(299) with an int scales the shorter side to 299 and keeps the
    # aspect ratio.  NOTE(review): batching therefore relies on all images
    # ending up the same size - presumably true for this dataset; confirm.
    img_tran = transforms.Compose([transforms.Resize(299),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                                ])

    # Asking for more workers than there are CPUs only triggers a DataLoader
    # warning and oversubscribes the machine, so cap the request.
    workers = min(num_workers, os.cpu_count() or 1)

    training_data = CustomImageDataset(annotations_file="training.csv", img_dir=file, transform=img_tran)

    # Shuffle by default: feeding SGD the samples in CSV order every epoch
    # gives poorly mixed batches.
    return DataLoader(training_data, batch_size=batch_size, shuffle=shuffle, num_workers=workers)

def modelLoader(train_dataloader, num_classes=19, epochs=40, max_lr=0.1):
    """Create the Inception-v3 classifier with its loss, optimizer and scheduler.

    Args:
        train_dataloader: Loader whose length (batches per epoch) sizes the
            one-cycle learning-rate schedule.
        num_classes: Output size of the replacement classification head.
        epochs: Number of epochs the one-cycle schedule is laid out for.
        max_lr: Peak learning rate of the one-cycle schedule.

    Returns:
        ``[model, criterion, optimizer, scheduler]``; model and criterion are
        on the GPU when CUDA is available, otherwise on the CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = models.inception_v3(pretrained = True)

    # Replace the pretrained head.  The single-layer Sequential wrapper is
    # kept on purpose: it determines the state-dict key names ("fc.0.*"), so
    # checkpoints saved from this model keep loading.
    model.fc = nn.Sequential(nn.Linear(model.fc.in_features, num_classes))

    # Move to the target device BEFORE building the optimizer, as the
    # torch.optim documentation recommends.
    model = model.to(device)
    criterion = CrossEntropyLoss().to(device)

    # The ``lr`` given here is only a starting value; OneCycleLR takes over
    # the learning rate of every parameter group.
    optimizer = SGD(model.parameters(), lr=max_lr, momentum=0.9, weight_decay=4e-5, nesterov=True)

    # NOTE(review): the default schedule spans 40 epochs while main() trains
    # for 10 - confirm which of the two is intended.
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=max_lr, epochs=epochs,
                                              steps_per_epoch=len(train_dataloader))

    return [model, criterion, optimizer, scheduler]

def _predict_labels(model, loader, device):
    """Return the arg-max class id for every sample yielded by ``loader``."""
    was_training = model.training
    model.eval()

    predictions = []
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for sample in loader:
            scores = model(sample['image'].to(device))
            predictions.extend(torch.argmax(scores, dim=1).tolist())

    # Hand the model back in the mode it arrived in.
    model.train(was_training)
    return predictions


def _write_submission(testFile, outputFile, predictions):
    """Write a ``name,category`` CSV pairing each test row with its prediction."""
    with open(testFile, 'r') as f:
        lines = f.readlines()

    print(len(predictions), len(lines))

    # Skip the header; strip line endings explicitly (not ``[:-1]``) so a last
    # line without a trailing newline is not truncated, and ignore blank lines.
    names = [line.rstrip('\r\n') for line in lines[1:] if line.strip()]

    with open(outputFile, 'w') as f:
        f.write('name,category\n')
        # One output row per prediction - including the final one.
        for name, pred in zip(names, predictions):
            f.write(name + ',' + GLOBAL_LABEL_REV[pred] + '\n')


def main():
    """Train the classifier on the competition data and save its weights.

    Builds the label tables, the training/test loaders and the model, trains
    for ``epochs`` epochs while printing the mean loss per epoch, and finally
    stores the model's state dict in ``./model.pth``.

    Returns:
        The list of per-epoch mean training losses.
    """
    trainingFile = "../input/col341-a3/"
    modelFile = "./model.pth"
    testFile = "../input/col341-a3/test.csv"
    outputFile = "./submission.csv"

    createGlobalDic(trainingFile+"training.csv")

    train_dataloader = loadData(trainingFile)

    model, criterion, optimizer, scheduler = modelLoader(train_dataloader)
    # Follow whatever device modelLoader placed the model on.
    device = next(model.parameters()).device

    epochs = 10
    # NOTE(review): with epochs = 10 the ``epoch > checkpoint_after`` test
    # below is never true, so no mid-training submission is written.  The
    # original threshold is kept as found - confirm the intended cadence.
    checkpoint_after = 10
    train_losses = []

    BATCH_SIZE = 50
    NUM_WORKERS = 20

    # Same preprocessing as training, minus the random flip.
    valid_tfms = transforms.Compose([transforms.Resize(299),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                                    ])

    test_dataset = CustomImageDataset(annotations_file="test.csv", train=False, img_dir=trainingFile, transform=valid_tfms)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                             num_workers=min(NUM_WORKERS, os.cpu_count() or 1))

    for epoch in range(epochs):

        print("Epoch: ", epoch)

        # Inception-v3 only returns an object with ``.logits`` in training
        # mode, so make sure of the mode before the first batch.
        model.train()

        running_loss = 0.0
        count = 0

        for sample in train_dataloader:
            inputs = sample['image'].to(device)
            labels = sample['label'].to(device)

            optimizer.zero_grad()
            logps = model(inputs).logits
            loss = criterion(logps, labels)
            loss.backward()

            optimizer.step()
            # The one-cycle schedule advances once per batch.
            scheduler.step()

            running_loss += loss.item()
            count += 1

        # Compute the mean BEFORE anything resets the accumulator, so the
        # recorded history holds the real loss rather than 0.
        epoch_loss = running_loss / count
        print('Epoch : ',epoch+1, '\t', 'loss :', epoch_loss)
        train_losses.append(epoch_loss)

        if epoch > checkpoint_after:
            torch.save(model.state_dict(), modelFile)
            # Predict with the model being trained instead of rebuilding a
            # pretrained network and reloading the weights just saved.
            _write_submission(testFile, outputFile, _predict_labels(model, test_loader, device))

    torch.save(model.state_dict(), modelFile)

    return train_losses

# Start training only when this code is executed directly (a script run or a
# notebook cell, where __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


  cpuset_checked))
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

Epoch:  0
Epoch :  1 	 loss : 0.6981421775102821
8800 8801
Epoch:  1
Epoch :  2 	 loss : 0.3423359273594649
Epoch:  2
Epoch :  3 	 loss : 0.3283222024577466
8800 8801
Epoch:  3
Epoch :  4 	 loss : 0.30669325532747527
Epoch:  4
Epoch :  5 	 loss : 0.2843085771615708
8800 8801
Epoch:  5
Epoch :  6 	 loss : 0.25469143149882184
Epoch:  6
Epoch :  7 	 loss : 0.21963468751043166
8800 8801
Epoch:  7
Epoch :  8 	 loss : 0.19305715337295482
Epoch:  8


In [None]:
import os
from IPython.display import FileLink

# Make the Kaggle working directory current so the relative path below
# resolves against it, then show a download link for the saved weights.
# The FileLink expression stays last so the notebook renders it.
os.chdir(r'/kaggle/working/')
FileLink(r'model.pth')

In [None]:
# ---- Test-time data pipeline -------------------------------------------------
# Where the data lives and which annotation file describes the test images.
root = "../input/col341-a3/"
test_csv = "test.csv"

# Loader settings.
BATCH_SIZE = 50
NUM_WORKERS = 20

# Per-channel (mean, std) pair; the same numbers are given to Normalize below.
stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Deterministic preprocessing, no augmentation: resize the shorter side to
# 299, convert to a tensor, then normalise each channel.
valid_tfms = transforms.Compose(
    [
        transforms.Resize(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# train=False: the test CSV carries no labels, so each sample's label is -1.
test_dataset = CustomImageDataset(
    annotations_file=test_csv,
    img_dir=root,
    train=False,
    transform=valid_tfms,
)

# Keep the CSV order (no shuffling) so predictions line up with its rows.
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [None]:
# ---- Predict the test set and write the submission file ----------------------
# Requires the earlier cells: modelLoader, test_loader and a populated
# GLOBAL_LABEL_REV must already exist.
modelFile = "./model.pth"
testFile = "../input/col341-a3/test.csv"
outputFile = "./submission.csv"

# Rebuild the network, then overwrite its weights with the trained ones.
model, criterion, optimizer, scheduler = modelLoader(test_loader)
# map_location="cpu" lets a checkpoint saved on a GPU be read on any machine;
# load_state_dict then copies the values onto the model's own device.
model.load_state_dict(torch.load(modelFile, map_location="cpu"))
model.eval()

device = next(model.parameters()).device

predictions = []

# Inference only: no autograd bookkeeping needed.
with torch.no_grad():
    for sample in test_loader:
        x_test = sample['image'].to(device)
        pred = torch.argmax(model(x_test), dim=1)
        predictions.extend(pred.tolist())

with open(testFile, 'r') as f:
    filenames = f.readlines()

print(len(predictions), len(filenames))

# Drop the header; strip line endings explicitly (not ``[:-1]``) so a final
# line without a newline is not truncated, and ignore blank lines.
names = [line.rstrip('\r\n') for line in filenames[1:] if line.strip()]

with open(outputFile, 'w') as f:
    f.write('name,category\n')
    # One row per prediction - the previous ``len(predictions) - 1`` bound
    # silently left the last test image out of the submission.
    for name, label_id in zip(names, predictions):
        f.write(name + ',' + GLOBAL_LABEL_REV[label_id] + '\n')
