In [95]:
import os
from torchvision import models
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch

In [96]:
ROOT = r'/home/vijay/learn/kaggle/petfinder/petfinder-pawpularity-score/'
os.chdir(ROOT)

In [97]:
def read_image(path):
    with open(path, "rb") as img:
        img = Image.open(img)
        img.load()
        return img

In [98]:
preprocess = T.Compose([
    T.Resize((64, 64)),
    T.ToTensor(),
    T.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

In [99]:
class PetDataset(Dataset):
    def __init__(self, file, train=True):
        self.df = pd.read_csv(os.path.join(ROOT, f"{file}.csv"))
        self.df['file_path'] = self.df['Id'].apply(lambda x: os.path.join(f"{file}", x+".jpg"))
        self.train = train
        
    def __getitem__(self, idx):
        img = read_image(self.df.iloc[idx]['file_path'])
        if self.train:
            return preprocess(img), torch.tensor(self.df.iloc[idx]['Pawpularity'], dtype=torch.float32)/100
        else:
            return preprocess(img)
    
    def __len__(self):
        return self.df.shape[0]
        

In [100]:
def train(model, epoch, criterian, optimizer):
    model.train()
    for i in range(epoch):
        count = 0
        for data, label in dataloader:

            optimizer.zero_grad()

            out = model(data)
            loss = criterian(out.ravel(), label)

            loss.backward()
            optimizer.step()
            count += 1

            print(f"Epoch:{i}, Data:{count}, Loss:{torch.mean(loss).item()}")


In [101]:
def submission(model):
    dataset_test = PetDataset("test", train=False)
    dataloader = DataLoader(dataset=dataset_test, batch_size=8, shuffle=False)
    
    model.eval()
    with torch.no_grad():
        for data in dataloader:
            out = model(data)
            print(out)
    

In [103]:
dataset = PetDataset("train")
dataloader = DataLoader(dataset=dataset, batch_size=1000, shuffle=True)
criterian = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

model = models.alexnet(pretrained=True)
model.classifier[6] = nn.Linear(in_features=4096, out_features=1)

train(model, 3, criterian, optimizer)

Epoch:0, Data:1, Loss:1.9740077257156372
Epoch:0, Data:2, Loss:2.15999698638916
Epoch:0, Data:3, Loss:2.0038576126098633
Epoch:0, Data:4, Loss:2.0254125595092773
Epoch:0, Data:5, Loss:2.101158618927002
Epoch:0, Data:6, Loss:2.0667481422424316
Epoch:0, Data:7, Loss:2.1614415645599365
Epoch:0, Data:8, Loss:1.993062973022461
Epoch:0, Data:9, Loss:1.9085583686828613
Epoch:0, Data:10, Loss:2.0212883949279785
Epoch:1, Data:1, Loss:2.0609793663024902
Epoch:1, Data:2, Loss:2.102393388748169
Epoch:1, Data:3, Loss:1.9767366647720337
Epoch:1, Data:4, Loss:2.2230677604675293
Epoch:1, Data:5, Loss:2.092487335205078
Epoch:1, Data:6, Loss:2.0556998252868652
Epoch:1, Data:7, Loss:2.1082406044006348
Epoch:1, Data:8, Loss:1.9800457954406738
Epoch:1, Data:9, Loss:2.0827395915985107
Epoch:1, Data:10, Loss:1.9886611700057983
Epoch:2, Data:1, Loss:2.0349316596984863
Epoch:2, Data:2, Loss:1.9482394456863403
Epoch:2, Data:3, Loss:2.0356531143188477
Epoch:2, Data:4, Loss:1.9739620685577393
Epoch:2, Data:5, Los