In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Root directory of the Kaggle input dataset; the CSV tables and the cover
# images are all read from locations built by appending to this prefix.
path = "/kaggle/input/col774-2022"

In [4]:
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [5]:
# Read the four CSV tables (train features/labels, then the non_comp_test
# features/labels) from the dataset root, in that order.
train_x, train_y, test_x, test_y = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        "/train_x.csv",
        "/train_y.csv",
        "/non_comp_test_x.csv",
        "/non_comp_test_y.csv",
    )
)

# Columns used downstream: cover image file names and their genre labels.
image_names, labels = train_x['Cover_image_name'], train_y['Genre']
image_names_test, labels_test = test_x['Cover_image_name'], test_y['Genre']

In [6]:
# Turns a PIL image into a tensor of the same integer dtype, laid out as
# (channels, height, width).
transform = transforms.Compose([transforms.PILToTensor()])


def convert_to_tensor(image_name):
    """Load one cover image and return it as a scaled, single-item batch.

    Args:
        image_name: File name of the image inside the dataset's
            ``images/images`` directory.

    Returns:
        A floating-point tensor of shape ``(1, 3, H, W)`` whose values are the
        pixel intensities divided by 255.

    Raises:
        OSError: If the file is missing or cannot be decoded by Pillow
            (``FileNotFoundError`` and ``UnidentifiedImageError`` are both
            subclasses).
    """
    image_path = path + "/images/images/" + image_name
    # Image.open keeps the underlying file open until the image is closed.
    # This function is called once per sample per epoch, so close the handle
    # deterministically instead of relying on garbage collection.
    with Image.open(image_path) as img:
        # Force three channels so grayscale, palette or RGBA files still match
        # a network that expects RGB input; RGB images pass through unchanged.
        img_tensor = transform(img.convert("RGB"))
    # Add the leading batch dimension and scale to floating point.
    return img_tensor.unsqueeze(0) / 255

In [10]:
class Net(nn.Module):
    """Convolutional classifier that maps an RGB image batch to 30 scores.

    Three unpadded 5x5 convolutions (3 -> 32 -> 64 -> 128 channels), each
    followed by ReLU and 2x2 max pooling, feed a two-layer fully connected
    head. The head expects 128 feature maps of size 24x24, which is what
    e.g. a 224x224 input produces.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)

        # Fully connected head; fc2 emits one raw score per class.
        self.fc1 = nn.Linear(in_features=128 * 24 * 24, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=30)

    def forward(self, inp):
        """Return the raw (unnormalised) class scores for a batch of images.

        Args:
            inp: Tensor of shape ``(batch, 3, H, W)``.

        Returns:
            Tensor of shape ``(batch, 30)``.
        """
        features = inp
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2, self.conv3):
            features = F.max_pool2d(F.relu(conv(features)), kernel_size=2)

        # Collapse everything except the batch dimension before the head.
        hidden = F.relu(self.fc1(features.flatten(start_dim=1)))
        return self.fc2(hidden)
        


# Hyperparameters.
epochs = 5
learning_rate = 0.001

# Build the model on the selected device; Module.to returns the module itself.
net = Net().to(device)

# Cross-entropy over the raw class scores, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=learning_rate)

# Start from cleared gradients.
net.zero_grad()

In [11]:
def accuracy(inputs, outputs):
    """Return the fraction of images whose predicted class equals its label.

    Each image is loaded with ``convert_to_tensor`` and classified by the
    module-level ``net``; the prediction is the index of the largest score.

    Args:
        inputs: Sequence (list or pandas Series) of image file names.
        outputs: Sequence of integer class labels, matched to ``inputs`` by
            position.

    Returns:
        ``correct / len(outputs)`` as a float, or ``0.0`` when ``outputs`` is
        empty.
    """
    total = len(outputs)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = 0
    # Evaluation needs no gradients. Disabling autograd avoids building a
    # graph for every forward pass, which saves memory and time.
    with torch.no_grad():
        # zip pairs items by position, so this also works for a Series whose
        # index is not the default 0..n-1 range.
        for image_name, label in zip(inputs, outputs):
            data_point = convert_to_tensor(image_name).to(device)
            predicted = int(torch.argmax(net(data_point)[0]))
            if predicted == label:
                correct += 1

    return correct / total

In [12]:
# Plain SGD with a batch size of one: every image gets its own forward pass,
# backward pass and parameter update, in dataset order, for each epoch.
for epoch in range(epochs):
    for idx in range(len(image_names)):
        sample = convert_to_tensor(image_names[idx]).to(device)
        target = torch.tensor([labels[idx]]).to(device)

        # Clear gradients left over from the previous sample before backprop.
        optimizer.zero_grad()
        loss = loss_fn(net(sample), target)
        loss.backward()
        optimizer.step()

# Report accuracy on both splits once training has finished.
print(f"Training accuracy after {epochs} epochs is: {accuracy(image_names, labels)}")
print(f"Test accuracy after {epochs} epochs is: {accuracy(image_names_test, labels_test)}")

Training accuracy after 5 epochs is: 0.26941520467836255
Test accuracy after 5 epochs is: 0.1368421052631579


In [14]:
def output_file_generator(image_names_test, output_path='non_comp_test_pred_y.csv'):
    """Predict a genre for every image and write the results to a CSV file.

    Each image is loaded with ``convert_to_tensor`` and classified by the
    module-level ``net``. The CSV has two columns: ``Id`` (the 0-based
    position of the image in ``image_names_test``) and ``Genre`` (the index
    of the highest-scoring class).

    Args:
        image_names_test: Sequence (list or pandas Series) of image file names.
        output_path: Destination of the CSV file. Defaults to
            ``non_comp_test_pred_y.csv`` in the current working directory.

    Returns:
        None. The predictions are only written to ``output_path``.
    """
    prediction_output = []
    # Inference only: skip autograd bookkeeping for every forward pass.
    with torch.no_grad():
        # Iterate by position so a Series with a non-default index also works.
        for image_name in image_names_test:
            data_point = convert_to_tensor(image_name).to(device)
            pred_list = net(data_point)[0]
            prediction_output.append(torch.argmax(pred_list).item())

    id_value = list(range(len(prediction_output)))
    predictions = pd.DataFrame({'Id': id_value, 'Genre': prediction_output})
    predictions.to_csv(output_path, index=False)
# Generate and save predictions for the test covers listed in
# image_names_test (the 'Cover_image_name' column of non_comp_test_x.csv).
output_file_generator(image_names_test)