In [3]:
import os

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

In [2]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(30, 64),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.15),
            nn.Linear(32, 1), 
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.layers(x)

In [8]:
parent_dir = os.path.dirname(os.getcwd())

In [9]:
device = torch.device("cuda")

In [11]:
network = Network()
network = network.to(device)

In [3]:
train = pd.read_csv(os.path.join(parent_dir, 'csv_files', 'train.csv'))
device = torch.device("cuda")

train_labels = train['Target'].to_numpy()
train_data = train.drop(['Target'], axis=1).to_numpy()

# standard normalise using sklearn
scaler = StandardScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)

# convert to tensors
train_data = torch.from_numpy(train_data).float()
train_labels = torch.from_numpy(train_labels).float().reshape(-1, 1)

# split into train and validation sets
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.3)

# create dataset and dataloader
train_dataset = TensorDataset(train_data.to(device), train_labels.to(device))
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = TensorDataset(val_data.to(device), val_labels.to(device))
val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=True)

# utilities
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(network.parameters(), lr = 3 * 1e-4)

# train the model
epochs = 500
max_eval_epoch = 0.0

for epoch in range(epochs):
    for data, label in train_dataloader:
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
    
    # calculate validation accuracy
    correct = 0
    total = 0
    with torch.no_grad():
        for data, label in val_dataloader:
            preds = network(data)
            preds = torch.round(preds)
            correct += torch.sum(preds == label)
            total += len(label)
        
        accuracy = correct / total
        print(f'Epoch: {epoch+1}, Val Accuracy: {accuracy}')
        if accuracy > max_eval_epoch:
            max_eval_epoch = accuracy
            torch.save(network.state_dict(), 'model5.pth')
            print("Model Saved")


FileNotFoundError: [Errno 2] No such file or directory: 'train.csv'

In [37]:
# generate predictions on test dataset
test_data = pd.read_csv('test.csv')
test_data = test_data.drop(['ID'], axis=1)

test_data = test_data.to_numpy()
test_data = scaler.transform(test_data)
test_data = torch.from_numpy(test_data).float().to(device)

test_net = Network()
test_net.load_state_dict(torch.load('model5.pth'))
test_net = test_net.to(device)

preds = test_net(test_data)
preds = torch.round(preds).detach().cpu().numpy().astype(int)
preds = preds.reshape(-1)

# save predictions to csv
submission_df = pd.DataFrame({'ID': np.arange(1, len(preds) + 1), 'Target': preds})
submission_df.to_csv('submission.csv', index=False)

In [39]:
# accuracy and loss on training and validation set

network = Network()
network.load_state_dict(
    torch.load(os.path.join(parent_dir, "weights", "final_weights.pth"))
)
network = network.to(device)

train_correct = 0
train_total = 0
train_loss = 0
train_preds = []

with torch.no_grad():
    for data, label in train_dataloader:
        preds = network(data)
        preds = torch.round(preds)
        train_correct += torch.sum(preds == label)
        train_total += len(label)
        train_loss += criterion(preds, label)
        train_preds.append(preds)

train_accuracy = train_correct / train_total
train_loss = train_loss / len(train_dataloader)

val_correct = 0
val_total = 0
val_loss = 0
val_preds = []

with torch.no_grad():
    for data, label in val_dataloader:
        preds = network(data)
        preds = torch.round(preds)

        val_correct += torch.sum(preds == label)
        val_total += len(label)
        val_loss += criterion(preds, label)
        val_preds.append(preds)

val_accuracy = val_correct / val_total
val_loss = val_loss / len(val_dataloader)

print(f"Train Accuracy: {train_accuracy}, Train Loss: {train_loss}")
print(f"Val Accuracy: {val_accuracy}, Val Loss: {val_loss}")

# precision, recall, f1 score on validation set

val_preds = torch.cat(val_preds).cpu().numpy()
val_preds = val_preds.reshape(-1)
val_labels = val_labels.reshape(-1)

precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_preds)
print(f"Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

Train Accuracy: 0.7471821308135986, Train Loss: 25.28178596496582
Val Accuracy: 0.7467833161354065, Val Loss: 25.32038688659668
Precision: [0.46630485 0.5298578 ], Recall: [0.46934164 0.52681818], F1 Score: [0.46781831 0.52833362]
