In [50]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import random
import time

import matplotlib.pyplot as plt # for plotting
import torch.optim as optim #for gradient descent
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import Dataset

from torch.utils.data import DataLoader
from torch.utils.data import sampler

# For plotting
import matplotlib.pyplot as plt
import numpy as np
import time

import pandas as pd
from sklearn.model_selection import train_test_split

In [136]:
def get_model_name(batch_size, learning_rate, epoch):
    """ Generate a name for the model consisting of all the hyperparameter values

    Args:
        config: Configuration object containing the hyperparameters
    Returns:
        path: A string with the hyperparameter name and value concatenated
    """
    path = "model_epochs/densenetmodel_bs{0}_lr{1}_epoch{2}".format(
                                                   batch_size,
                                                   learning_rate,
                                                   epoch)
    return path

def evaluate(net, loader, criterion, device):
    """ Evaluate the network on the validation set.

    Args:
        net: PyTorch neural network object
        loader: PyTorch data loader for the validation set
        criterion: The loss function
    Returns:
        err: A scalar for the avg classification error over the validation set
        loss: A scalar for the average loss function over the validation set
    """
    total_loss = 0.0
    total_err = 0.0
    total_samples = 0

    net.eval()  # Set the network to evaluation mode
    
    with torch.no_grad():  # Disable gradient computation
        for data in loader:
            # Input and labels
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = net(inputs)
            loss = criterion(outputs, labels.float())
            
            # Compute if the output matches the label
            preds = (outputs > 0.0).squeeze().long()  # Binary classification: outputs > 0.0
            incorrect = preds != labels
            total_err += incorrect.sum().item()
            total_loss += loss.item() * len(labels)
            total_samples += len(labels)
    
    err = total_err / total_samples
    loss = total_loss / total_samples
    return err, loss






def plot_training_curve(path):
    """ Plots the training curve for a model run, given the csv files
    containing the train/validation error/loss.

    Args:
        path: The base path of the csv files produced during training
    """
    train_err = np.loadtxt("{}_train_err.csv".format(path))
    val_err = np.loadtxt("{}_val_err.csv".format(path))
    train_loss = np.loadtxt("{}_train_loss.csv".format(path))
    val_loss = np.loadtxt("{}_val_loss.csv".format(path))
    plt.title("Train vs Validation Error")
    n = len(train_err) # number of epochs
    plt.plot(range(1,n+1), train_err, label="Train")
    plt.plot(range(1,n+1), val_err, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Error")
    plt.legend(loc='best')
    plt.show()
    plt.title("Train vs Validation Loss")
    plt.plot(range(1,n+1), train_loss, label="Train")
    plt.plot(range(1,n+1), val_loss, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend(loc='best')
    plt.show()

In [137]:
class Data(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        features = row.drop('catch').values.astype(float)
        target = row['catch']
        if isinstance(target, str):
            target = 1.0 if target == 'positive_label' else 0.0 
        target = torch.tensor(target, dtype=torch.float32).unsqueeze(0)  
        return torch.tensor(features, dtype=torch.float32), target

def data_loader(file_name, data_percentage=0.4, batch_size=32):
    # Add columns of csv file to the following list
    header = ['distance','time', 'catch']
    
    # Load in data
    df = pd.read_csv(
        file_name,
        names=header,
        index_col=False, 
        skiprows = 1)
    
    df_train, df_valtest = train_test_split(df, test_size=0.3, random_state=50)
    df_val, df_test = train_test_split(df_valtest, test_size=0.33, random_state=50)
    
    train_data = Data(df_train)
    val_data = Data(df_val)
    test_data = Data(df_test)
    
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)


    return train_loader, val_loader, test_loader

    # Previous loading trials, not sure if working yet
    '''
    # Converts data to numpy array
    datanp = df.values.astype(np.float32)
    
    # Data to be used for training
    df = df[:int(data_percentage * len(datanp))]
    
    # set the numpy seed for reproducibility
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.seed.html
    np.random.seed(50)
    np.random.shuffle(datanp)

    # Get 70% training data and labels
    train_data = datanp[:int(0.7 * len(datanp)), :-1]
    train_labels = datanp[:int(0.7 * len(datanp)), -1]

    # Get 20% validation data and labels
    val_data = datanp[int(0.7 * len(datanp)):int(0.9 * len(datanp)), :-1]
    val_labels = datanp[int(0.7 * len(datanp)):int(0.9 * len(datanp)), -1]

    # Get 10% testing data and labels
    test_data = datanp[int(0.9 * len(datanp)):, :-1]
    test_labels = datanp[int(0.9 * len(datanp)):, -1]
    
    # Generate loaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size, shuffle=True)
    
    return train_loader, val_loader, test_loader
    '''

In [138]:
class catchProb(nn.Module):
    def __init__(self, num_features, num_classes):
        super(catchProb, self).__init__()
        self.num_features = num_features
        self.name = "catchProb"
        self.fc1 = nn.Linear(num_features, 32)
        self.bn1 = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 4)
        self.bn4 = nn.BatchNorm1d(4)
        self.fc5 = nn.Linear(4, num_classes)
    
    
    def forward(self, x):
        
        x = self.fc1(x)
        x = F.relu(self.bn1(x))
        
        
        x = self.fc2(x)
        x = F.relu(self.bn2(x))
        
        x = self.fc3(x)
        x = F.relu(self.bn3(x))
        
        x = self.fc4(x)
        x = F.relu(self.bn4(x))
        
        x = self.fc5(x)
        return x

In [155]:
def train(net, device, train_loader, valid_loader, test_loader, batch_size=32, learning_rate=0.001, num_epochs=10):
    net.to(device)
    net.train()

    # # Freeze the weights for training
    # for params in net.parameters():
    #     params.requires_grad = False

    # Attaching classifier to the pretrained model
    num_features = 2
    num_classes = 1
    custom_classifier = catchProb(num_features, num_classes).to(device)
    net.classifier = custom_classifier.to(device)

    criterion = nn.BCEWithLogitsLoss()  # For binary classification
    # optimizer = optim.AdamW(net.parameters(), learning_rate)
    decay_rate = 0.9
    optimizer = optim.AdamW(net.parameters(), lr=learning_rate, betas=(decay_rate, 0.999))

    # optimizer = optim.SGD(net.parameters(), lr=0.001)
    # optimizer = optim.RMSprop(net.parameters(), lr=learning_rate)


    train_error = np.zeros(num_epochs)
    train_loss = np.zeros(num_epochs)
    valid_error = np.zeros(num_epochs)
    valid_loss = np.zeros(num_epochs)

    start_time = time.time()
    for epoch in range(num_epochs):
        total_train_loss = 0.0
        total_train_error = 0.0
        total_epoch = 0
        for i, data in enumerate(train_loader):

            # Input and labels
            inputs, labels = data
        
            
            # # Move tensors to the specified device
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()


            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()  # Backward pass
            optimizer.step()  # Update parameters

            # Compute if the output matches the label
            corr = (outputs > 0.0).squeeze().long() != labels

            # Stats
            total_train_error += int(corr.sum())
            total_train_loss += loss.item()
            total_epoch += len(labels)

        # Training error and loss
        train_error[epoch] = float(total_train_error) / total_epoch
        train_loss[epoch] = float(total_train_loss) / len(train_loader)

        # Run validation tests
        valid_error[epoch], valid_loss[epoch] = evaluate(net, valid_loader, criterion, device)
            # Print results
        print(("Epoch {}: Train err: {}, Train loss: {} |" +
               "Validation err: {}, Validation loss: {}").format(
            epoch + 1,
            train_error[epoch],
            train_loss[epoch],
            valid_error[epoch],
            valid_loss[epoch]))
        model_path = get_model_name(batch_size, learning_rate, epoch)

        # Save the model
        model_scripted = torch.jit.script(net)
        model_scripted.save(f"{model_path}.pt")

    print("Finished Training")
    end_time = time.time()
    elapsed_time = end_time - start_time
    print("Total time elapsed: {:.2f} seconds".format(elapsed_time))

    test_er, test_loss = evaluate(net, test_loader, criterion, device)
    print('Test Accuracy: ' + str(test_er) + '  ' + "Test Loss:" + str(test_loss))

    # Write the train/test loss/err into CSV file for plotting later
    epochs = np.arange(1, num_epochs + 1)
    np.savetxt("{}_train_err.csv".format(model_path), train_error)
    np.savetxt("{}_train_loss.csv".format(model_path), train_loss)
    np.savetxt("{}_val_err.csv".format(model_path), valid_error)
    np.savetxt("{}_val_loss.csv".format(model_path), valid_loss)

Models

In [156]:
# Prepare device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate model
num_features = 2
num_classes =1
net = catchProb(num_features, num_classes).to(device)

# Load data
train_loader, valid_loader, test_loader = data_loader('modelData_1.csv')

# Train model
train(net, device, train_loader, valid_loader, test_loader, batch_size=32, learning_rate=0.001, num_epochs=10)


Epoch 1: Train err: 9.076380409713742, Train loss: 0.4264822193296441 |Validation err: 10.826086956521738, Validation loss: 0.35823717735890853
Epoch 2: Train err: 9.282593949260615, Train loss: 0.14564196803018417 |Validation err: 8.426275992438564, Validation loss: 0.11164369287641829
Epoch 3: Train err: 9.307149640482974, Train loss: 0.1332394548518446 |Validation err: 8.69187145557656, Validation loss: 0.1132037991443068
Epoch 4: Train err: 9.305114638447971, Train loss: 0.1045572332356496 |Validation err: 10.331758034026466, Validation loss: 0.137254816766948


KeyboardInterrupt: 