In [2]:
import torch
import os.path

from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

## Download dataset

In [3]:
def download_mnist_datasets():
    """Build the two MNIST splits, downloading them into ``data`` when needed.

    Returns:
        A ``(train_data, validation_data)`` tuple: the ``train=True`` split
        followed by the ``train=False`` split. Both use ``ToTensor()`` as the
        image transform.
    """
    # Both splits are configured identically apart from the `train` flag
    train_data, validation_data = (
        datasets.MNIST(
            root="data", download=True, train=is_train_split, transform=ToTensor()
        )
        for is_train_split in (True, False)
    )
    return train_data, validation_data

In [4]:
# Build both MNIST splits; only the training split is kept in this cell
# (the second return value is discarded by binding it to `_`).
train_data, _ = download_mnist_datasets()
print("MNIST dataset downloaded")

MNIST dataset downloaded


In [4]:
# Bare expression: the notebook renders the dataset's summary repr as the cell output
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

## Make dataloader

In [5]:
# Number of samples served per batch by the training loader
BATCH_SIZE = 128

# NOTE(review): `shuffle` is left at its default here; consider whether the
# training batches should be reshuffled every epoch.
train_data_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE)

## Make network

In [6]:
class FeedForwardNet(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs and 10 classes.

    The output of ``forward`` depends on the module mode:

    * training mode (``model.train()``, the default): raw logits, which is
      what ``nn.CrossEntropyLoss`` expects because it applies log-softmax
      itself. Feeding it already-softmaxed values normalises twice and
      weakens the gradients.
    * eval mode (``model.eval()``): softmax probabilities, one row per
      example, each row summing to 1.

    The arg-max class is the same in both modes.
    """

    def __init__(self):
        """Create the flatten, dense and softmax layers."""
        # Initialise nn.Module so that sub-modules and parameters get registered
        super().__init__()

        # Collapses every dimension after the batch dimension into one
        self.flatten = nn.Flatten()

        # Dense layers: 28*28 input features -> 256 hidden units -> 10 class scores
        self.dense_layers = nn.Sequential(
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        )

        # Normalises along dim=1 (the class dimension) so each row sums to 1.
        # Softmax has no parameters, so it does not affect the state dict.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        """Run the forward pass.

        Args:
            input_data: Batch of inputs whose non-batch dimensions flatten to
                28*28 features.

        Returns:
            A ``(batch, 10)`` tensor: logits in training mode, softmax
            probabilities in eval mode.
        """
        flattened_data = self.flatten(input_data)
        logits = self.dense_layers(flattened_data)

        if self.training:
            # The loss applies (log-)softmax itself; hand it unnormalised scores
            return logits

        predictions = self.softmax(logits)
        return predictions

In [7]:
# Prefer the GPU (CUDA) when one is available; otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Instantiate the network and move its parameters onto the selected device
feed_forward_net = FeedForwardNet().to(device)

Using cuda device


In [8]:
def train_one_epoch(model, data_loader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``data_loader`` and print the last batch loss.

    Args:
        model: Module being trained; it is called on every input batch.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable taking ``(predictions, targets)`` and returning a
            loss that supports ``backward()``.
        optimizer: Optimiser whose ``zero_grad()`` and ``step()`` are called
            once per batch.
        device: Device each batch is moved to before the forward pass.
    """
    # An earlier inference call may have left the model in eval mode
    model.train()

    loss = None  # stays None when the loader yields no batches
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss for this batch
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)

        # Backpropagate loss and update weights
        optimizer.zero_grad()  # Reset gradients accumulated by the previous batch
        loss.backward()  # Backprop
        optimizer.step()  # Update weights using the optimiser passed in

    # Only the loss of the final batch is reported
    if loss is None:
        print("Loss: n/a (data loader yielded no batches)")
    else:
        print(f"Loss: {loss.item()}")


def train(model, data_loader, loss_fn, optimizer, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``data_loader``.

    Each pass is delegated to ``train_one_epoch``; a 1-based epoch banner is
    printed before it and a separator line after it.
    """
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_one_epoch(model, data_loader, loss_fn, optimizer, device)
        print("-----------------")
    print("Training is done")

In [9]:
# Training setup: step size, criterion and optimiser.
# CrossEntropyLoss applies log-softmax internally, so it expects raw logits as input.
LEARNING_RATE = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=feed_forward_net.parameters(), lr=LEARNING_RATE)

In [10]:
EPOCHS = 10
MODEL_PATH = "feedforwardnet.pth"  # where the trained weights are cached

# Reuse a previously saved state dict when one exists; otherwise train and save one
if os.path.isfile(MODEL_PATH):
    # map_location remaps the stored tensors onto the device chosen for this
    # session, so a checkpoint written on a GPU still loads on a CPU-only machine
    state_dict = torch.load(MODEL_PATH, map_location=device)
    feed_forward_net.load_state_dict(state_dict)
else:
    train(feed_forward_net, train_data_loader, loss_fn, optimizer, device, EPOCHS)
    # Store model
    torch.save(feed_forward_net.state_dict(), MODEL_PATH)
    print(f"Model trained and stored at {MODEL_PATH}")

## Making predictions

In [11]:
# Maps a class index (0-9) to its label string
class_mapping = [str(digit) for digit in range(10)]

In [12]:
def predict(model, input_, target, class_mapping):
    """Classify one example and return its predicted and expected labels.

    Args:
        model: Module to run; it is switched to eval mode by this call.
        input_: Input tensor for the model. Only row 0 of the model output is
            used, so it should describe a single example.
        target: Index of the true class, used to look up ``class_mapping``.
        class_mapping: Sequence mapping class indices to labels.

    Returns:
        A ``(predicted, expected)`` tuple of entries from ``class_mapping``.
    """
    # Put layers such as dropout and batch normalisation into inference behaviour
    model.eval()

    # Run on whatever device the model's parameters live on, rather than
    # depending on a notebook-global `device` variable
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_ = input_.to(first_param.device)

    # No gradients are needed for inference
    with torch.no_grad():
        predictions = model(input_)
        # Arg-max over the class scores of the first (only) example; .item()
        # turns the 0-d tensor into a plain int usable as a sequence index
        predicted_index = predictions[0].argmax(0).item()

    predicted = class_mapping[predicted_index]
    expected = class_mapping[target]
    return predicted, expected

In [13]:
# Reuse the dataset helper; only the second (train=False) split is needed here
_, validation_data = download_mnist_datasets()

# Each dataset item is an (input, target) pair; use the first item as the sample
input_, target = validation_data[0]

# Run inference on the sample and compare the result with its label
predicted, expected = predict(feed_forward_net, input_, target, class_mapping)
print(f"Predicted: '{predicted}', Expected: '{expected}'")

Predicted: '7', Expected: '7'


# Creating a data loader with UrbanSound8K

In [1]:
from torch.utils.data import Dataset

import pandas as pd
import torchaudio
import os

In [None]:
class UrbanSoundDataset(Dataset):
    """Dataset of audio clips described by an annotations CSV.

    Each row of the CSV describes one clip. Columns are read by position:
    the file name, the fold number (clips are stored under
    ``<audio_dir>/fold<N>/``) and the label.
    """

    # Positional columns of the annotations CSV.
    # NOTE(review): presumably the UrbanSound8K metadata layout — verify against the CSV.
    _FILE_NAME_COLUMN = 0
    _FOLD_COLUMN = 5
    _LABEL_COLUMN = 6

    def __init__(self, annotations_file, audio_dir):
        """Read the annotations CSV and remember the audio root directory.

        Args:
            annotations_file: Path to the annotations CSV.
            audio_dir: Directory containing the ``fold<N>`` sub-directories.
        """
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir

    def __len__(self):
        """Return the number of annotated clips (rows in the CSV)."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load the clip at ``index`` and return ``(signal, label)``.

        The sample rate returned by ``torchaudio.load`` is discarded.
        """
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)
        signal, _ = torchaudio.load(audio_sample_path)
        return signal, label

    # Single-underscore methods are for internal use
    def _get_audio_sample_path(self, index):
        """Build ``<audio_dir>/fold<N>/<file_name>`` for the row at ``index``."""
        fold = f"fold{self.annotations.iloc[index, self._FOLD_COLUMN]}"
        file_name = self.annotations.iloc[index, self._FILE_NAME_COLUMN]
        return os.path.join(self.audio_dir, fold, file_name)

    def _get_audio_sample_label(self, index):
        """Return the label stored in the row at ``index``."""
        return self.annotations.iloc[index, self._LABEL_COLUMN]