In [1]:
import torch
from torch import nn
import nbimporter
import torchaudio
from torch.utils.data import DataLoader


In [2]:
from vvLec4 import UrbanSoundDataset
from vvLec4_cnn import CNNNetwork

In [3]:
# --- Training hyper-parameters -------------------------------------------
# Number of samples per batch; handed to create_data_loader().
BATCH_SIZE = 256
# Number of full passes over the data loader performed by train().
EPOCHS = 10
# Step size given to the Adam optimiser.
LEARNING_RATE = 0.001


# --- Dataset configuration (all forwarded to UrbanSoundDataset) -----------
# Paths are relative to the notebook's working directory.
ANNOTATIONS_FILE = "UrbanSound8K/metadata/UrbanSound8K.csv"
AUDIO_DIR = "UrbanSound8K/audio/"
# Also used as the sample_rate of the MelSpectrogram transform.
SAMPLE_RATE = 22050
# NOTE(review): equal to SAMPLE_RATE, so presumably each clip is fixed to one
# second of audio -- confirm against UrbanSoundDataset.
NUM_SAMPLES = 22050

In [4]:
# DataLoader: a class that wraps a dataset so that its samples can be fetched in batches

# Creating a data-loader
def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap a training dataset in a batching ``DataLoader``.

    Args:
        train_data: Map-style dataset (anything supporting ``len()`` and
            integer indexing) to draw training samples from.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples at the start of every
            epoch. Defaults to ``True`` because this loader feeds training:
            without shuffling every epoch replays the same batches in the
            dataset's storage order. Pass ``False`` for a deterministic,
            in-order pass.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``train_data``.
    """
    return DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)


In [5]:
def train_one_epoch(model, data_loader, loss_fn, optimiser, device):
    """Run one optimisation pass over every batch yielded by ``data_loader``.

    Args:
        model: ``torch.nn.Module`` being trained; called as ``model(inputs)``.
        data_loader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimiser: Optimiser holding the model's parameters.
        device: Device (e.g. ``"cuda"`` or ``"cpu"``) the batches are moved to
            before the forward pass.

    Prints the loss of the *last* batch of the epoch (not an epoch average).
    If the loader yields no batches, a placeholder line is printed instead of
    failing on an unbound ``loss``.
    """
    # Ensure layers such as dropout / batch-norm behave in training mode even
    # if the model was previously switched to eval().
    model.train()

    loss = None  # stays None when the loader is empty
    for inputs, targets in data_loader:
        # Model and data must live on the same device.
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss computation.
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)

        # Backpropagate and update the weights.
        optimiser.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimiser.step()

    if loss is None:
        print("Loss: n/a (data loader produced no batches)")
        return
    print(f"Loss: {loss.item()}")

def train(model, data_loader, loss_function, optimiser, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``data_loader``.

    Args:
        model: Model to optimise.
        data_loader: Iterable of ``(inputs, targets)`` batches.
        loss_function: Loss callable forwarded to ``train_one_epoch``.
        optimiser: Optimiser holding the model's parameters.
        device: Device the batches are moved to.
        epochs: Number of epochs to run.

    Prints a header and separator around each epoch and a final message once
    all epochs have completed.
    """
    for i in range(epochs):
        print(f"Epoch {i+1}")
        # Pass the *argument*; the previous code referenced `loss_fn`, which
        # only worked when a global of that name happened to exist.
        train_one_epoch(model, data_loader, loss_function, optimiser, device)
        print("----------------------------")
    print("Training is done")
    

In [7]:
if __name__ == "__main__":

    # Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device {device}")

    # Transform handed to the dataset: 64-band mel spectrogram computed with
    # a 1024-point FFT and a hop of 512 samples.
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE, n_fft=1024, hop_length=512, n_mels=64
    )

    # Dataset over the UrbanSound8K annotations / audio folders.
    usd = UrbanSoundDataset(
        ANNOTATIONS_FILE, AUDIO_DIR, mel_spectrogram, SAMPLE_RATE, NUM_SAMPLES, device
    )

    # Batched loader plus the network, moved onto the selected device.
    train_data_loader = create_data_loader(usd, BATCH_SIZE)
    cnn = CNNNetwork().to(device)
    print(cnn)

    # Loss and optimiser.
    # NOTE(review): the printed CNNNetwork ends in Softmax(dim=1), while
    # nn.CrossEntropyLoss is documented to expect unnormalised logits --
    # confirm whether the softmax layer should be dropped for training.
    loss_fn = nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RATE)

    # Kick off training for the configured number of epochs.
    train(cnn, train_data_loader, loss_fn, optimiser, device, EPOCHS)


Using device cuda
CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=2560, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)
Epoch 1
Loss: 2.39358

In [10]:
# Persist only the learned parameters (the state_dict), not the whole model
# object. Depends on `cnn` from the training cell, so that cell must have been
# run first. The file is written relative to the current working directory.
torch.save(cnn.state_dict(), "cnnnet.pth")
print("Model trained and stored at cnnnet.pth")

Model trained and stored at cnnnet.pth
