In [1]:
from google.colab import drive
from pathlib import Path

drive.mount('/content/drive')

CHORD_TRAINER_DIR = Path('/content/drive/MyDrive/chord_trainer')
TRAINING_DIR = CHORD_TRAINER_DIR / 'training'

Mounted at /content/drive


In [2]:
import csv
import random


all_wavs = list(TRAINING_DIR.glob('*/*.wav')) #list of audio files

rows = []

for path in all_wavs:
    folder = path.parent.name  #folder of audio file
    root, label = folder.split('_')
    label = label.lower()

    rows.append([str(path), label, root])


split_counts = {'major': 0, 'minor': 0}
splits = []

for _, label, _ in rows:
    n = split_counts[label]

    split = 'train' if n % 10 < 7 else 'val' if n % 10 < 9 else 'test' #70/20/10 split
    splits.append(split)
    split_counts[label] += 1

#write metadata to csv
metadata_path = CHORD_TRAINER_DIR / 'chord_metadata.csv'
with open(metadata_path, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['filepath', 'label', 'root', 'split'])
    for row, split in zip(rows, splits):
        writer.writerow(row + [split])


In [3]:
import pandas as pd

df = pd.read_csv(metadata_path)
print(df['label'].value_counts())
print(df['split'].value_counts())
df.head()


label
major    1222
minor     728
Name: count, dtype: int64
split
train    1367
val       389
test      194
Name: count, dtype: int64


Unnamed: 0,filepath,label,root,split
0,/content/drive/MyDrive/chord_trainer/training/...,minor,E,train
1,/content/drive/MyDrive/chord_trainer/training/...,minor,E,train
2,/content/drive/MyDrive/chord_trainer/training/...,minor,E,train
3,/content/drive/MyDrive/chord_trainer/training/...,minor,E,train
4,/content/drive/MyDrive/chord_trainer/training/...,minor,E,train


In [4]:
import torch
import torchaudio
import torchaudio.transforms as T
import random
from torch.utils.data import Dataset

LABEL_MAP = {'major': 0, 'minor': 1}

class ChordDataset(Dataset):
    def __init__(self, csv_path, split='train', sample_rate=22050, n_mels=128, duration=2.0):
        self.df = pd.read_csv(csv_path)
        self.df = self.df[self.df['split'] == split].reset_index(drop=True)

        self.sample_rate = sample_rate
        self.n_samples = int(sample_rate * duration)
        self.n_mels = n_mels

        self.mel_transform = T.MelSpectrogram(
            sample_rate=sample_rate,
            n_fft=1024,
            hop_length=512,
            n_mels=n_mels
        )

        self.db_transform = T.AmplitudeToDB()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        filepath = row['filepath']
        label = LABEL_MAP[row['label']]

        #load .wav into tensor
        waveform, sr = torchaudio.load(filepath)

        #stereo audio to mono by averaging channels
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        #trims waveform to 2 seconds
        if waveform.shape[1] > self.n_samples:
            waveform = waveform[:, :self.n_samples]
        else:
            pad_amt = self.n_samples - waveform.shape[1]
            waveform = torch.nn.functional.pad(waveform, (0, pad_amt))

        #convert waveform to log-mel spectrogram
        mel = self.mel_transform(waveform)
        mel_db = self.db_transform(mel)

        return mel_db, torch.tensor(label)



In [5]:
import torch.nn as nn
import torch.nn.functional as F

class ChordCNN(nn.Module):
    def __init__(self):
        super().__init__()

        #convolution layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)

        #dropout layer
        self.dropout = nn.Dropout(0.3)

        self.fc1 = None
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        #forward through layer
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        #set fc1 based on spectrogram input size
        if self.fc1 is None:
            flattened_size = x.view(x.size(0), -1).shape[1]
            self.fc1 = nn.Linear(flattened_size, 64)
            self.fc1.to(x.device)

        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        return torch.sigmoid(self.fc2(x)).squeeze(1)


In [6]:
#Load Data
from torch.utils.data import DataLoader

train_ds = ChordDataset(CHORD_TRAINER_DIR / 'chord_metadata.csv', split='train')
val_ds   = ChordDataset(CHORD_TRAINER_DIR / 'chord_metadata.csv', split='val')
test_ds  = ChordDataset(CHORD_TRAINER_DIR / 'chord_metadata.csv', split='test')

train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)
val_dl   = DataLoader(val_ds, batch_size=16)
test_dl  = DataLoader(test_ds, batch_size=16)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') #choose gpu by default

model = ChordCNN().to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3) #Adam optimizer from pytorch
loss_fn = nn.BCELoss() #BCE loss function


In [None]:
#training loop

for epoch in range(30):
    model.train()
    total_loss = 0
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.float().to(device)
        preds = model(xb) #get predicted probabilities
        loss = loss_fn(preds, yb) #compute loss
        loss.backward() #compute loss gradients for model weights
        opt.step() #update model weights
        opt.zero_grad() #clear gradients
        total_loss += loss.item()
    print(f"Epoch {epoch+1} - Train Loss: {total_loss / len(train_dl):.4f}")


Epoch 1 - Train Loss: 0.6635
Epoch 2 - Train Loss: 0.6278
Epoch 3 - Train Loss: 0.5944
Epoch 4 - Train Loss: 0.5698
Epoch 5 - Train Loss: 0.5482
Epoch 6 - Train Loss: 0.5180
Epoch 7 - Train Loss: 0.4955
Epoch 8 - Train Loss: 0.4806
Epoch 9 - Train Loss: 0.4705
Epoch 10 - Train Loss: 0.4567
Epoch 11 - Train Loss: 0.4419
Epoch 12 - Train Loss: 0.4358
Epoch 13 - Train Loss: 0.4244
Epoch 14 - Train Loss: 0.4216
Epoch 15 - Train Loss: 0.4211
Epoch 16 - Train Loss: 0.3889
Epoch 17 - Train Loss: 0.4084
Epoch 18 - Train Loss: 0.3958
Epoch 19 - Train Loss: 0.4041
Epoch 20 - Train Loss: 0.3802
Epoch 21 - Train Loss: 0.3913
Epoch 22 - Train Loss: 0.3659
Epoch 23 - Train Loss: 0.3561
Epoch 24 - Train Loss: 0.3774
Epoch 25 - Train Loss: 0.3601
Epoch 26 - Train Loss: 0.3432
Epoch 27 - Train Loss: 0.3657
Epoch 28 - Train Loss: 0.3608
Epoch 29 - Train Loss: 0.3538
Epoch 30 - Train Loss: 0.3501


In [None]:
#Model Evaluation

model.eval()
correct = 0
total = 0

with torch.no_grad():
    for xb, yb in test_dl:
        xb, yb = xb.to(device), yb.to(device).float()
        preds = model(xb)

        #convert probabilities into binary predictions
        pred_labels = (preds > 0.5).int()

        #count correct predictions
        correct += (pred_labels == yb.int()).sum().item()
        total += yb.size(0)

accuracy = correct / total
print(f"✅ Validation Accuracy: {accuracy:.4f}")


✅ Validation Accuracy: 0.8505
