## Import Library

In [None]:
import pandas as pd
from google.colab import drive
import os
import tarfile
import zipfile
import glob
from tqdm import tqdm  
import librosa
import random

## Dataset Load

In [None]:

# Load all three CSVs
csv1 = pd.read_csv('/content/data_test_4_classes(1).csv')  # adjust sep if not tab
csv2 = pd.read_csv('/content/data_train_4_classes(1).csv')
csv3 = pd.read_csv('/content/data_val_4_classes.csv')

# Concatenate into one DataFrame
df = pd.concat([csv1, csv2, csv3], ignore_index=True)

# Convert 'meter' to integer if it's not already
df['meter'] = df['meter'].astype(int)

# Filter for meters of interest
target_meters = [3, 4, 5, 7]
filtered = df[df['meter'].isin(target_meters)]

# Count occurrences of each meter
count_by_meter = filtered['meter'].value_counts().sort_index()

print(count_by_meter)


meter
3    1200
4    1200
5     200
7     200
Name: count, dtype: int64


## Import Drive

In [None]:
drive.mount('/content/drive')


Mounted at /content/drive


## Extract tar & zip files

In [None]:


dataset_path = "/content/drive/MyDrive/dl_dataset"

extract_path = "/content/meter_2800/"
os.makedirs(extract_path, exist_ok=True)

for file in ["FMA.tar.gz", "MAG.tar.gz", "OWN.tar.gz"]:
    file_path = os.path.join(dataset_path, file)
    if os.path.exists(file_path):
        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(extract_path)
            print(f"Extracted {file}")

zip_file_path = os.path.join(dataset_path, "dl_data.zip")

if os.path.exists(zip_file_path):
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
            print("Successfully extracted: dl_data.zip")
    except zipfile.BadZipFile:
        print(" Error: dl_data.zip is not a valid zip file!")
else:
    print(" File not found: dl_data.zip")

Extracted FMA.tar.gz
Extracted MAG.tar.gz
Extracted OWN.tar.gz
Successfully extracted: dl_data.zip


In [6]:
pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


## Converting MP3 files to WAV files

In [None]:


# Paths
input_dir = "/content/meter_2800/"  # Root folder containing FMA, MAG, OWN
output_dir = "/content/meter_2800_wav/"  # Converted WAV files

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

def convert_mp3_to_wav(mp3_path, output_dir):
    """Convert MP3 to WAV and save in output_dir"""
    filename = os.path.basename(mp3_path).replace(".mp3", ".wav")  # Change extension
    wav_path = os.path.join(output_dir, filename)  # Save in output_dir
    audio = AudioSegment.from_mp3(mp3_path)
    audio.export(wav_path, format="wav")
    return wav_path

# 🔍 Find all MP3 files inside FMA, MAG, OWN (Recursive Search)
mp3_files = glob.glob(os.path.join(input_dir, "**", "*.mp3"), recursive=True)

# 🎵 Convert all MP3 files to WAV
wav_files = [convert_mp3_to_wav(mp3_file, output_dir) for mp3_file in tqdm(mp3_files, desc="Converting MP3 to WAV")]

print(" All MP3 files converted to WAV!")


Converting MP3 to WAV: 100%|██████████| 1882/1882 [09:27<00:00,  3.31it/s]

 All MP3 files converted to WAV!





In [None]:
pip install audiomentations

Collecting audiomentations
  Downloading audiomentations-0.40.0-py3-none-any.whl.metadata (11 kB)
Collecting numpy<2,>=1.22.0 (from audiomentations)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy-minmax<1,>=0.3.0 (from audiomentations)
  Downloading numpy_minmax-0.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting numpy-rms<1,>=0.4.2 (from audiomentations)
  Downloading numpy_rms-0.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Collecting librosa!=0.10.0,<0.11.0,>=0.8.0 (from audiomentations)
  Downloading librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting python-stretch<1,>=0.3.1 (from audiomentations)
  Downloading python_

In [None]:
!pip uninstall -y numpy


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4


In [None]:
!pip install numpy==1.23.5


Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.1.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.23.5 which is incompatible.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
blosc2 3.3.1 requires numpy>=1.26, but you have numpy 1.23.5 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.23.5 which is incompatible.
p

## Augmentation of Data for class 5 and 7

In [None]:

import soundfile as sf
from scipy.signal import butter, lfilter
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Gain, PolarityInversion

# === Load Multiple Meter CSVs ===
csv_files = [
    '/content/data_train_4_classes(1).csv',
    '/content/data_val_4_classes.csv',
    '/content/data_test_4_classes(1).csv'
]
meter_df_list = [pd.read_csv(csv) for csv in csv_files]
meter_df = pd.concat(meter_df_list, ignore_index=True)

# Ensure the meter column is numeric
meter_df['meter_num'] = meter_df['meter']

# Create mapping from filename (basename) to meter
filename_to_meter = dict(zip(meter_df['filename'].apply(os.path.basename), meter_df['meter_num']))

# Target augment counts
target_total = 1200
target_meters = [5, 7]

# Count current meter distribution
meter_counts = meter_df['meter_num'].value_counts().to_dict()
aug_counts = {5: meter_counts.get(5, 0), 7: meter_counts.get(7, 0)}

# Input and output directories
input_dirs = [
    "/content/meter_2800_wav/",
    "/content/meter_2800/"
]
output_dir = "/content/drive/My Drive/Meter_2800_Augmented_1/"
os.makedirs(output_dir, exist_ok=True)

# Print original file counts per meter
original_meter_counts = {5: 0, 7: 0}
for input_dir in input_dirs:
    for file in glob.glob(os.path.join(input_dir, "**", "*.wav"), recursive=True):
        filename = os.path.basename(file)
        meter = filename_to_meter.get(filename)
        if meter in [5, 7]:
            original_meter_counts[meter] += 1

print("📊 Original file counts per meter:", original_meter_counts)

# === Augmentation pipeline ===
augment = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Gain(min_gain_db=-6, max_gain_db=6, p=0.5),
    PolarityInversion(p=0.3)
])

def low_pass_filter(y, sr, cutoff=3000):
    nyquist = 0.5 * sr
    normal_cutoff = cutoff / nyquist
    b, a = butter(5, normal_cutoff, btype="low", analog=False)
    return lfilter(b, a, y)

def apply_augmentation(y, sr):
    y = augment(samples=y, sample_rate=sr)
    if random.random() < 0.5:
        y = low_pass_filter(y, sr)
    return y

# === Find all WAV files ===
wav_files = []
for input_dir in input_dirs:
    wav_files.extend(glob.glob(os.path.join(input_dir, "**", "*.wav"), recursive=True))

# === Augment files ===
for file_path in tqdm(wav_files, desc="Processing Files"):
    filename = os.path.basename(file_path)
    meter = filename_to_meter.get(filename)

    if meter not in target_meters:
        continue

    needed = target_total - aug_counts[meter]
    if needed <= 0:
        continue

    try:
        y, sr = librosa.load(file_path, sr=None)
        num_to_generate = min(needed, 10)  # Max 10 per file

        for _ in range(num_to_generate):
            y_aug = apply_augmentation(y, sr)
            new_filename = f"aug_{meter}_{aug_counts[meter]}_{filename}"
            sf.write(os.path.join(output_dir, new_filename), y_aug, sr)
            aug_counts[meter] += 1

            if aug_counts[meter] >= target_total:
                break

    except Exception as e:
        print(f" Error processing {filename}: {e}")

print("\n Augmentation completed!")
print(" Final augmented counts:", aug_counts)


📊 Original file counts per meter: {5: 193, 7: 198}


Processing Files: 100%|██████████| 2873/2873 [24:15<00:00,  1.97it/s]


🎉 Augmentation completed!
✅ Final augmented counts: {5: 1200, 7: 1200}





## Feature Extraction

In [None]:


# === Paths ===
augmented_dir = "/content/drive/My Drive/Meter_2800_Augmented_1/"
original_dirs = [
    "/content/meter_2800_wav/",
    "/content/meter_2800/"
]
output_dir = "/content/drive/My Drive/Meter_2800_Features/"
output_csv = os.path.join(output_dir, "mel_metadata.csv")
os.makedirs(output_dir, exist_ok=True)

# === Load Time Signature Labels ===
csv_files = [
    "/content/data_test_4_classes(1).csv",
    "/content/data_train_4_classes(1).csv",
    "/content/data_val_4_classes.csv",
]

time_signature_map = {}
meter_3_and_4_files = set()

for csv_file in csv_files:
    try:
        df = pd.read_csv(csv_file)
        for _, row in df.iterrows():
            filename = os.path.basename(row["filename"])
            meter = int(row["meter"])
            time_signature_map[filename] = f"{meter}/4"

            # Track files with meter 3 and 4
            if meter in [3, 4]:
                meter_3_and_4_files.add(filename)
    except Exception as e:
        print(f" Error reading {csv_file}: {e}")

# === Gather All Files ===
all_files = []

# 1. Augmented files (all)
augmented_files = glob.glob(os.path.join(augmented_dir, "*.wav"))
all_files.extend(augmented_files)

# 2. Original files with meter 3 or 4 only
for input_dir in original_dirs:
    for file_path in glob.glob(os.path.join(input_dir, "**", "*.wav"), recursive=True):
        filename = os.path.basename(file_path)
        if filename in meter_3_and_4_files:
            all_files.append(file_path)

print(f" Total files to extract features from: {len(all_files)}")

# === Feature Extraction ===
metadata = []

for file_path in tqdm(all_files, desc="Extracting Mel Spectrograms"):
    try:
        y, sr = librosa.load(file_path, sr=None)

        # === Mel Spectrogram ===
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        log_mel = librosa.power_to_db(mel, ref=np.max)
        log_mel = (log_mel - log_mel.min()) / (log_mel.max() - log_mel.min() + 1e-6)

        # === Save Feature ===
        base_filename = os.path.basename(file_path).replace(".wav", "")
        np.save(os.path.join(output_dir, f"{base_filename}_mel.npy"), log_mel)

        # === Meter Detection ===
        filename = os.path.basename(file_path)

        if filename.startswith("aug_"):
            # Extract original filename from augmented name
            original_filename = filename.split("_")[-1]  # e.g. '00076.wav'
        else:
            original_filename = filename

        meter = time_signature_map.get(original_filename, "unknown")

        # === Metadata Entry ===
        metadata.append([
            filename, meter,
            log_mel.shape[0], log_mel.shape[1]
        ])

    except Exception as e:
        print(f" Error processing {file_path}: {e}")

# === Save Metadata CSV ===
df = pd.DataFrame(metadata, columns=[
    "filename", "meter", "mel_n_mels", "mel_n_frames"
])
df.to_csv(output_csv, index=False)

print(f"\n Extracted mel features for {len(df)} files.")
print(f" Features saved to: {output_dir}")
print(f" Metadata CSV saved to: {output_csv}")


 Total files to extract features from: 4397


Extracting Mel Spectrograms: 100%|██████████| 4397/4397 [15:18<00:00,  4.79it/s]


 Extracted mel features for 4397 files.
 Features saved to: /content/drive/My Drive/Meter_2800_Features/
 Metadata CSV saved to: /content/drive/My Drive/Meter_2800_Features/mel_metadata.csv





## ResNet Model

In [2]:
import os
import gc
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision.models import resnet18
# ===== Dataset Class with Caching =====
class CachedMelSpectrogramDataset(Dataset):
    def __init__(self, metadata_df, feature_dir):
        self.metadata_df = metadata_df.reset_index(drop=True)
        self.feature_dir = feature_dir
        self.label_map = {"3/4": 0, "4/4": 1, "5/4": 2, "7/4": 3}
        self.cache = self._load_all()

    def _load_all(self):
        cache = []
        for i in range(len(self.metadata_df)):
            row = self.metadata_df.iloc[i]
            fname = row['filename'].replace(".wav", "")
            mel_file = os.path.join(self.feature_dir, f"{fname}_mel.npy")

            try:
                mel = np.load(mel_file)
            except Exception as e:
                print(f"Error loading {mel_file}: {e}")
                continue

            mel = torch.from_numpy(mel).float().unsqueeze(0)
            T_max = 1024
            if mel.shape[2] < T_max:
                mel = F.pad(mel, (0, T_max - mel.shape[2]))
            else:
                mel = mel[:, :, :T_max]

            label = self.label_map.get(row['meter'], -1)
            if label == -1:
                continue
            cache.append((mel, torch.tensor(label)))
        return cache

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, idx):
        return self.cache[idx]

def mel_collate_fn(batch):
    mel_list, label_list = zip(*batch)
    return torch.stack(mel_list), torch.tensor(label_list, dtype=torch.long)

# ===== ResNet Model =====
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out)) + self.shortcut(x)
        return F.relu(out)

class ResNetFeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet = nn.Sequential(
            BasicBlock(1, 32, stride=2),
            BasicBlock(32, 64, stride=2),
            nn.AdaptiveAvgPool2d((1, 1))
        )

    def forward(self, x):
        return self.resnet(x).view(x.size(0), -1)


class ResNetClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.resnet = resnet18(pretrained=False)
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)
# ===== Training and Evaluation =====
def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_conf_matrix, output_dir):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title('Loss Over Epochs')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Train Acc')
    plt.plot(val_accuracies, label='Val Acc')
    plt.title('Accuracy Over Epochs')
    plt.legend()

    plt.savefig(os.path.join(output_dir, 'metrics_plot.png'))
    plt.close()

    for i, cm in enumerate(val_conf_matrices):
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.title(f"Validation Confusion Matrix Epoch {i+1}")
        plt.savefig(os.path.join(output_dir, f"val_conf_matrix_epoch_{i+1}.png"))
        plt.close()

    disp = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix)
    disp.plot()
    plt.title("Test Confusion Matrix")
    plt.savefig(os.path.join(output_dir, "test_conf_matrix.png"))
    plt.close()

def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct = total = 0
    running_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for mel, label in dataloader:
            mel, label = mel.to(device), label.to(device)
            output = model(mel)
            loss = criterion(output, label)
            running_loss += loss.item()
            _, pred = torch.max(output, 1)
            correct += (pred == label).sum().item()
            total += label.size(0)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(label.cpu().numpy())
    accuracy = correct / total
    cm = confusion_matrix(all_labels, all_preds)
    return running_loss / len(dataloader), accuracy, cm

def train_one_epoch(model, dataloader, optimizer, criterion, scaler, device):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for mel, label in tqdm(dataloader, desc="Training", leave=False):
        mel, label = mel.to(device), label.to(device)
        optimizer.zero_grad()
        with autocast():
            output = model(mel)
            loss = criterion(output, label)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()
        _, pred = torch.max(output, 1)
        correct += (pred == label).sum().item()
        total += label.size(0)
    accuracy = correct / total
    return running_loss / len(dataloader), accuracy

# ===== Training Loop with Chunking =====
def train_on_chunks_fixed_split(metadata_path, feature_dir, output_dir, num_chunks=5, epochs_per_chunk=5, batch_size=32):
    df = pd.read_csv(metadata_path)
    df = df[df['meter'].isin(["3/4", "4/4", "5/4", "7/4"])].reset_index(drop=True)
    train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['meter'], random_state=42)
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['meter'], random_state=42)

    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_ds = CachedMelSpectrogramDataset(val_df, feature_dir)
    test_ds = CachedMelSpectrogramDataset(test_df, feature_dir)
    val_dl = DataLoader(val_ds, batch_size=batch_size, collate_fn=mel_collate_fn)
    test_dl = DataLoader(test_ds, batch_size=batch_size, collate_fn=mel_collate_fn)

    model = ResNetClassifier(num_classes=4).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    val_conf_matrices = []

    chunk_size = len(train_df) // num_chunks
    for i in range(num_chunks):
        print(f"\nTraining on chunk {i+1}/{num_chunks}")
        chunk_df = train_df.iloc[i*chunk_size:] if i == num_chunks - 1 else train_df.iloc[i*chunk_size: (i+1)*chunk_size]
        train_ds = CachedMelSpectrogramDataset(chunk_df, feature_dir)
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=mel_collate_fn)

        for epoch in range(1, epochs_per_chunk + 1):
            train_loss, train_acc = train_one_epoch(model, train_dl, optimizer, criterion, scaler, device)
            val_loss, val_acc, val_cm = evaluate(model, val_dl, criterion, device)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_acc)
            val_accuracies.append(val_acc)
            val_conf_matrices.append(val_cm)

            print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | Val Acc: {val_acc*100:.2f}%")

            model_path = os.path.join(output_dir, f"resnet_chunk_{i+1}_epoch_{epoch}.pt")
            torch.save(model.state_dict(), model_path)

        del train_dl, train_ds
        torch.cuda.empty_cache()
        gc.collect()

    test_loss, test_acc, test_cm = evaluate(model, test_dl, criterion, device)
    print(f"\nFinal Test Accuracy: {test_acc * 100:.2f}%")

    plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_cm, output_dir)


In [None]:
if __name__ == "__main__":
    train_on_chunks_fixed_split(
        metadata_path='/content/mel_metadata.csv',
        feature_dir='/content/drive/MyDrive/Meter_2800_Features',
        output_dir='/content/drive/MyDrive/Meter_Models_new_1/',
        num_chunks=5,
        epochs_per_chunk=4
    )


  scaler = GradScaler()



Training on chunk 1/5


  with autocast():


Epoch 1 | Train Loss: 1.1376 | Train Acc: 55.05% | Val Acc: 22.95%


  with autocast():


Epoch 2 | Train Loss: 0.9054 | Train Acc: 61.59% | Val Acc: 27.73%


  with autocast():


Epoch 3 | Train Loss: 0.7976 | Train Acc: 65.43% | Val Acc: 60.45%


  with autocast():


Epoch 4 | Train Loss: 0.7072 | Train Acc: 71.69% | Val Acc: 61.82%

Training on chunk 2/5


  with autocast():


Epoch 1 | Train Loss: 0.8164 | Train Acc: 65.86% | Val Acc: 67.50%


  with autocast():


Epoch 2 | Train Loss: 0.6997 | Train Acc: 72.83% | Val Acc: 65.00%


  with autocast():


Epoch 3 | Train Loss: 0.6456 | Train Acc: 73.40% | Val Acc: 54.09%


  with autocast():


Epoch 4 | Train Loss: 0.6282 | Train Acc: 73.83% | Val Acc: 74.32%

Training on chunk 3/5


  with autocast():


Epoch 1 | Train Loss: 0.6544 | Train Acc: 74.25% | Val Acc: 68.18%


  with autocast():


Epoch 2 | Train Loss: 0.6059 | Train Acc: 74.40% | Val Acc: 37.95%


  with autocast():


Epoch 3 | Train Loss: 0.5617 | Train Acc: 78.81% | Val Acc: 70.68%


  with autocast():


Epoch 4 | Train Loss: 0.5028 | Train Acc: 78.38% | Val Acc: 47.50%

Training on chunk 4/5


  with autocast():


Epoch 1 | Train Loss: 0.6343 | Train Acc: 75.39% | Val Acc: 82.05%


  with autocast():


Epoch 2 | Train Loss: 0.5431 | Train Acc: 76.96% | Val Acc: 64.77%


  with autocast():


Epoch 3 | Train Loss: 0.4588 | Train Acc: 81.51% | Val Acc: 76.82%


  with autocast():


Epoch 4 | Train Loss: 0.3850 | Train Acc: 85.06% | Val Acc: 76.59%

Training on chunk 5/5


  with autocast():


Epoch 1 | Train Loss: 0.7259 | Train Acc: 73.76% | Val Acc: 54.32%


  with autocast():


Epoch 2 | Train Loss: 0.6973 | Train Acc: 74.33% | Val Acc: 51.36%


  with autocast():


Epoch 3 | Train Loss: 0.5603 | Train Acc: 80.43% | Val Acc: 60.45%


  with autocast():


Epoch 4 | Train Loss: 0.5346 | Train Acc: 83.12% | Val Acc: 58.86%

Final Test Accuracy: 57.73%


## Custom CNN

In [None]:
import os
import gc
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm

# ===== Dataset Class with Caching =====
class CachedMelSpectrogramDataset(Dataset):
    def __init__(self, metadata_df, feature_dir):
        self.metadata_df = metadata_df.reset_index(drop=True)
        self.feature_dir = feature_dir
        self.label_map = {"3/4": 0, "4/4": 1, "5/4": 2, "7/4": 3}
        self.cache = self._load_all()

    def _load_all(self):
        cache = []
        for i in range(len(self.metadata_df)):
            row = self.metadata_df.iloc[i]
            fname = row['filename'].replace(".wav", "")
            mel_file = os.path.join(self.feature_dir, f"{fname}_mel.npy")

            try:
                mel = np.load(mel_file)
            except Exception as e:
                print(f"Error loading {mel_file}: {e}")
                continue

            mel = torch.from_numpy(mel).float().unsqueeze(0)
            T_max = 1024
            if mel.shape[2] < T_max:
                mel = F.pad(mel, (0, T_max - mel.shape[2]))
            else:
                mel = mel[:, :, :T_max]

            label = self.label_map.get(row['meter'], -1)
            if label == -1:
                continue
            cache.append((mel, torch.tensor(label)))
        return cache

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, idx):
        return self.cache[idx]

def mel_collate_fn(batch):
    mel_list, label_list = zip(*batch)
    return torch.stack(mel_list), torch.tensor(label_list, dtype=torch.long)

# ===== Custom CNN Classifier =====
class CustomCNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.cnn(x)
        return self.classifier(x)

# ===== Training and Evaluation =====
def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_conf_matrix, output_dir):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title('Loss Over Epochs')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Train Acc')
    plt.plot(val_accuracies, label='Val Acc')
    plt.title('Accuracy Over Epochs')
    plt.legend()

    plt.savefig(os.path.join(output_dir, 'metrics_plot.png'))
    plt.close()

    for i, cm in enumerate(val_conf_matrices):
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.title(f"Validation Confusion Matrix Epoch {i+1}")
        plt.savefig(os.path.join(output_dir, f"val_conf_matrix_epoch_{i+1}.png"))
        plt.close()

    disp = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix)
    disp.plot()
    plt.title("Test Confusion Matrix")
    plt.savefig(os.path.join(output_dir, "test_conf_matrix.png"))
    plt.close()

def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct = total = 0
    running_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for mel, label in dataloader:
            mel, label = mel.to(device), label.to(device)
            output = model(mel)
            loss = criterion(output, label)
            running_loss += loss.item()
            _, pred = torch.max(output, 1)
            correct += (pred == label).sum().item()
            total += label.size(0)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(label.cpu().numpy())
    accuracy = correct / total
    cm = confusion_matrix(all_labels, all_preds)
    return running_loss / len(dataloader), accuracy, cm

def train_one_epoch(model, dataloader, optimizer, criterion, scaler, device):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for mel, label in tqdm(dataloader, desc="Training", leave=False):
        mel, label = mel.to(device), label.to(device)
        optimizer.zero_grad()
        with autocast():
            output = model(mel)
            loss = criterion(output, label)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()
        _, pred = torch.max(output, 1)
        correct += (pred == label).sum().item()
        total += label.size(0)
    accuracy = correct / total
    return running_loss / len(dataloader), accuracy

# ===== Training Loop with Chunking =====
def train_on_chunks_fixed_split(metadata_path, feature_dir, output_dir, num_chunks=5, epochs_per_chunk=5, batch_size=32):
    df = pd.read_csv(metadata_path)
    df = df[df['meter'].isin(["3/4", "4/4", "5/4", "7/4"])].reset_index(drop=True)
    train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['meter'], random_state=42)
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['meter'], random_state=42)

    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_ds = CachedMelSpectrogramDataset(val_df, feature_dir)
    test_ds = CachedMelSpectrogramDataset(test_df, feature_dir)
    val_dl = DataLoader(val_ds, batch_size=batch_size, collate_fn=mel_collate_fn)
    test_dl = DataLoader(test_ds, batch_size=batch_size, collate_fn=mel_collate_fn)

    model = CustomCNNClassifier(num_classes=4).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    val_conf_matrices = []

    chunk_size = len(train_df) // num_chunks
    for i in range(num_chunks):
        print(f"\nTraining on chunk {i+1}/{num_chunks}")
        chunk_df = train_df.iloc[i*chunk_size:] if i == num_chunks - 1 else train_df.iloc[i*chunk_size: (i+1)*chunk_size]
        train_ds = CachedMelSpectrogramDataset(chunk_df, feature_dir)
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=mel_collate_fn)

        for epoch in range(1, epochs_per_chunk + 1):
            train_loss, train_acc = train_one_epoch(model, train_dl, optimizer, criterion, scaler, device)
            val_loss, val_acc, val_cm = evaluate(model, val_dl, criterion, device)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_acc)
            val_accuracies.append(val_acc)
            val_conf_matrices.append(val_cm)

            print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | Val Acc: {val_acc*100:.2f}%")

            model_path = os.path.join(output_dir, f"customcnn_chunk_{i+1}_epoch_{epoch}.pt")
            torch.save(model.state_dict(), model_path)

        del train_dl, train_ds
        torch.cuda.empty_cache()
        gc.collect()

    test_loss, test_acc, test_cm = evaluate(model, test_dl, criterion, device)
    print(f"\nFinal Test Accuracy: {test_acc * 100:.2f}%")

    plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_cm, output_dir)


In [None]:
from IPython.display import Image
Image("/content/output_dir/metrics_plot.png")  # Adjust path


In [None]:
if __name__ == "__main__":
    train_on_chunks_fixed_split(
        metadata_path='/content/mel_metadata.csv',
        feature_dir='/content/drive/MyDrive/Meter_2800_Features',
        output_dir='/content/drive/MyDrive/Meter_Models_new_1/',
        num_chunks=5,
        epochs_per_chunk=4
    )


  scaler = GradScaler()



Training on chunk 1/5


  with autocast():


Epoch 1 | Train Loss: 1.3764 | Train Acc: 29.02% | Val Acc: 42.05%


  with autocast():


Epoch 2 | Train Loss: 1.3349 | Train Acc: 40.54% | Val Acc: 44.55%


  with autocast():


Epoch 3 | Train Loss: 1.2081 | Train Acc: 46.80% | Val Acc: 44.09%


  with autocast():


Epoch 4 | Train Loss: 1.1849 | Train Acc: 44.95% | Val Acc: 43.86%

Training on chunk 2/5


  with autocast():


Epoch 1 | Train Loss: 1.2219 | Train Acc: 43.39% | Val Acc: 47.50%


  with autocast():


Epoch 2 | Train Loss: 1.2032 | Train Acc: 44.81% | Val Acc: 46.36%


  with autocast():


Epoch 3 | Train Loss: 1.2145 | Train Acc: 43.10% | Val Acc: 48.64%


  with autocast():


Epoch 4 | Train Loss: 1.1963 | Train Acc: 45.52% | Val Acc: 48.41%

Training on chunk 3/5


  with autocast():


Epoch 1 | Train Loss: 1.1944 | Train Acc: 46.51% | Val Acc: 48.18%


  with autocast():


Epoch 2 | Train Loss: 1.2042 | Train Acc: 45.95% | Val Acc: 48.64%


  with autocast():


Epoch 3 | Train Loss: 1.1904 | Train Acc: 47.37% | Val Acc: 47.73%


  with autocast():


Epoch 4 | Train Loss: 1.1822 | Train Acc: 46.66% | Val Acc: 46.82%

Training on chunk 4/5


  with autocast():


Epoch 1 | Train Loss: 1.1609 | Train Acc: 44.38% | Val Acc: 46.82%


  with autocast():


Epoch 2 | Train Loss: 1.1670 | Train Acc: 45.66% | Val Acc: 47.27%


  with autocast():


Epoch 3 | Train Loss: 1.1562 | Train Acc: 42.25% | Val Acc: 49.32%


  with autocast():


Epoch 4 | Train Loss: 1.1615 | Train Acc: 45.23% | Val Acc: 48.64%

Training on chunk 5/5


  with autocast():


Epoch 1 | Train Loss: 1.2091 | Train Acc: 44.68% | Val Acc: 49.55%


  with autocast():


Epoch 2 | Train Loss: 1.1802 | Train Acc: 45.39% | Val Acc: 46.14%


  with autocast():


Epoch 3 | Train Loss: 1.1607 | Train Acc: 45.53% | Val Acc: 48.86%


  with autocast():


Epoch 4 | Train Loss: 1.1587 | Train Acc: 44.68% | Val Acc: 48.18%

Final Test Accuracy: 48.64%


## Custom CNN

In [None]:
import os
import gc
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler, autocast
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm

# ===== Dataset Class with Caching =====
class CachedMelSpectrogramDataset(Dataset):
    def __init__(self, metadata_df, feature_dir):
        self.metadata_df = metadata_df.reset_index(drop=True)
        self.feature_dir = feature_dir
        self.label_map = {"3/4": 0, "4/4": 1, "5/4": 2, "7/4": 3}
        self.cache = self._load_all()

    def _load_all(self):
        cache = []
        for i in range(len(self.metadata_df)):
            row = self.metadata_df.iloc[i]
            fname = row['filename'].replace(".wav", "")
            mel_file = os.path.join(self.feature_dir, f"{fname}_mel.npy")

            try:
                mel = np.load(mel_file)
            except Exception as e:
                print(f"Error loading {mel_file}: {e}")
                continue

            mel = torch.from_numpy(mel).float().unsqueeze(0)
            T_max = 1024
            if mel.shape[2] < T_max:
                mel = F.pad(mel, (0, T_max - mel.shape[2]))
            else:
                mel = mel[:, :, :T_max]

            label = self.label_map.get(row['meter'], -1)
            if label == -1:
                continue
            cache.append((mel, torch.tensor(label)))
        return cache

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, idx):
        return self.cache[idx]

def mel_collate_fn(batch):
    mel_list, label_list = zip(*batch)
    return torch.stack(mel_list), torch.tensor(label_list, dtype=torch.long)

# ===== I Custom CNN Classifier =====
class CustomCNNClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.GELU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.GELU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.GELU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.AdaptiveAvgPool2d((1, 1))
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.cnn(x)
        return self.classifier(x)

# ===== Training and Evaluation =====
def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_conf_matrix, output_dir):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title('Loss Over Epochs')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Train Acc')
    plt.plot(val_accuracies, label='Val Acc')
    plt.title('Accuracy Over Epochs')
    plt.legend()

    plt.savefig(os.path.join(output_dir, 'metrics_plot.png'))
    plt.close()

    for i, cm in enumerate(val_conf_matrices):
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.title(f"Validation Confusion Matrix Epoch {i+1}")
        plt.savefig(os.path.join(output_dir, f"val_conf_matrix_epoch_{i+1}.png"))
        plt.close()

    disp = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix)
    disp.plot()
    plt.title("Test Confusion Matrix")
    plt.savefig(os.path.join(output_dir, "test_conf_matrix.png"))
    plt.close()

def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct = total = 0
    running_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for mel, label in dataloader:
            mel, label = mel.to(device), label.to(device)
            output = model(mel)
            loss = criterion(output, label)
            running_loss += loss.item()
            _, pred = torch.max(output, 1)
            correct += (pred == label).sum().item()
            total += label.size(0)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(label.cpu().numpy())
    accuracy = correct / total
    cm = confusion_matrix(all_labels, all_preds)
    return running_loss / len(dataloader), accuracy, cm

def train_one_epoch(model, dataloader, optimizer, criterion, scaler, device):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for mel, label in tqdm(dataloader, desc="Training", leave=False):
        mel, label = mel.to(device), label.to(device)
        optimizer.zero_grad()
        with autocast():
            output = model(mel)
            loss = criterion(output, label)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()
        _, pred = torch.max(output, 1)
        correct += (pred == label).sum().item()
        total += label.size(0)
    accuracy = correct / total
    return running_loss / len(dataloader), accuracy

# ===== Training Loop with Chunking =====
def train_on_chunks_fixed_split(metadata_path, feature_dir, output_dir, num_chunks=5, epochs_per_chunk=5, batch_size=32):
    df = pd.read_csv(metadata_path)
    df = df[df['meter'].isin(["3/4", "4/4", "5/4", "7/4"])].reset_index(drop=True)
    train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['meter'], random_state=42)
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['meter'], random_state=42)

    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_ds = CachedMelSpectrogramDataset(val_df, feature_dir)
    test_ds = CachedMelSpectrogramDataset(test_df, feature_dir)
    val_dl = DataLoader(val_ds, batch_size=batch_size, collate_fn=mel_collate_fn)
    test_dl = DataLoader(test_ds, batch_size=batch_size, collate_fn=mel_collate_fn)

    model = CustomCNNClassifier(num_classes=4).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    val_conf_matrices = []

    chunk_size = len(train_df) // num_chunks
    for i in range(num_chunks):
        print(f"\nTraining on chunk {i+1}/{num_chunks}")
        chunk_df = train_df.iloc[i*chunk_size:] if i == num_chunks - 1 else train_df.iloc[i*chunk_size: (i+1)*chunk_size]
        train_ds = CachedMelSpectrogramDataset(chunk_df, feature_dir)
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=mel_collate_fn)

        for epoch in range(1, epochs_per_chunk + 1):
            train_loss, train_acc = train_one_epoch(model, train_dl, optimizer, criterion, scaler, device)
            val_loss, val_acc, val_cm = evaluate(model, val_dl, criterion, device)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_acc)
            val_accuracies.append(val_acc)
            val_conf_matrices.append(val_cm)

            print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | Val Acc: {val_acc*100:.2f}%")

            model_path = os.path.join(output_dir, f"customcnn_chunk_{i+1}_epoch_{epoch}.pt")
            torch.save(model.state_dict(), model_path)

        del train_dl, train_ds
        torch.cuda.empty_cache()
        gc.collect()

    test_loss, test_acc, test_cm = evaluate(model, test_dl, criterion, device)
    print(f"\nFinal Test Accuracy: {test_acc * 100:.2f}%")

    plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_cm, output_dir)


In [None]:
if __name__ == "__main__":
    train_on_chunks_fixed_split(
        metadata_path='/content/mel_metadata.csv',
        feature_dir='/content/drive/MyDrive/Meter_2800_Features',
        output_dir='/content/drive/MyDrive/Meter_Models_new_2/',
        num_chunks=5,
        epochs_per_chunk=4
    )


  scaler = GradScaler()



Training on chunk 1/5


  with autocast():


Epoch 1 | Train Loss: 1.0916 | Train Acc: 52.49% | Val Acc: 24.55%


  with autocast():


Epoch 2 | Train Loss: 0.9245 | Train Acc: 61.88% | Val Acc: 32.50%


  with autocast():


Epoch 3 | Train Loss: 0.8519 | Train Acc: 65.29% | Val Acc: 59.77%


  with autocast():


Epoch 4 | Train Loss: 0.8279 | Train Acc: 65.86% | Val Acc: 63.41%

Training on chunk 2/5


  with autocast():


Epoch 1 | Train Loss: 0.8571 | Train Acc: 63.58% | Val Acc: 48.41%


  with autocast():


Epoch 2 | Train Loss: 0.8466 | Train Acc: 64.72% | Val Acc: 59.09%


  with autocast():


Epoch 3 | Train Loss: 0.8077 | Train Acc: 66.00% | Val Acc: 65.68%


  with autocast():


Epoch 4 | Train Loss: 0.7783 | Train Acc: 67.99% | Val Acc: 56.82%

Training on chunk 3/5


  with autocast():


Epoch 1 | Train Loss: 0.8352 | Train Acc: 61.45% | Val Acc: 64.09%


  with autocast():


Epoch 2 | Train Loss: 0.7724 | Train Acc: 66.15% | Val Acc: 67.73%


  with autocast():


Epoch 3 | Train Loss: 0.7566 | Train Acc: 66.86% | Val Acc: 67.05%


  with autocast():


Epoch 4 | Train Loss: 0.7369 | Train Acc: 66.00% | Val Acc: 62.05%

Training on chunk 4/5


  with autocast():


Epoch 1 | Train Loss: 0.7338 | Train Acc: 65.29% | Val Acc: 62.50%


  with autocast():


Epoch 2 | Train Loss: 0.7041 | Train Acc: 66.57% | Val Acc: 60.68%


  with autocast():


Epoch 3 | Train Loss: 0.6608 | Train Acc: 69.27% | Val Acc: 61.36%


  with autocast():


Epoch 4 | Train Loss: 0.6793 | Train Acc: 69.42% | Val Acc: 68.41%

Training on chunk 5/5


  with autocast():


Epoch 1 | Train Loss: 0.7634 | Train Acc: 66.95% | Val Acc: 60.91%


  with autocast():


Epoch 2 | Train Loss: 0.8252 | Train Acc: 67.80% | Val Acc: 57.05%


  with autocast():


Epoch 3 | Train Loss: 0.7514 | Train Acc: 67.38% | Val Acc: 49.32%


  with autocast():


Epoch 4 | Train Loss: 0.7917 | Train Acc: 66.52% | Val Acc: 69.55%

Final Test Accuracy: 67.27%


In [None]:
pip install efficientnet-pytorch


Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->efficientnet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->efficientnet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->efficientnet-pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->efficientnet-pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->efficientnet-pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metada

## EfficientNet-B0 Model

In [None]:
import os
import gc
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.amp import GradScaler, autocast
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet

# ===== Dataset Class with Caching =====
class CachedMelSpectrogramDataset(Dataset):
    def __init__(self, metadata_df, feature_dir):
        self.metadata_df = metadata_df.reset_index(drop=True)
        self.feature_dir = feature_dir
        self.label_map = {"3/4": 0, "4/4": 1, "5/4": 2, "7/4": 3}
        self.cache = self._load_all()

    def _load_all(self):
        cache = []
        for i in range(len(self.metadata_df)):
            row = self.metadata_df.iloc[i]
            fname = row['filename'].replace(".wav", "")
            mel_file = os.path.join(self.feature_dir, f"{fname}_mel.npy")

            try:
                mel = np.load(mel_file)
            except Exception as e:
                print(f"Error loading {mel_file}: {e}")
                continue

            mel = torch.from_numpy(mel).float().unsqueeze(0)  # [1, 128, T]
            mel = mel.repeat(3, 1, 1)  # Convert to [3, 128, T] for EfficientNet
            T_max = 1024
            if mel.shape[2] < T_max:
                mel = F.pad(mel, (0, T_max - mel.shape[2]))
            else:
                mel = mel[:, :, :T_max]

            label = self.label_map.get(row['meter'], -1)
            if label == -1:
                continue
            cache.append((mel, torch.tensor(label)))
        return cache

    def __len__(self):
        return len(self.cache)

    def __getitem__(self, idx):
        return self.cache[idx]

def mel_collate_fn(batch):
    mel_list, label_list = zip(*batch)
    return torch.stack(mel_list), torch.tensor(label_list, dtype=torch.long)

# ===== EfficientNet-B0 Classifier =====
class EfficientNetB0Classifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b0')
        # Keep default conv stem (expects 3 channels)
        in_features = self.model._fc.in_features
        self.model._fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)

# ===== Training and Evaluation =====
def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_conf_matrix, output_dir):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title('Loss Over Epochs')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(train_accuracies, label='Train Acc')
    plt.plot(val_accuracies, label='Val Acc')
    plt.title('Accuracy Over Epochs')
    plt.legend()

    plt.savefig(os.path.join(output_dir, 'metrics_plot.png'))
    plt.close()

    for i, cm in enumerate(val_conf_matrices):
        disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        disp.plot()
        plt.title(f"Validation Confusion Matrix Epoch {i+1}")
        plt.savefig(os.path.join(output_dir, f"val_conf_matrix_epoch_{i+1}.png"))
        plt.close()

    disp = ConfusionMatrixDisplay(confusion_matrix=test_conf_matrix)
    disp.plot()
    plt.title("Test Confusion Matrix")
    plt.savefig(os.path.join(output_dir, "test_conf_matrix.png"))
    plt.close()

def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct = total = 0
    running_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for mel, label in dataloader:
            mel, label = mel.to(device), label.to(device)
            output = model(mel)
            loss = criterion(output, label)
            running_loss += loss.item()
            _, pred = torch.max(output, 1)
            correct += (pred == label).sum().item()
            total += label.size(0)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(label.cpu().numpy())
    accuracy = correct / total
    cm = confusion_matrix(all_labels, all_preds)
    return running_loss / len(dataloader), accuracy, cm

def train_one_epoch(model, dataloader, optimizer, criterion, scaler, device):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for mel, label in tqdm(dataloader, desc="Training", leave=False):
        mel, label = mel.to(device), label.to(device)
        optimizer.zero_grad()
        with autocast(device_type='cuda'):
            output = model(mel)
            loss = criterion(output, label)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()
        _, pred = torch.max(output, 1)
        correct += (pred == label).sum().item()
        total += label.size(0)
    accuracy = correct / total
    return running_loss / len(dataloader), accuracy

# ===== Training Loop with Chunking =====
def train_on_chunks_fixed_split(metadata_path, feature_dir, output_dir, num_chunks=5, epochs_per_chunk=5, batch_size=32):
    df = pd.read_csv(metadata_path)
    df = df[df['meter'].isin(["3/4", "4/4", "5/4", "7/4"])].reset_index(drop=True)
    train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['meter'], random_state=42)
    val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['meter'], random_state=42)

    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_ds = CachedMelSpectrogramDataset(val_df, feature_dir)
    test_ds = CachedMelSpectrogramDataset(test_df, feature_dir)
    val_dl = DataLoader(val_ds, batch_size=batch_size, collate_fn=mel_collate_fn)
    test_dl = DataLoader(test_ds, batch_size=batch_size, collate_fn=mel_collate_fn)

    model = EfficientNetB0Classifier(num_classes=4).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    val_conf_matrices = []

    chunk_size = len(train_df) // num_chunks
    for i in range(num_chunks):
        print(f"\nTraining on chunk {i+1}/{num_chunks}")
        chunk_df = train_df.iloc[i*chunk_size:] if i == num_chunks - 1 else train_df.iloc[i*chunk_size: (i+1)*chunk_size]
        train_ds = CachedMelSpectrogramDataset(chunk_df, feature_dir)
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=mel_collate_fn)

        for epoch in range(1, epochs_per_chunk + 1):
            train_loss, train_acc = train_one_epoch(model, train_dl, optimizer, criterion, scaler, device)
            val_loss, val_acc, val_cm = evaluate(model, val_dl, criterion, device)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_acc)
            val_accuracies.append(val_acc)
            val_conf_matrices.append(val_cm)

            print(f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | Val Acc: {val_acc*100:.2f}%")

            model_path = os.path.join(output_dir, f"efficientnetb0_chunk_{i+1}_epoch_{epoch}.pt")
            torch.save(model.state_dict(), model_path)

        del train_dl, train_ds
        torch.cuda.empty_cache()
        gc.collect()

    test_loss, test_acc, test_cm = evaluate(model, test_dl, criterion, device)
    print(f"\nFinal Test Accuracy: {test_acc * 100:.2f}%")

    plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, test_loss, test_acc, val_conf_matrices, test_cm, output_dir)

# ===== Run Training =====
if __name__ == "__main__":
    train_on_chunks_fixed_split(
        metadata_path="/content/mel_metadata.csv",
        feature_dir='/content/drive/MyDrive/Meter_2800_Features',
        output_dir='/content/drive/MyDrive/Meter_Models_new_3/',
        num_chunks=5,
        epochs_per_chunk=4
    )


Loaded pretrained weights for efficientnet-b0

Training on chunk 1/5




Epoch 1 | Train Loss: 0.9245 | Train Acc: 62.02% | Val Acc: 63.64%




Epoch 2 | Train Loss: 0.5681 | Train Acc: 77.95% | Val Acc: 60.68%




Epoch 3 | Train Loss: 0.3613 | Train Acc: 86.63% | Val Acc: 55.00%




Epoch 4 | Train Loss: 0.1926 | Train Acc: 93.60% | Val Acc: 60.91%

Training on chunk 2/5




Epoch 1 | Train Loss: 0.6548 | Train Acc: 76.96% | Val Acc: 33.18%




Epoch 2 | Train Loss: 0.3711 | Train Acc: 86.20% | Val Acc: 55.00%




Epoch 3 | Train Loss: 0.1803 | Train Acc: 94.17% | Val Acc: 57.05%




Epoch 4 | Train Loss: 0.1353 | Train Acc: 96.44% | Val Acc: 60.23%

Training on chunk 3/5




Epoch 1 | Train Loss: 0.4361 | Train Acc: 83.36% | Val Acc: 56.59%




Epoch 2 | Train Loss: 0.3162 | Train Acc: 87.48% | Val Acc: 64.09%




Epoch 3 | Train Loss: 0.1639 | Train Acc: 93.60% | Val Acc: 80.91%




Epoch 4 | Train Loss: 0.1219 | Train Acc: 95.31% | Val Acc: 76.14%

Training on chunk 4/5




Epoch 1 | Train Loss: 0.4138 | Train Acc: 85.35% | Val Acc: 74.09%




Epoch 2 | Train Loss: 0.2128 | Train Acc: 91.89% | Val Acc: 79.77%




Epoch 3 | Train Loss: 0.1201 | Train Acc: 95.73% | Val Acc: 83.18%




Epoch 4 | Train Loss: 0.0893 | Train Acc: 96.02% | Val Acc: 83.41%

Training on chunk 5/5




Epoch 1 | Train Loss: 0.4503 | Train Acc: 86.67% | Val Acc: 59.09%




Epoch 2 | Train Loss: 0.3301 | Train Acc: 91.49% | Val Acc: 76.36%




Epoch 3 | Train Loss: 0.2543 | Train Acc: 92.62% | Val Acc: 77.05%




Epoch 4 | Train Loss: 0.1400 | Train Acc: 97.02% | Val Acc: 79.32%

Final Test Accuracy: 81.14%
