# M2D Electronic Music Genre Classification

This notebook implements genre classification using M2D embeddings with a linear classifier on top.

In [1]:
# Install required packages
!pip install timm einops nnAudio librosa wget



In [36]:
import torch
import torchaudio
import numpy as np
from pathlib import Path
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from tqdm.notebook import tqdm
import zipfile
import wget
from tqdm.notebook import tqdm
import torch.nn.functional as F

In [None]:
# Download M2D model files
!wget https://raw.githubusercontent.com/nttcslab/m2d/master/examples/portable_m2d.py
!wget https://github.com/nttcslab/m2d/releases/download/v0.3.0/m2d_vit_base-80x1001p16x16-221006-mr7_as_46ab246d.zip

# Extract the model weights
with zipfile.ZipFile("../models/m2d_vit_base-80x1001p16x16-221006-mr7_as_46ab246d.zip", "r") as zip_ref:
    zip_ref.extractall("../models/")

--2025-01-04 14:36:50--  https://raw.githubusercontent.com/nttcslab/m2d/master/examples/portable_m2d.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15862 (15K) [text/plain]
Saving to: ‘portable_m2d.py.1’


2025-01-04 14:36:50 (8.04 MB/s) - ‘portable_m2d.py.1’ saved [15862/15862]

--2025-01-04 14:36:51--  https://github.com/nttcslab/m2d/releases/download/v0.3.0/m2d_vit_base-80x1001p16x16-221006-mr7_as_46ab246d.zip
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/589370928/0bdeb8a7-c3f3-44c5-afb9-9b9edaa3e861?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Cred

In [12]:
# Load base M2D model
from portable_m2d import PortableM2D

# Initialize model without classification head (we'll add our own)
model = PortableM2D(
    weight_file='../models/m2d_vit_base-80x1001p16x16-221006-mr7_as_46ab246d/weights_ep69it3124-0.47929.pth',
    num_classes=None  # Set to None to get embeddings instead of classification
)

# Move to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()  # Set to evaluation mode

 using 151 parameters, while dropped 9 out of 160 parameters from ../models/m2d_vit_base-80x1001p16x16-221006-mr7_as_46ab246d/weights_ep69it3124-0.47929.pth
 (dropped: ['module.ar.runtime.to_spec.mel_basis', 'module.ar.runtime.to_spec.stft.wsin', 'module.ar.runtime.to_spec.stft.wcos', 'module.ar.runtime.to_spec.stft.window_mask', 'module.head.norm.running_mean'] ...)
<All keys matched successfully>


PortableM2D(
  (backbone): LocalViT(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
      

In [13]:
# Audio preprocessing settings from the paper
SAMPLE_RATE = model.cfg.sample_rate  # Use M2D's sample rate
N_FFT = int(0.025 * SAMPLE_RATE)  # 25ms window
HOP_LENGTH = int(0.010 * SAMPLE_RATE)  # 10ms hop
N_MELS = 80
F_MIN = 50
F_MAX = 8000
AUDIO_MEAN = -7.1
AUDIO_STD = 4.2

In [14]:
class AudioDataset(Dataset):
    def __init__(self, data_dir, labels_file=None, transform=None):
        self.data_dir = Path(data_dir)
        self.transform = transform
        
        # Initialize mel spectrogram transform
        self.mel_spec = torchaudio.transforms.MelSpectrogram(
            sample_rate=SAMPLE_RATE,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            n_mels=N_MELS,
            f_min=F_MIN,
            f_max=F_MAX
        )
        
        # Load labels from CSV
        self.labels_df = pd.read_csv('../data/genre_dataset.csv')
        self.files = [self.data_dir / f for f in self.labels_df['path'].values]
        self.labels = self.labels_df['genre'].values
            
        # Convert genre names to indices
        self.label_to_idx = {label: idx for idx, label in enumerate(sorted(set(self.labels)))}
        self.labels = [self.label_to_idx[label] for label in self.labels]
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        # Load audio file
        waveform, sr = torchaudio.load(self.files[idx])
        
        # Resample if necessary
        if sr != SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
            waveform = resampler(waveform)
        
        # Convert to mono if stereo
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        
        return waveform, self.labels[idx]

In [15]:
def train_linear_classifier(model, train_loader, val_loader, num_classes, device='cuda', patience=10):
    classifier = nn.Linear(3840, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(classifier.parameters(), lr=0.1)
    
    num_epochs = 20
    best_acc = 0
    patience_counter = 0
    
    # For early stopping
    best_val_acc = 0
    epochs_without_improvement = 0
    
    for epoch in tqdm(range(num_epochs), desc='Training'):
        classifier.train()
        train_loss = 0
        correct = 0
        total = 0
        
        for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader, leave=False)):
            inputs, targets = inputs.to(device), targets.to(device)
            
            with torch.no_grad():
                embeddings = model(inputs)
                embeddings = embeddings.mean(dim=1)
            
            outputs = classifier(embeddings)
            loss = criterion(outputs, targets)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        
        train_acc = 100. * correct / total
        
        # Validation
        classifier.eval()
        val_loss = 0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                embeddings = model(inputs)
                embeddings = embeddings.mean(dim=1)
                outputs = classifier(embeddings)
                loss = criterion(outputs, targets)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
        
        val_acc = 100. * correct / total
        
        print(f'Epoch: {epoch}')
        print(f'Train Loss: {train_loss/len(train_loader):.3f} | Train Acc: {train_acc:.3f}%')
        print(f'Val Loss: {val_loss/len(val_loader):.3f} | Val Acc: {val_acc:.3f}%')
        
        # Save if it's the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_without_improvement = 0
            # Save both classifier and metadata
            torch.save({
                'epoch': epoch,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'train_acc': train_acc,
            }, 'best_genre_classifier.pth')
            print(f'New best model saved with validation accuracy: {val_acc:.3f}%')
        else:
            epochs_without_improvement += 1
            
        # Early stopping check
        if epochs_without_improvement >= patience:
            print(f'Early stopping after {epoch + 1} epochs without improvement')
            break
    
    return classifier

In [20]:
# Create dataset
dataset = AudioDataset('../data')

# Split into train/val/test
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

In [21]:
# Train classifier
num_classes = len(dataset.label_to_idx)
classifier = train_linear_classifier(model, train_loader, val_loader, num_classes)

Training:   0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Train Loss: 3.922 | Train Acc: 32.184%
Val Loss: 14.280 | Val Acc: 24.000%
New best model saved with validation accuracy: 24.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Train Loss: 13.604 | Train Acc: 36.782%
Val Loss: 13.202 | Val Acc: 56.000%
New best model saved with validation accuracy: 56.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Train Loss: 5.528 | Train Acc: 67.816%
Val Loss: 10.130 | Val Acc: 40.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Train Loss: 4.241 | Train Acc: 67.816%
Val Loss: 1.186 | Val Acc: 76.000%
New best model saved with validation accuracy: 76.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Train Loss: 0.284 | Train Acc: 93.103%
Val Loss: 1.118 | Val Acc: 68.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Train Loss: 0.328 | Train Acc: 94.253%
Val Loss: 0.801 | Val Acc: 88.000%
New best model saved with validation accuracy: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Train Loss: 0.039 | Train Acc: 98.851%
Val Loss: 0.997 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Train Loss: 0.022 | Train Acc: 98.851%
Val Loss: 0.764 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Train Loss: 0.041 | Train Acc: 97.701%
Val Loss: 0.802 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Train Loss: 0.007 | Train Acc: 100.000%
Val Loss: 0.938 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 10
Train Loss: 0.003 | Train Acc: 100.000%
Val Loss: 0.891 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 11
Train Loss: 0.002 | Train Acc: 100.000%
Val Loss: 0.910 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 12
Train Loss: 0.002 | Train Acc: 100.000%
Val Loss: 0.915 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 13
Train Loss: 0.002 | Train Acc: 100.000%
Val Loss: 0.928 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 14
Train Loss: 0.002 | Train Acc: 100.000%
Val Loss: 0.923 | Val Acc: 88.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 15
Train Loss: 0.002 | Train Acc: 100.000%
Val Loss: 0.920 | Val Acc: 88.000%
Early stopping after 16 epochs without improvement


In [23]:
def test_classifier(model, classifier, test_loader, device='cuda'):
    classifier.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            embeddings = model(inputs)
            embeddings = embeddings.mean(dim=1)
            outputs = classifier(embeddings)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    test_acc = 100. * correct / total
    print(f'Test Accuracy: {test_acc:.3f}%')
    return test_acc

# Test the classifier
test_accuracy = test_classifier(model, classifier, test_loader, device)

Test Accuracy: 76.923%


In [24]:
# Export train, val, and test datasets to CSV
train_df = dataset.labels_df.iloc[train_dataset.indices]
val_df = dataset.labels_df.iloc[val_dataset.indices]
test_df = dataset.labels_df.iloc[test_dataset.indices]

train_df.to_csv('../data/train_dataset.csv', index=False)
val_df.to_csv('../data/val_dataset.csv', index=False)
test_df.to_csv('../data/test_dataset.csv', index=False)

print("Datasets exported to CSV files.")

Datasets exported to CSV files.


# Comparison with CNN

In [38]:
class GenreCNN(nn.Module):
    def __init__(self, num_classes):
        super(GenreCNN, self).__init__()
        
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        
        # Batch normalization layers
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        
        # Pooling layers
        self.pool = nn.MaxPool2d(2, 2)
        
        # Global average pooling
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Dropout
        self.dropout = nn.Dropout(0.5)
        
        # Fully connected layers
        self.fc1 = nn.Linear(256, 512)
        self.fc2 = nn.Linear(512, num_classes)
        
    def forward(self, x):
        # Convolutional layers
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        
        # Global average pooling
        x = self.global_pool(x)
        
        # Flatten
        x = x.view(x.size(0), -1)
        
        # Fully connected layers
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

In [39]:
class AudioDataset(Dataset):
    def __init__(self, data_dir, labels_file=None, transform=None, for_cnn=False):
        self.data_dir = Path(data_dir)
        self.transform = transform
        self.for_cnn = for_cnn
        
        # Initialize mel spectrogram transform
        self.mel_spec = torchaudio.transforms.MelSpectrogram(
            sample_rate=SAMPLE_RATE,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            n_mels=N_MELS,
            f_min=F_MIN,
            f_max=F_MAX
        )
        
        # Normalization values
        self.spec_mean = AUDIO_MEAN
        self.spec_std = AUDIO_STD
        
        # Load labels from CSV
        self.labels_df = pd.read_csv('../data/genre_dataset.csv')
        self.files = [self.data_dir / f for f in self.labels_df['path'].values]
        self.labels = self.labels_df['genre'].values
            
        # Convert genre names to indices
        self.label_to_idx = {label: idx for idx, label in enumerate(sorted(set(self.labels)))}
        self.labels = [self.label_to_idx[label] for label in self.labels]
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        # Load audio file
        waveform, sr = torchaudio.load(self.files[idx])
        
        # Resample if necessary
        if sr != SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
            waveform = resampler(waveform)
        
        # Convert to mono if stereo
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
            
        if self.for_cnn:
            # Take a fixed-length segment (6 seconds like M2D)
            segment_length = 6 * SAMPLE_RATE
            if waveform.shape[1] > segment_length:
                start = torch.randint(0, waveform.shape[1] - segment_length, (1,))
                waveform = waveform[:, start:start + segment_length]
            else:
                # Pad if too short
                padding = segment_length - waveform.shape[1]
                waveform = F.pad(waveform, (0, padding))
            
            # Get mel spectrogram
            spec = self.mel_spec(waveform)
            # Convert to dB scale
            spec = torchaudio.transforms.AmplitudeToDB()(spec)
            # Normalize
            spec = (spec - self.spec_mean) / self.spec_std
            return spec, self.labels[idx]
        
        return waveform, self.labels[idx]

In [40]:
def train_cnn(model, train_loader, val_loader, device='cuda', patience=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
    
    num_epochs = 50
    best_val_acc = 0
    epochs_without_improvement = 0
    
    for epoch in tqdm(range(num_epochs), desc='Training'):
        model.train()
        train_loss = 0
        correct = 0
        total = 0
        
        for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader, leave=False)):
            inputs, targets = inputs.to(device), targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        
        train_acc = 100. * correct / total
        
        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
        
        val_acc = 100. * correct / total
        scheduler.step(val_loss)
        
        print(f'Epoch: {epoch}')
        print(f'Train Loss: {train_loss/len(train_loader):.3f} | Train Acc: {train_acc:.3f}%')
        print(f'Val Loss: {val_loss/len(val_loader):.3f} | Val Acc: {val_acc:.3f}%')
        
        # Save if it's the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_without_improvement = 0
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'train_acc': train_acc,
            }, 'best_cnn_classifier.pth')
            print(f'New best model saved with validation accuracy: {val_acc:.3f}%')
        else:
            epochs_without_improvement += 1
            
        # Early stopping check
        if epochs_without_improvement >= patience:
            print(f'Early stopping after {epoch + 1} epochs without improvement')
            break
    
    return model

In [41]:
def test_cnn(model, test_loader, device='cuda'):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    test_acc = 100. * correct / total
    print(f'Test Accuracy: {test_acc:.3f}%')
    return test_acc

In [42]:
# Create datasets for CNN
cnn_dataset = AudioDataset('../data', for_cnn=True)
train_dataset_cnn, val_dataset_cnn, test_dataset_cnn = torch.utils.data.random_split(
    cnn_dataset, [train_size, val_size, test_size])

# Create data loaders
train_loader_cnn = DataLoader(train_dataset_cnn, batch_size=32, shuffle=True, num_workers=4)
val_loader_cnn = DataLoader(val_dataset_cnn, batch_size=32, shuffle=False, num_workers=4)
test_loader_cnn = DataLoader(test_dataset_cnn, batch_size=32, shuffle=False, num_workers=4)

# Check input shape
batch = next(iter(train_loader_cnn))
inputs, _ = batch
print("Input spectrogram shape:", inputs.shape)

# Initialize and train CNN
num_classes = len(dataset.label_to_idx)
cnn_model = GenreCNN(num_classes).to(device)
cnn_model = train_cnn(cnn_model, train_loader_cnn, val_loader_cnn)

# Test the CNN classifier
test_accuracy_cnn = test_cnn(cnn_model, test_loader_cnn, device)

Input spectrogram shape: torch.Size([32, 1, 80, 601])


Training:   0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
Train Loss: 1.569 | Train Acc: 32.184%
Val Loss: 1.564 | Val Acc: 16.000%
New best model saved with validation accuracy: 16.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1
Train Loss: 1.413 | Train Acc: 40.230%
Val Loss: 1.477 | Val Acc: 40.000%
New best model saved with validation accuracy: 40.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 2
Train Loss: 1.283 | Train Acc: 49.425%
Val Loss: 1.347 | Val Acc: 48.000%
New best model saved with validation accuracy: 48.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 3
Train Loss: 1.199 | Train Acc: 55.172%
Val Loss: 1.235 | Val Acc: 52.000%
New best model saved with validation accuracy: 52.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 4
Train Loss: 1.115 | Train Acc: 57.471%
Val Loss: 1.181 | Val Acc: 52.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 5
Train Loss: 1.090 | Train Acc: 59.770%
Val Loss: 1.628 | Val Acc: 32.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 6
Train Loss: 1.116 | Train Acc: 57.471%
Val Loss: 2.082 | Val Acc: 32.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 7
Train Loss: 1.086 | Train Acc: 56.322%
Val Loss: 1.466 | Val Acc: 44.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 8
Train Loss: 0.979 | Train Acc: 62.069%
Val Loss: 1.364 | Val Acc: 52.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 9
Train Loss: 0.969 | Train Acc: 58.621%
Val Loss: 1.187 | Val Acc: 60.000%
New best model saved with validation accuracy: 60.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 10
Train Loss: 0.956 | Train Acc: 62.069%
Val Loss: 1.102 | Val Acc: 60.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 11
Train Loss: 0.889 | Train Acc: 64.368%
Val Loss: 1.214 | Val Acc: 56.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 12
Train Loss: 0.916 | Train Acc: 60.920%
Val Loss: 1.383 | Val Acc: 44.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 13
Train Loss: 0.913 | Train Acc: 60.920%
Val Loss: 1.179 | Val Acc: 56.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 14
Train Loss: 0.898 | Train Acc: 63.218%
Val Loss: 1.340 | Val Acc: 48.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 15
Train Loss: 0.919 | Train Acc: 66.667%
Val Loss: 1.425 | Val Acc: 36.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 16
Train Loss: 0.891 | Train Acc: 65.517%
Val Loss: 1.259 | Val Acc: 52.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 17
Train Loss: 0.853 | Train Acc: 68.966%
Val Loss: 1.286 | Val Acc: 48.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 18
Train Loss: 0.844 | Train Acc: 71.264%
Val Loss: 1.408 | Val Acc: 48.000%


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 19
Train Loss: 0.876 | Train Acc: 66.667%
Val Loss: 1.240 | Val Acc: 60.000%
Early stopping after 20 epochs without improvement
Test Accuracy: 61.538%
