# MIDI-Trained Chord Recognition Model

## Data Preprocessing

### 1. Load and Extract from midi_folder

In [32]:
import os
import json
import pretty_midi
import pandas as pd
import numpy as np
from collections import defaultdict
import mido
import io

# define chord type templates: intervals relative to root
CHORD_TEMPLATES = {
    "Major":         {0, 4, 7},
    "Minor":         {0, 3, 7},
    "Dominant 7th":  {0, 4, 7, 10},
    "Diminished":    {0, 3, 6},
    "Augmented":     {0, 4, 8},
}

PITCH_CLASS_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F',
                     'F#', 'G', 'G#', 'A', 'A#', 'B']

# normalize chord, removing octave transpositions 
def normalize_chord(chord_tuple):
    normalized_chord = {note % 12 for note in chord_tuple}  # keep only unique notes modulo 12
    return tuple(sorted(normalized_chord))

# identify and name chords 
def identify_named_chord(chord_tuple):
    if not chord_tuple:
        return "Unknown"

    pitch_classes = sorted({p % 12 for p in chord_tuple})
    for root in pitch_classes:
        transposed = sorted({(p - root) % 12 for p in pitch_classes})
        for label, template in CHORD_TEMPLATES.items():
            if set(transposed) == template:
                root_name = PITCH_CLASS_NAMES[root]
                return f"{root_name} {label}"
    return "Unknown"

# fixed mapping for chord vocab: all 12 roots * templates
def create_fixed_chord_vocab():
    ALL_CHORDS = [
        f"{pitch} {chord_type}"
        for pitch in PITCH_CLASS_NAMES
        for chord_type in CHORD_TEMPLATES.keys()
    ]
    chord_to_index = {chord: idx for idx, chord in enumerate(ALL_CHORDS)}
    return chord_to_index

# extract chord sequence
def midi_to_chord_sequence(midi_file, merge_threshold=0.3):
    #midi_data = pretty_midi.PrettyMIDI(midi_file)
    
    raw = mido.MidiFile(midi_file, clip=True)
    merged = mido.MidiFile() 
    merged.ticks_per_beat = raw.ticks_per_beat
    merged_track = mido.merge_tracks(raw.tracks)
    merged.tracks.append(merged_track)
    
    # dump to memory buffer
    buf = io.BytesIO()
    merged.save(file=buf)
    buf.seek(0)

    midi_data = pretty_midi.PrettyMIDI(buf)

    events = []
    # for each note, add two events: on/off
    for instrument in midi_data.instruments:
        if instrument.is_drum:
            continue
        for note in instrument.notes:
            events.append((note.start, 'on', note.pitch))
            events.append((note.end, 'off', note.pitch))
    

    events.sort(key=lambda x: x[0])

    active_notes = set()  # track notes that are in use
    chords = []  # final list
    previous_chord = None
    chord_start_time = None
    last_event_time = 0

    # if note is starting, add to active set
    # if note ending, remove it from active set
    for time, action, pitch in events:
        if action == 'on':
            active_notes.add(pitch)
        elif action == 'off':
            active_notes.discard(pitch)

        current_chord = normalize_chord(active_notes) if active_notes else None
        chord_label = identify_named_chord(current_chord) if current_chord else None

        # if chord changed
        if chord_label != previous_chord:
            if previous_chord is not None and chord_start_time is not None:
                if time - chord_start_time >= merge_threshold:
                    chords.append((round(chord_start_time, 3), round(time, 3), previous_chord))
            chord_start_time = time
            previous_chord = chord_label

        last_event_time = time

    # capture final chord if any
    if previous_chord is not None and chord_start_time is not None:
        chords.append((round(chord_start_time, 3), round(midi_data.get_end_time(), 3), previous_chord))

    return chords, midi_data

# timeframe-level feature extraction and align with chord labels
def extract_frame_level_data(chords, midi_data, chord_to_index, frame_hop=1):
    end_time = midi_data.get_end_time()
    frame_times = np.arange(0, end_time, frame_hop)

    chroma = midi_data.get_chroma(fs=int(1 / frame_hop))
    chroma = chroma.T  # transpose to shape (frames, 12)

    data = []

    for i, t in enumerate(frame_times):
        frame_feature = chroma[i] if i < len(chroma) else np.zeros(12)
        label = None
        for start, end, chord in chords:
            if start <= t < end:
                if chord in chord_to_index:
                    label = chord_to_index[chord]
                break
        if label is not None:
            data.append((t, frame_feature, label))
    return data


# process all midi files in the folder, save to CSV
def process_midi_folder(input_folder, chord_csv, frame_csv, frame_hop=1):
    chord_rows = []
    frame_rows = []
    chord_to_index = create_fixed_chord_vocab()

    for root, _, files in os.walk(input_folder):
        for fname in files:
            if not fname.lower().endswith(('.mid','.midi')): continue
            path = os.path.join(root, fname)
            rel = os.path.relpath(path, input_folder)
            try:
                chords, midi = midi_to_chord_sequence(path)
                # chord-level
                for st, ed, ch in chords:
                    chord_rows.append([rel, st, ed, ch])
                # frame-level
                frames = extract_frame_level_data(chords, midi, chord_to_index, frame_hop)
                for t, feat, lbl in frames:
                    frame_rows.append([rel, t, *feat, lbl])

            except Exception as e:
                print(f"[ERROR] {rel}: {e}")

    # save to csv
    chord_df = pd.DataFrame(chord_rows, columns=["filename","start_time","end_time","chord"])
    chord_df.to_csv(chord_csv, index=False)
    cols = [f"chroma_{i}" for i in range(12)]
    frame_df = pd.DataFrame(frame_rows, columns=["filename","time", *cols, "label"])
    frame_df.to_csv(frame_csv, index=False)

    print(f"✔ Saved chords to: {chord_csv}")
    print(f"✔ Saved frames to: {frame_csv}")
    return chord_to_index


# def process_midi_folder(midi_folder, chord_output_csv, frame_output_csv, frame_hop=1):
#     chord_data = []
#     frame_data = []

#     chord_to_index = create_fixed_chord_vocab()

#     for midi_file in os.listdir(midi_folder):
#         if midi_file.endswith(".mid") or midi_file.endswith(".midi"):
#             file_path = os.path.join(midi_folder, midi_file)
#             try:
#                 chords, midi_data = midi_to_chord_sequence(file_path)
#                 for timestamp_start, timestamp_end, chord in chords:
#                     chord_data.append([midi_file, timestamp_start, timestamp_end, chord])
#             except Exception as e:
#                 print(f"Error processing {midi_file}: {e}")

#     # second pass to align frame-wise data using finalized vocab
#     for midi_file in os.listdir(midi_folder):
#         if midi_file.endswith(".mid") or midi_file.endswith(".midi"):
#             file_path = os.path.join(midi_folder, midi_file)
#             try:
#                 chords, midi_data = midi_to_chord_sequence(file_path)
#                 frame_entries = extract_frame_level_data(chords, midi_data, chord_to_index, frame_hop)
#                 for t, feat, label in frame_entries:
#                     frame_data.append([midi_file, round(t, 3)] + list(feat) + [label])
#             except Exception as e:
#                 print(f"Error processing {midi_file} for frame-level: {e}")

#     # save chord segment CSV
#     chord_df = pd.DataFrame(chord_data, columns=["filename", "start_time", "end_time", "chord"])
#     chord_df.to_csv(chord_output_csv, index=False)

#     # save frame-level CSV
#     feat_cols = [f"chroma_{i}" for i in range(12)]
#     frame_df = pd.DataFrame(frame_data, columns=["filename", "time"] + feat_cols + ["label"])
#     frame_df.to_csv(frame_output_csv, index=False)

#     print(f"Chord segments saved to {chord_output_csv}")
#     print(f"Frame-level data saved to {frame_output_csv}")
    
#     return chord_to_index

### 2. Extract and Combine to csv file

In [33]:
# paths
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)

folder_to_process = 'midi_folder' # test use; change to 'lakh-midi-clean' for actual experiments

base = os.path.basename(folder_to_process.rstrip(os.sep))
chord_csv = os.path.join(output_dir, f"chord_dataset.csv")
frame_csv = os.path.join(output_dir, f"timeframe_dataset.csv")
vocab_json = os.path.join(output_dir, f"chord_vocab.json")

chord_to_index = process_midi_folder(folder_to_process, chord_csv, frame_csv)

with open(vocab_json, 'w') as f:
    json.dump(chord_to_index, f, indent=2)
    

✔ Saved chords to: output/chord_dataset.csv
✔ Saved frames to: output/timeframe_dataset.csv


### 3. One-hot Encoding

In [34]:
# one-hot encoding 
import pandas as pd
import numpy as np
import os
import json

output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

frame_csv_path = "output/timeframe_dataset.csv"
chord_vocab_path = "output/chord_vocab.json"
output_onehot_csv_path = os.path.join(output_dir, "timeframe_onehot.csv")


# load from JSON file
with open(chord_vocab_path, "r") as f:
    chord_to_index = json.load(f)

# reverse
chord_to_index = {str(k): v for k, v in chord_to_index.items()}


def one_hot_encode_labels(label_indices, num_classes):
    return np.eye(num_classes)[label_indices]

# load original timeframe-level dataset
df = pd.read_csv(frame_csv_path)

# get label col
label_indices = df["label"].astype(int).values

# one-hot encoding 
num_classes = len(chord_to_index)
one_hot = one_hot_encode_labels(label_indices, num_classes)

# create DataFrame 
one_hot_columns = [f"class_{i}" for i in range(num_classes)]
one_hot_df = pd.DataFrame(one_hot, columns=one_hot_columns)

# combine with filename + time 
minimal_df = df[["filename", "time"]].reset_index(drop=True)
result_df = pd.concat([minimal_df, one_hot_df], axis=1)

result_df.to_csv(output_onehot_csv_path, index=False)

print(f"One-hot encoded data saved to {output_onehot_csv_path}")

One-hot encoded data saved to output/timeframe_onehot.csv


## Baseline Model: SVM

In [35]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

frame_csv_path = "output/timeframe_dataset.csv"
df = pd.read_csv(frame_csv_path)

# split to train and test dataset
feature_cols = [f"chroma_{i}" for i in range(12)]

X = df[feature_cols].values
y = df["label"].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF kernel 
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)

y_pred = svm_model.predict(X_test_scaled)

# print confusion metrics with zero_division fix
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.47      0.71      0.57       185
           1       0.50      0.38      0.43        24
           2       0.47      0.37      0.41        19
           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.55      0.68      0.61        31
           6       0.75      0.46      0.57        13
           7       0.00      0.00      0.00         4
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         2
          10       0.59      0.74      0.66       133
          11       0.53      0.47      0.49        45
          12       0.57      0.29      0.38        14
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.69      0.74      0.71        65
          16       0.00      0.00      0.00         5
    

## Deep Learning Models

### Reorganize timing data

In [55]:
import torch

# auto-select device:
if torch.cuda.is_available():
    device = torch.device("cuda")
    backend = "CUDA"
elif getattr(torch.backends, "mps", None) is not None \
     and torch.backends.mps.is_available():
    device = torch.device("mps")
    backend = "MPS (Apple Silicon)"
else:
    device = torch.device("cpu")
    backend = "CPU"

print(f"Using device: {device}  |  backend: {backend}")

Using device: mps  |  backend: MPS (Apple Silicon)


In [60]:
import numpy as np
import pandas as pd

def build_sequence_tensor(
    frame_df: pd.DataFrame,
    seq_len: int,
    num_feat: int = 12,
    num_classes: int = 24,
    to_torch: bool = False,
    device: str | None = None,
):

    groups = frame_df.groupby("filename", sort=False)
    n_song = len(groups)

    X_seq = np.zeros((n_song, seq_len, num_feat), dtype=np.float32)
    y_seq = np.zeros((n_song, seq_len),        dtype=np.int64)

    for idx, (_, g) in enumerate(groups):
        g = g.sort_values("time")

        x = g[[f"chroma_{i}" for i in range(num_feat)]].to_numpy(dtype=np.float32)
        y = g["label"].to_numpy(dtype=np.int64)

        pad = max(seq_len - len(x), 0)
        X_seq[idx] = np.pad(x, ((0, pad), (0, 0)), mode="constant")[:seq_len]
        y_seq[idx] = np.pad(y, (0, pad), mode="constant")[:seq_len]

    # one-hot via NumPy (no TF)
    eye = np.eye(num_classes, dtype=np.float32)
    y_seq_ohe = eye[y_seq]                    # (N, seq_len, C)

    if to_torch:
        import torch
        X_seq      = torch.tensor(X_seq,      device=device)
        y_seq_ohe  = torch.tensor(y_seq_ohe,  device=device)

    return X_seq, y_seq_ohe

In [61]:
import json, pandas as pd, numpy as np, torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# load data
df = pd.read_csv("output/timeframe_dataset.csv")
num_classes = len(json.load(open("output/chord_vocab.json")))

X_seq, y_seq_ohe = build_sequence_tensor(
    frame_df     = df,
    seq_len      = 64,
    num_feat     = 12,
    num_classes  = num_classes,
    to_torch     = True,
    device       = device
)

# train / val split
X_tr, X_te, y_tr, y_te = train_test_split(
    X_seq, y_seq_ohe,
    test_size   = 0.20,
    random_state= 42,
    shuffle     = True
)

y_tr_idx = y_tr.argmax(dim=-1)
y_te_idx = y_te.argmax(dim=-1)


batch_size = 16
train_dl = DataLoader(
    TensorDataset(X_tr.float(), y_tr_idx.long()),
    batch_size = batch_size,
    shuffle    = True
)
val_dl = DataLoader(
    TensorDataset(X_te.float(), y_te_idx.long()),
    batch_size = batch_size,
    shuffle    = False
)

seq_len  = X_tr.size(1)
num_feat = X_tr.size(2) 



### 1. CNN Model

### 2. RNN Model

In [64]:
import os, torch, numpy as np
from torch import nn

# define RNN model
class SimpleRNNModel(nn.Module):
    def __init__(self, n_feat, n_classes, hidden=64):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=n_feat,
            hidden_size=hidden,
            batch_first=True,
            nonlinearity="tanh"
        )
        self.fc = nn.Linear(hidden, n_classes)

    def forward(self, x):                 # x: (B, T, F)
        out, _ = self.rnn(x)              # (B, T, H)
        return self.fc(out)               # (B, T, C)

# init
rnn_model = SimpleRNNModel(num_feat, num_classes).to(device)
criterion  = nn.CrossEntropyLoss()
optimizer  = torch.optim.Adam(rnn_model.parameters(), lr=1e-3)

epochs, patience = 30, 3
best_loss        = np.inf
patience_ctr     = 0

# checkpoint paths
os.makedirs("checkpoints", exist_ok=True)
best_ckpt = "checkpoints/best_rnn.pt"
last_ckpt = "checkpoints/last_rnn.pt"

# training loop
for epoch in range(1, epochs + 1):
    rnn_model.train()
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = rnn_model(xb)
        loss = criterion(logits.reshape(-1, num_classes), yb.reshape(-1))
        loss.backward()
        optimizer.step()

    # validation
    rnn_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_dl:
            xb, yb = xb.to(device), yb.to(device)
            logits = rnn_model(xb)
            loss = criterion(logits.reshape(-1, num_classes), yb.reshape(-1))
            val_loss += loss.item() * xb.size(0)
    val_loss /= len(val_dl.dataset)

    print(f"Epoch {epoch:02d}  val_loss={val_loss:.4f}")
    torch.save(rnn_model.state_dict(), last_ckpt)            # always save last

    if val_loss < best_loss:                                 # save best
        best_loss = val_loss
        patience_ctr = 0
        torch.save(rnn_model.state_dict(), best_ckpt)
    else:
        patience_ctr += 1
        if patience_ctr >= patience:
            print("Early stopping.\n")
            break

# restore best
ckpt_to_load = best_ckpt if os.path.exists(best_ckpt) else last_ckpt
rnn_model.load_state_dict(torch.load(ckpt_to_load, map_location=device))
print(f"✓ RNN training done!")


Epoch 01  val_loss=3.7251
Epoch 02  val_loss=2.9053
Epoch 03  val_loss=1.8991
Epoch 04  val_loss=1.5197
Epoch 05  val_loss=1.3731
Epoch 06  val_loss=1.2936
Epoch 07  val_loss=1.2431
Epoch 08  val_loss=1.2129
Epoch 09  val_loss=1.1874
Epoch 10  val_loss=1.1680
Epoch 11  val_loss=1.1514
Epoch 12  val_loss=1.1368
Epoch 13  val_loss=1.1238
Epoch 14  val_loss=1.1144
Epoch 15  val_loss=1.1033
Epoch 16  val_loss=1.0957
Epoch 17  val_loss=1.0866
Epoch 18  val_loss=1.0783
Epoch 19  val_loss=1.0758
Epoch 20  val_loss=1.0699
Epoch 21  val_loss=1.0647
Epoch 22  val_loss=1.0608
Epoch 23  val_loss=1.0558
Epoch 24  val_loss=1.0522
Epoch 25  val_loss=1.0462
Epoch 26  val_loss=1.0437
Epoch 27  val_loss=1.0421
Epoch 28  val_loss=1.0386
Epoch 29  val_loss=1.0347
Epoch 30  val_loss=1.0331
✓ RNN training done!


### 3. LSTM Model

In [65]:
import os, torch, numpy as np
from torch import nn

# bidirectional LSTM
class BiLSTMModel(nn.Module):
    def __init__(self, n_feat, n_classes, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=n_feat,
            hidden_size=hidden,
            batch_first=True,
            bidirectional=True
        )
        self.fc = nn.Linear(hidden * 2, n_classes)

    def forward(self, x):                  # x: (B, T, F)
        out, _ = self.lstm(x)              # (B, T, 2H)
        out = self.fc(out)                 # (B, T, C)
        return out

# init
lstm_model = BiLSTMModel(num_feat, num_classes).to(device)
criterion   = nn.CrossEntropyLoss()
optimizer   = torch.optim.Adam(lstm_model.parameters(), lr=1e-3)

epochs, patience = 30, 3
best_loss        = np.inf
patience_ctr     = 0

# checkpoint directory
os.makedirs("checkpoints", exist_ok=True)
best_ckpt  = "checkpoints/best_lstm.pt"
last_ckpt  = "checkpoints/last_lstm.pt"

# training loop
for epoch in range(1, epochs + 1):
    lstm_model.train()
    for xb, yb in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = lstm_model(xb)
        loss   = criterion(logits.reshape(-1, num_classes),
                           yb.reshape(-1))
        loss.backward()
        optimizer.step()

    # validation
    lstm_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_dl:
            xb, yb = xb.to(device), yb.to(device)
            logits = lstm_model(xb)
            loss   = criterion(logits.reshape(-1, num_classes),
                               yb.reshape(-1))
            val_loss += loss.item() * xb.size(0)
    val_loss /= len(val_dl.dataset)

    print(f"Epoch {epoch:02d}  val_loss={val_loss:.4f}")

    torch.save(lstm_model.state_dict(), last_ckpt)

    # save best & early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        patience_ctr = 0
        torch.save(lstm_model.state_dict(), best_ckpt)
    else:
        patience_ctr += 1
        if patience_ctr >= patience:
            print("Early stopping.\n")
            break

# load best
ckpt_to_load = best_ckpt if os.path.exists(best_ckpt) else last_ckpt
lstm_model.load_state_dict(torch.load(ckpt_to_load, map_location=device))
print(f"✓ LSTM training done!")


Epoch 01  val_loss=3.8334
Epoch 02  val_loss=3.1944
Epoch 03  val_loss=1.7585
Epoch 04  val_loss=1.4826
Epoch 05  val_loss=1.3627
Epoch 06  val_loss=1.2731
Epoch 07  val_loss=1.2029
Epoch 08  val_loss=1.1482
Epoch 09  val_loss=1.1059
Epoch 10  val_loss=1.0692
Epoch 11  val_loss=1.0438
Epoch 12  val_loss=1.0220
Epoch 13  val_loss=1.0012
Epoch 14  val_loss=0.9850
Epoch 15  val_loss=0.9718
Epoch 16  val_loss=0.9584
Epoch 17  val_loss=0.9491
Epoch 18  val_loss=0.9406
Epoch 19  val_loss=0.9336
Epoch 20  val_loss=0.9268
Epoch 21  val_loss=0.9210
Epoch 22  val_loss=0.9158
Epoch 23  val_loss=0.9107
Epoch 24  val_loss=0.9079
Epoch 25  val_loss=0.9022
Epoch 26  val_loss=0.9010
Epoch 27  val_loss=0.8968
Epoch 28  val_loss=0.8933
Epoch 29  val_loss=0.8929
Epoch 30  val_loss=0.8916
✓ LSTM training done!


### 4. CNN + LSTM Model

## Evaluation

In [68]:
def predict_np(model, X_np, batch_size=128):
    model.eval()
    out = []
    with torch.no_grad():
        for i in range(0, len(X_np), batch_size):
            xb = torch.tensor(X_np[i:i+batch_size], dtype=torch.float32, device=device)
            logits = model(xb)                # (B, T, C)
            prob   = torch.softmax(logits, -1)
            out.append(prob.cpu().numpy())
    return np.concatenate(out, axis=0)

rnn_probs  = predict_np(rnn_model,  X_te)
lstm_probs = predict_np(lstm_model, X_te)


  xb = torch.tensor(X_np[i:i+batch_size], dtype=torch.float32, device=device)


In [None]:
# import json, numpy as np
# import torch, mir_eval

# # helper
# def _to_numpy(x):
#     if torch.is_tensor(x):
#         return x.detach().cpu().numpy()
#     return np.asarray(x)


# def make_ints_to_chords(vocab_path="output/chord_vocab.json"):
#     with open(vocab_path, "r") as f:
#         chord_to_idx = json.load(f)

#     idx_to_chord = {int(idx): chord for chord, idx in chord_to_idx.items()}

#     def ints_to_chords(arr_like):
#         arr = np.asarray(arr_like).astype(int).flatten()
#         return [idx_to_chord[i] for i in arr]

#     return ints_to_chords

# ints_to_chords = make_ints_to_chords("output/chord_vocab.json")



# def evaluate_chord_predictions(name, y_pred_prob, y_true_ohe, frame_rate=1.0):
#     """
#     name : Identifier printed in the header.
#     y_pred_prob : (N, seq_len, C) torch.Tensor | ndarray
#         Predicted probabilities / logits.
#     y_true_ohe  : (N, seq_len, C) torch.Tensor | ndarray
#         One-hot ground-truth labels.
#     frame_rate : float, optional
#         Frames per second (interval length = 1/frame_rate sec).
#     """
#     # to numpy (CPU)
#     y_pred_prob = _to_numpy(y_pred_prob)
#     y_true_ohe  = _to_numpy(y_true_ohe)

#     # flatten to 1-D vectors of class indices
#     y_pred_int = y_pred_prob.argmax(-1).flatten()
#     y_true_int = y_true_ohe.argmax(-1).flatten()

#     # map int to mir_eval chord strings 
#     est_labels = ints_to_chords(y_pred_int)
#     ref_labels = ints_to_chords(y_true_int)

#     n = len(y_true_int)
#     intervals = np.column_stack([np.arange(n)/frame_rate,
#                                  (np.arange(n)+1)/frame_rate])

#     result = mir_eval.chord.evaluate(
#         intervals, ref_labels, intervals, est_labels
#     )

#     # normalize output (names, scores)
#     if isinstance(result, dict):
#         score_names = list(result.keys())
#         scores      = list(result.values())

#     elif isinstance(result, (list, tuple)) and len(result) == 2:
#         a, b = result
#         score_names, scores = (a, b) if isinstance(a[0], str) else (b, a)

#     else:
#         scores = list(result)
#         score_names = ["root", "majmin", "thirds",
#                        "triads", "sevenths", "tetrads", "mirex"][:len(scores)]

#     print(f"\n=== {name} ===")
#     for nm, sc in zip(score_names, scores):
#         print(f"{nm:>10}: {sc:.4f}")

#     frame_acc = (y_pred_int == y_true_int).mean()
#     print(f"{'frame_acc':>10}: {frame_acc:.4f}")


# evaluate_chord_predictions("RNN Model",  rnn_probs,  y_te)
# evaluate_chord_predictions("LSTM Model", lstm_probs, y_te)


InvalidChordException: Invalid chord label: D Minor