In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mirdata
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

ModuleNotFoundError: No module named 'compiam'

## Load the dataset with the mirdata loader

In [None]:
# Directory passed to mirdata.initialize as `data_home`.
data_home = 'dataset'
# Dataset identifier passed to mirdata.initialize.
dataset_name = 'compmusic_raga'

In [41]:
# Create the mirdata loader object for `dataset_name`, rooted at `data_home`.
dataset = mirdata.initialize(dataset_name, data_home=data_home)

In [42]:
# Left disabled on purpose: uncomment to fetch the dataset through mirdata
# (presumably only needed when `data_home` is still empty -- TODO confirm).
#dataset.download()

In [43]:
# Load all tracks; the preprocessing cell iterates this with `.items()`
# as (track_id, track) pairs.
data = dataset.load_tracks()

## Preprocess data

In [49]:
# Cut the pitch track of every Carnatic recording into fixed-length windows.
# After this cell:
#   X_srutis - one row per window: tonic-relative pitch of each sample, in sruti steps
#   X_counts - one row per window: histogram of those steps over `all_srutis`
#   y        - one raga label per window (the value of `track.raga`)
X_srutis = list()
X_counts = list()
y = list()

window_length = 1024  # pitch samples per window
hop_length = 512      # samples between consecutive window starts (50% overlap)

# Pitch is quantised to 24 steps per octave relative to the tonic. Only pitches
# between tonic / 2 and tonic * 4 are kept, which bounds the step range below.
srutis_per_octave = 24
min_sruti = int(np.floor(srutis_per_octave * np.log2(1 / 2)))
max_sruti = int(np.ceil(srutis_per_octave * np.log2(4)))
all_srutis = np.arange(min_sruti, max_sruti + 1)

for track in tqdm(data.values()):
    # Skip hindustani
    if track.tradition != 'carnatic':
        continue

    # Get annotations
    tonic = track.tonic_fine_tuned
    frequencies = track.pitch_post_processed.frequencies

    # Get ragam
    raga = track.raga

    # Drop zero-valued samples (presumably unvoiced frames -- TODO confirm) and
    # anything outside the [tonic / 2, tonic * 4] range covered by `all_srutis`.
    frequencies = frequencies[frequencies != 0]
    frequencies = frequencies[(frequencies >= tonic / 2) & (frequencies <= tonic * 4)]

    # The conversion is elementwise, so do it once per track instead of once
    # per window; slicing afterwards yields exactly the same values.
    track_srutis = np.round(srutis_per_octave * np.log2(frequencies / tonic))
    # Shift so the lowest possible step maps to bin 0 for np.bincount.
    bin_indices = track_srutis.astype(int) - min_sruti

    # `+ 1` so the last complete window is included: without it a track whose
    # usable length is exactly window_length (or window_length + k * hop_length)
    # silently loses its final window.
    for start in range(0, len(track_srutis) - window_length + 1, hop_length):
        stop = start + window_length

        X_srutis.append(track_srutis[start:stop])
        # minlength keeps every histogram the same width even when the
        # highest steps do not occur in this window.
        X_counts.append(np.bincount(bin_indices[start:stop], minlength=len(all_srutis)))
        y.append(raga)

X_srutis = np.array(X_srutis)
X_counts = np.array(X_counts)
y = np.array(y)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 477/477 [00:12<00:00, 39.30it/s]


## Convert labels to one-hot encodings

In [51]:
# Turn the per-window raga names in `y` into one-hot rows.
# After this cell:
#   all_ragas - array of distinct raga names; position i is the name of class i
#   y         - one-hot matrix with one row per window and one column per raga

# This cell overwrites `y`, so running it twice would try to encode the one-hot
# matrix itself. Fail loudly instead of producing garbage labels.
if np.ndim(y) != 1:
    raise ValueError(
        "y is not a 1-D array of raga names; re-run the preprocessing cell "
        "before encoding the labels again"
    )

# np.unique returns the names sorted, so the class order is the same on every
# run (iterating a set of strings is not), and return_inverse gives each
# window's class index directly, without writing integers into the string array.
all_ragas, raga_indices = np.unique(y, return_inverse=True)

y = to_categorical(raga_indices, num_classes=len(all_ragas))

## Save dataset

In [55]:
# Persist the preprocessed arrays.
# The per-window pitch sequences live in `X_srutis`; no `X_cents` is defined
# anywhere in this notebook, so saving that name raised NameError on a fresh
# kernel. The 'cents.npy' filename is kept so existing consumers of the file
# keep working, even though the values are sruti steps.
np.save('dataset/cents.npy', X_srutis)
np.save('dataset/counts.npy', X_counts)
np.save('dataset/labels.npy', y)
np.save('dataset/ragas.npy', all_ragas)