---

# Feature Extraction

---
### Pitch Histogram
- Serves as an indicator of the **vadi**, **samvadi**, and **vivadi** svaras.
- Also indicates whether a particular **svara** is **tivra**, **shuddha**, or **komal**.

### Pitch Gradient
The pitch gradient indicates whether a particular svara occurs in the **arohana** (ascending scale), the **avarohana** (descending scale), or both within a **raga**.
                                                                                
### Pitch Standard Deviation
The pitch standard deviation serves as an indicator of **svaras** that exhibit **gamakas** (ornamentations).

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mirdata
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

2025-04-11 09:25:37.588166: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-11 09:25:37.605357: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744356337.619478   11291 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744356337.623930   11291 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744356337.638254   11291 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

## Load dataset with mirdata dataset loader

In [2]:
# Name of the mirdata loader to use, and the directory (relative to this
# notebook) that is passed to it as the dataset location.
dataset_name = "compmusic_raga"
data_home = "../dataset"

In [None]:
# Create the mirdata loader for `dataset_name`, using `data_home` as its
# data directory.
dataset = mirdata.initialize(
    dataset_name,
    data_home=data_home,
)

In [None]:
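# Uncomment on first use to download the dataset into `data_home`
# (force_overwrite=True replaces any files that are already there).
# dataset.download(force_overwrite=True)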

In [None]:
# Load all tracks of the dataset. The preprocessing cell iterates over
# `data.items()` as (track_id, track) pairs.
data = dataset.load_tracks()

In [None]:
# Check that the local copy of the dataset is complete and that its files are
# valid. This is slow: the recorded run took about 12.5 minutes for 477 items.
# NOTE(review): `validation` appears to be a pair of dicts (both empty
# `{'tracks': {}}` in the recorded run) -- presumably missing files and
# invalid files; confirm against the mirdata documentation.
validation = dataset.validate()

100%|██████████| 477/477 [12:34<00:00,  1.58s/it]
INFO: Success: the dataset is complete and all files are valid.
INFO: --------------------


Dataset validation results: ({'tracks': {}}, {'tracks': {}})


## Preprocess data

In [7]:
# Per-window outputs; one entry is appended for every analysis window.
X_srutis = list()   # rounded sruti value of every pitch sample in the window
X_counts = list()   # sruti histogram, normalised by its largest bin
X_slopes = list()   # mean pitch slope observed at each sruti
X_stds = list()     # mean local pitch standard deviation observed at each sruti
y = list()          # raga of the track the window was taken from

# Feature-extraction parameters.
window_length = 4096   # pitch samples per analysis window
hop_length = 2048      # samples between consecutive window starts
slope_lag = 5          # lag (in samples) of the finite-difference slope
std_span = 32          # number of preceding samples used for the local std

# Sruti grid: 24 steps per octave, from one octave below the tonic (-24)
# up to two octaves above it (+48).
min_sruti = int(np.floor(24 * np.log2(1 / 2)))
max_sruti = int(np.ceil(24 * np.log2(4)))
all_srutis = np.arange(min_sruti, max_sruti + 1)


def _mean_per_sruti(values, membership):
    """Average `values` separately over the samples belonging to each sruti.

    Parameters
    ----------
    values : 1-D array with one value per pitch sample of the window.
    membership : boolean array of shape (n_srutis, n_samples); row k marks the
        samples whose rounded sruti equals the k-th grid sruti.

    Returns
    -------
    1-D float array of length n_srutis. Srutis that do not occur in the
    window get 0.
    """
    means = np.zeros(len(membership))
    for k, hits in enumerate(membership):
        if hits.any():
            means[k] = values[hits].mean()
    return means


for track_id, track in tqdm(data.items()):
    # Skip hindustani
    if track.tradition != 'carnatic':
        continue

    # Annotations and label of this track
    tonic = track.tonic_fine_tuned
    frequencies = track.pitch_post_processed.frequencies
    raga = track.raga

    # Drop zero-valued pitch samples (presumably unvoiced frames), then keep
    # only pitches between one octave below and two octaves above the tonic.
    frequencies = frequencies[frequencies != 0]
    frequencies = frequencies[(frequencies >= tonic / 2) & (frequencies <= tonic * 4)]

    # NOTE(review): windows start at `hop_length`, not 0, so the first
    # `hop_length` retained samples of every track are never used -- confirm
    # this is intended.
    for start in range(hop_length, len(frequencies) - window_length, hop_length):
        window = frequencies[start: start + window_length]

        # Pitch relative to the tonic, quantised to the nearest sruti.
        srutis = np.round(24 * np.log2(window / tonic))

        # membership[k, t] is True when sample t sits on the k-th grid sruti.
        membership = srutis == all_srutis[:, None]

        # Sruti histogram, scaled so that the most frequent sruti has value 1.
        sruti_counts = membership.sum(axis=1)
        full_sruti_counts = sruti_counts / np.max(sruti_counts)

        # Slope of the pitch track: finite difference over `slope_lag`
        # samples. The first `slope_lag` entries have no predecessor and
        # stay 0.
        slopes = np.zeros_like(srutis)
        slopes[slope_lag:] = (srutis[slope_lag:] - srutis[:-slope_lag]) / slope_lag
        full_slopes = _mean_per_sruti(slopes, membership)

        # Local standard deviation: for sample t, the std of the `std_span`
        # samples preceding it (srutis[t - std_span:t]). The first `std_span`
        # entries stay 0. Row j of the sliding view is srutis[j:j + std_span],
        # so the last row (which has no following sample) is dropped.
        stds = np.zeros_like(srutis)
        preceding = np.lib.stride_tricks.sliding_window_view(srutis, std_span)[:-1]
        stds[std_span:] = preceding.std(axis=1)
        full_stds = _mean_per_sruti(stds, membership)

        X_srutis.append(srutis)
        X_counts.append(full_sruti_counts)
        X_slopes.append(full_slopes)
        X_stds.append(full_stds)
        y.append(raga)

X_srutis = np.array(X_srutis)
X_counts = np.array(X_counts)
X_slopes = np.array(X_slopes)
X_stds = np.array(X_stds)
y = np.array(y)

100%|██████████| 477/477 [29:16<00:00,  3.68s/it] 


## Convert labels to one-hot encodings

In [8]:
# Guard against re-running this cell: it expects the 1-D array of raga labels
# produced by the preprocessing cell, not an already encoded matrix.
assert y.ndim == 1, "y is already one-hot encoded; re-run the preprocessing cell first"

# Build the class vocabulary. np.unique returns the distinct labels in sorted
# order, so the label -> index mapping is the same on every run, and
# `label_indices[k]` is the position of y[k] within `all_ragas`.
all_ragas, label_indices = np.unique(y, return_inverse=True)

# One-hot encode the integer class indices; column j corresponds to all_ragas[j].
y = to_categorical(label_indices, num_classes=len(all_ragas))

## Save dataset

In [9]:
# Persist the extracted features, the labels and the class-name mapping.
np.save('../dataset/srutis.npy', X_srutis)      # rounded sruti sequence of every window
np.save('../dataset/counts.npy', X_counts)      # peak-normalised sruti histograms
np.save('../dataset/slopes.npy', X_slopes)      # mean pitch slope per sruti
np.save('../dataset/stds.npy', X_stds)          # mean local pitch std per sruti (previously wrote X_slopes by mistake)
np.save('../dataset/labels.npy', y)             # one-hot raga labels
np.save('../dataset/mappings.npy', all_ragas)   # raga name for each one-hot column