In [None]:
import os
import sys
import warnings

import numpy as np
import tensorflow as tf
import keras
import madmom
import mirdata
import librosa
import random
from sklearn.model_selection import KFold
import json
import pickle
import librosa.display

from scipy.ndimage import maximum_filter1d
from scipy.interpolate import interp1d
from scipy.signal import argrelmax

import matplotlib.pyplot as plt

In [None]:
def _open_dataset(name, data_home, label):
    """Initialise the mirdata dataset ``name`` under ``data_home`` and print its track count.

    Args:
        name: Dataset identifier passed to ``mirdata.initialize``.
        data_home: Directory passed as ``data_home`` to ``mirdata.initialize``.
        label: Text printed in front of the number of track ids.

    Returns:
        The object returned by ``mirdata.initialize``.
    """
    dataset = mirdata.initialize(name, data_home=data_home)
    # dataset.download()  # left disabled, as in the original cell
    print(label, len(dataset.track_ids))
    return dataset


gtzan = _open_dataset('gtzan_genre', "./dataset/gtzan_genre", "GTZAN")

ballroom = _open_dataset('ballroom', "./dataset/ballroom", "BALLROOM")

hainsworth = _open_dataset('hainsworth', "./dataset/hainsworth/H_1.0", "HAINSWORTH")

giantsteps = _open_dataset('giantsteps_tempo', "./dataset/giantsteps_tempo", "GIANTSTEPS")

# NOTE(review): this data_home starts with "./project/dataset/" while every other
# dataset uses "./dataset/" — confirm the different root is intended.
smc = _open_dataset('smc', "./project/dataset/smc", "SMC")

carnatic = _open_dataset('carnatic', "./dataset/carnatic", "CARNATIC")

harmonix = _open_dataset('harmonix', "./dataset/harmonix", "HARMONIX")

In [None]:
# Datasets whose tracks are loaded into `tracks`, keyed by dataset name.
# Uncomment an entry to include that dataset as well.
enabled_datasets = [
    ("gtzan_genre", gtzan),
    # ("ballroom", ballroom),
    # ("hainsworth", hainsworth),
    # ("smc", smc),
    # ("giantsteps_tempo", giantsteps),
    # ("carnatic", carnatic),
    # ("harmonix", harmonix),
]
tracks = {dataset_name: dataset.load_tracks() for dataset_name, dataset in enabled_datasets}

# Creare fold-uri (8 fold cross validation)
- dupa aceea se creeaza DataSequence-urile pe baza lor

In [None]:
# Flatten every loaded dataset into (dataset_name, track_id) pairs.
combined_ids = [
    (name, tid)
    for name, tks in tracks.items()
    for tid in tks.keys()
]

# Shuffle in place with a fixed seed so the ordering is repeatable.
random.seed(42)
random.shuffle(combined_ids)

k = 8
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Turn the index arrays produced by KFold into explicit lists of
# (dataset_name, track_id) pairs, one {"train", "test"} dict per fold.
folds = [
    {
        "train": [combined_ids[i] for i in train_idx],
        "test": [combined_ids[i] for i in test_idx],
    }
    for train_idx, test_idx in kf.split(combined_ids)
]

# Change the output file name here if desired.
with open("folds.json", "w") as f:
    json.dump(folds, f)

print("Fold-urile au fost salvate")

# Plotarea distributiilor pe fold-uri

In [None]:
import json
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from collections import Counter

# Read the fold assignment back from folds.json.
with open("folds.json") as f:
    folds = json.load(f)

# Every dataset name that appears in the train or test split of any fold, sorted.
all_datasets = sorted({ds for fold in folds for ds, _ in fold["train"] + fold["test"]})
# plt.get_cmap is used instead of plt.cm.get_cmap: matplotlib.cm.get_cmap was
# deprecated in Matplotlib 3.7 and removed in 3.9, while pyplot.get_cmap accepts
# the same (name, lut) arguments.
colors = plt.get_cmap("tab10", len(all_datasets))
# One colour per dataset, taken from the colormap at the dataset's sorted index.
dataset_colors = {ds: colors(i) for i, ds in enumerate(all_datasets)}

def plot_stacked_bar(folds, split_type, filename):
    """Save a stacked bar chart of how many tracks each dataset contributes per fold.

    Args:
        folds: Sequence of fold dicts; ``fold[split_type]`` is an iterable of
            ``(dataset_name, track_id)`` pairs.
        split_type: Key selecting which split of every fold is counted; it is
            also interpolated into the title and the y-axis label.
        filename: Path handed to ``plt.savefig``.

    Reads the module-level ``all_datasets`` (stacking order) and
    ``dataset_colors`` (bar colours). Returns nothing; the figure is closed
    once it has been saved.
    """
    fold_labels = [f"Fold {j+1}" for j in range(len(folds))]
    # One Counter per fold; a dataset missing from a fold counts as 0.
    per_fold_counts = [Counter(ds for ds, _ in fold[split_type]) for fold in folds]

    fig, ax = plt.subplots(figsize=(10, 6))
    bottom = [0] * len(folds)
    for ds in all_datasets:
        # Building the heights per dataset (instead of transposing a matrix)
        # also works when `folds` is empty.
        heights = [counts[ds] for counts in per_fold_counts]
        ax.bar(
            fold_labels,
            heights,
            label=ds,
            color=dataset_colors[ds],
            bottom=bottom
        )
        # The next dataset is stacked on top of everything drawn so far.
        bottom = [base + height for base, height in zip(bottom, heights)]

    # Use the real number of folds in the title rather than a hard-coded 8.
    ax.set_title(f"Distributia dataset-urilor în cele {len(folds)} folduri ({split_type} seturi)")
    ax.set_ylabel(f"Numar trackuri în {split_type} set")
    ax.legend(title="Dataset")
    plt.tight_layout()
    plt.savefig(filename)
    # Close this specific figure instead of whichever one happens to be current.
    plt.close(fig)

def plot_order_matrix(folds, split_type, filename):
    """Save an image with one row per fold, colouring each track position by its dataset.

    Args:
        folds: Sequence of fold dicts; ``fold[split_type]`` is a list of
            ``(dataset_name, track_id)`` pairs whose order is drawn left to right.
        split_type: Key selecting which split of every fold is drawn; it is
            also interpolated into the title and the x-axis label.
        filename: Path handed to ``plt.savefig``.

    Reads the module-level ``dataset_colors`` (cell colours) and
    ``all_datasets`` (legend entries). Returns nothing.
    """
    widest = max(len(fold[split_type]) for fold in folds)
    padding = (1, 1, 1, 1)  # white (as RGBA) for positions past the end of a shorter fold

    # One row of colours per fold, right-padded so every row has `widest` cells.
    grid = [
        [dataset_colors[ds] for ds, _ in fold[split_type]]
        + [padding] * (widest - len(fold[split_type]))
        for fold in folds
    ]

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.imshow(grid, aspect="auto")
    ax.set_title(f"Vizualizare colorata a ordinii pieselor in fiecare {split_type} fold")
    ax.set_xlabel(f"Pozitia piesei în {split_type} set")
    ax.set_ylabel("Fold")
    ax.set_xticks([])
    ax.set_yticks(range(len(folds)))
    ax.set_yticklabels([f"Fold {row+1}" for row in range(len(folds))])

    # imshow produces no legend entries, so build one coloured patch per dataset by hand.
    legend_handles = []
    for ds in all_datasets:
        legend_handles.append(mpatches.Patch(color=dataset_colors[ds], label=ds))
    ax.legend(handles=legend_handles, title="Dataset", bbox_to_anchor=(1.02, 1), loc="upper left")

    plt.tight_layout()
    plt.savefig(filename)
    plt.close()

# Each entry is (plotting function, split to draw, output image path);
# the jobs run in the listed order.
plot_jobs = [
    (plot_stacked_bar, "test", "distributie_test1.png"),
    (plot_stacked_bar, "train", "distributie_train1.png"),
    (plot_order_matrix, "test", "ordine_test1.png"),
    (plot_order_matrix, "train", "ordine_train1.png"),
]
for draw, split_name, out_path in plot_jobs:
    draw(folds, split_name, out_path)

print("Imagini salvate")
