In [1]:
# %pip install essentia
# %pip install librosa
# %pip install yt_dlp

In [2]:
import librosa
import essentia.standard as es
import numpy as np
import pandas as pd
import os

[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


In [3]:
""" TEMPO (BPM) """
def get_tempo(y, sr):
    # Estimar el tempo
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # El resultado 'tempo' es un número flotante, p. ej., 129.199
    print(tempo)
    return tempo

""" KEY (Tonalidad) """
def get_key(audio):
    key_extractor = es.KeyExtractor()
    key, scale, strength = key_extractor(audio)
    
    return(key, scale) # 'key' será la tónica (p. ej., "C") y 'scale' la modalidad (p. ej., "major")

""" LOUDNESS (Volumen) """

def get_loudness(audio):
    loudness = es.Loudness()(audio)
    return loudness

""" Time Signature (compás) """

def estimate_time_signature_value(y, sr):
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    n_beats = len(beats)

    # Heurística simple para videojuegos
    # (solo interesa una aproximación numérica)
    if tempo < 70:
        return 3   # suave o vals
    elif tempo < 100:
        return 2   # binario, marchas
    else:
        return 4   # casi todo lo demás (loops, combate, etc.)
        
""" ACTUALIZACIÓN DE CARACTERISTICAS """

def get_mfccs(y, sr, n_mfcc=13):
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return {
        'mfcc_mean': np.mean(mfccs, axis=1),
        'mfcc_std': np.std(mfccs, axis=1)
    }

# (relacionado con el "brillo" del sonido)
def get_spectral_centroid(y, sr):
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return np.mean(centroid)

# (diferencia entre picos y valles)
def get_spectral_contrast(y, sr):
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return np.mean(contrast, axis=1)

# Zero Crossing Rate (indica percusividad/agresividad)
def get_zcr(y):
    zcr = librosa.feature.zero_crossing_rate(y)
    return np.mean(zcr)

# RMS Energy (energía/intensidad percibida)
def get_rms_energy(y):
    rms = librosa.feature.rms(y=y)
    return np.mean(rms)

# Spectral Rolloff (punto donde 85% de la energía espectral está contenida)
def get_spectral_rolloff(y, sr):
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return np.mean(rolloff)

# Chroma Features (perfil armónico, útil para comparar armonías entre juegos)
def get_chroma(y, sr):
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    return np.mean(chroma, axis=1)

# Harmonic-Percussive Separation
def get_harmonic_percussive_ratio(y):
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    harmonic_energy = np.sum(y_harmonic**2)
    percussive_energy = np.sum(y_percussive**2)
    return harmonic_energy / (percussive_energy + 1e-6)

# Onset Strength (cuán marcados son los ataques/inicios)
def get_onset_strength(y, sr):
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    return np.mean(onset_env)

# Tempogram (variabilidad temporal)
def get_tempo_stability(y, sr):
    tempogram = librosa.feature.tempogram(y=y, sr=sr)
    return np.std(tempogram)

# Estima qué tan “bailable” es un audio. Este algoritmo se basa en Detrended Fluctuation Analysis (DFA)
def get_danceability(audio):
    danceability = es.Danceability()(audio)
    return danceability

# Dissonance (tensión armónica)
# Dissonance (tensión armónica)
def get_dissonance(audio):
    # Primero extraer el espectro
    spectrum = es.Spectrum()(audio)
    # Luego calcular las frecuencias y magnitudes de los picos espectrales
    spectral_peaks = es.SpectralPeaks()
    frequencies, magnitudes = spectral_peaks(spectrum)
    # Finalmente calcular la disonancia
    dissonance = es.Dissonance()(frequencies, magnitudes)
    return dissonance

# Dynamic Complexity
def get_dynamic_complexity(audio):
    dynamic_complexity = es.DynamicComplexity()(audio)
    return dynamic_complexity

In [4]:
import pandas as pd
import os


audio_files = ["audios/27. It’s TV Time! (DELTARUNE Chapter 3+4 Soundtrack) - Toby Fox [F2PJbTuZlTU].mp3", "audios/39. BIG SHOT (DELTARUNE Chapter 2 Soundtrack) - Toby Fox [V31PVkwzpEY].mp3"] 
all_features = []

for audio_path in audio_files:
    # Cargar audios
    y, sr = librosa.load(audio_path, sr=None)
    audio_essentia = es.MonoLoader(filename=audio_path)()
    
    # Extraer todas las features
    tempo = get_tempo(y, sr)
    key, scale = get_key(audio_essentia)
    loudness = get_loudness(audio_essentia)
    time_signature = estimate_time_signature_value(y, sr)

    # Nuevas features - Librosa
    mfccs = get_mfccs(y, sr)
    spectral_centroid = get_spectral_centroid(y, sr)
    spectral_contrast = get_spectral_contrast(y, sr)
    zcr = get_zcr(y)
    rms_energy = get_rms_energy(y)
    spectral_rolloff = get_spectral_rolloff(y, sr)
    chroma = get_chroma(y, sr)
    harmonic_percussive_ratio = get_harmonic_percussive_ratio(y)
    onset_strength = get_onset_strength(y, sr)
    tempo_stability = get_tempo_stability(y, sr)
    
    # Nuevas features - Essentia
    danceability = get_danceability(audio_essentia)
    dissonance = get_dissonance(audio_essentia)
    dynamic_complexity = get_dynamic_complexity(audio_essentia)
    
    # Crear un diccionario para esta canción
    features_dict = {
        'filename': os.path.basename(audio_path),
        'tempo': tempo,
        'key': key,
        'scale': scale,
        'loudness': loudness,
        'time_signature': time_signature,
        'spectral_centroid': spectral_centroid,
        'zcr': zcr,
        'rms_energy': rms_energy,
        'spectral_rolloff': spectral_rolloff,
        'harmonic_percussive_ratio': harmonic_percussive_ratio,
        'onset_strength': onset_strength,
        'tempo_stability': tempo_stability,
        'danceability': danceability,
        'dissonance': dissonance,
        'dynamic_complexity': dynamic_complexity
    }
    
    all_features.append(features_dict)

# 2. Convertir la lista de diccionarios a un DataFrame
features_df = pd.DataFrame(all_features)

# 3. Guardar el DataFrame
print(features_df)

[148.02631579]
[140.625]
                                            filename                 tempo  \
0  27. It’s TV Time! (DELTARUNE Chapter 3+4 Sound...  [148.02631578947367]   
1  39. BIG SHOT (DELTARUNE Chapter 2 Soundtrack) ...             [140.625]   

  key  scale     loudness  time_signature  spectral_centroid       zcr  \
0   G  minor  4617.086426               4        2326.078571  0.044313   
1  Bb  minor  5995.484375               4        2948.470503  0.048551   

   rms_energy  spectral_rolloff  harmonic_percussive_ratio  onset_strength  \
0    0.184120       5054.073430                   2.020468        1.109881   
1    0.241555       6680.504671                   7.715598        1.051078   

   tempo_stability                                       danceability  \
0         0.217257  (1.1956698894500732, [0.50086564, 0.4832648, 0...   
1         0.285441  (1.1372249126434326, [0.58138096, 0.5025935, 0...   

   dissonance                         dynamic_complexity  
0  