<a href="https://colab.research.google.com/github/ym001/distancia/blob/master/notebook/soundDistance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install distancia==0.0.48



In [7]:
from distancia import SpectralConvergence

signal1 = [0.5, 0.1, 0.2, 0.4, 0.3, 0.2, 0.1, 0.0]
signal2 = [0.4, 0.2, 0.2, 0.5, 0.3, 0.1, 0.2, 0.0]

convergence = SpectralConvergence().compute(signal1, signal2)
print(f"Spectral Convergence: {convergence}")


Spectral Convergence: 0.14673244459294343


In [9]:
import math
from typing import List
import librosa
from distancia import MFCCProcessor

import math

def generate_test_signals(duration: float = 1.0, sample_rate: int = 16000) -> tuple[list[float], list[float]]:
    """
    Génère deux signaux audio de test.

    Args:
        duration (float): Durée du signal en secondes. Par défaut 1.0 seconde.
        sample_rate (int): Taux d'échantillonnage en Hz. Par défaut 16000 Hz.

    Returns:
        tuple[list[float], list[float]]: Deux signaux audio de test.
    """
    num_samples = int(duration * sample_rate)

    # Signal 1: Combinaison de deux sinusoïdes (440 Hz et 880 Hz)
    signal1 = [
        0.5 * math.sin(2 * math.pi * 440 * t / sample_rate) +
        0.3 * math.sin(2 * math.pi * 880 * t / sample_rate)
        for t in range(num_samples)
    ]

    # Signal 2: Combinaison de trois sinusoïdes (330 Hz, 660 Hz et 990 Hz)
    signal2 = [
        0.4 * math.sin(2 * math.pi * 330 * t / sample_rate) +
        0.3 * math.sin(2 * math.pi * 660 * t / sample_rate) +
        0.2 * math.sin(2 * math.pi * 990 * t / sample_rate)
        for t in range(num_samples)
    ]

    return signal1, signal2

# Générer les signaux de test
test_signal1, test_signal2 = generate_test_signals()

# Afficher les 10 premiers échantillons de chaque signal
print("10 premiers échantillons du signal 1:", test_signal1[:10])
print("10 premiers échantillons du signal 2:", test_signal2[:10])

# Informations sur les signaux
print(f"Nombre d'échantillons dans chaque signal: {len(test_signal1)}")
print(f"Fréquence d'échantillonnage: 16000 Hz")
print(f"Durée de chaque signal: 1.0 seconde")


# Créer une instance de MFCCProcessor
processor = MFCCProcessor()

# Calculer les MFCC pour les deux signaux
mfcc1, mfcc2 = processor.compute_mfcc(test_signal1, test_signal2)

# Comparer les MFCC
distance = processor.compare_mfcc(test_signal1, test_signal2)

print(f"Nombre de trames MFCC pour chaque signal: {len(mfcc1)}")
print(f"Nombre de coefficients MFCC par trame: {len(mfcc1[0])}")
print(f"Distance moyenne entre les MFCC des deux signaux: {distance}")

# Afficher les premiers coefficients MFCC de la première trame pour chaque signal
print("Premiers coefficients MFCC du signal 1:", mfcc1[0][:5])
print("Premiers coefficients MFCC du signal 2:", mfcc2[0][:5])


10 premiers échantillons du signal 1: [0.0, 0.1875859262132922, 0.3605961570472526, 0.5059519423173868, 0.6133981700929515, 0.6765094849785568, 0.6932630175151309, 0.6661155750120292, 0.6015809911697477, 0.5093615439642688]
10 premiers échantillons du signal 2: [0.0, 0.20438860983483298, 0.39145963493709207, 0.5459418258086269, 0.6563814019704781, 0.7164019346692834, 0.7252895158905303, 0.687835184778397, 0.6134709666303622, 0.5148340607737458]
Nombre d'échantillons dans chaque signal: 16000
Fréquence d'échantillonnage: 16000 Hz
Durée de chaque signal: 1.0 seconde
Nombre de trames MFCC pour chaque signal: 14
Nombre de coefficients MFCC par trame: 13
Distance moyenne entre les MFCC des deux signaux: 20.184593753832043
Premiers coefficients MFCC du signal 1: [15.923779200605054, 9.83322863572527, -3.845832597620646, -13.142400019412856, -11.848251033422947]
Premiers coefficients MFCC du signal 2: [19.99216500253236, 12.156240075081715, -4.210556501678201, -14.207220023885315, -11.0816032

In [11]:
from distancia import PowerSpectralDensityDistance

signal1 = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2 = [0.1 * math.sin(2 * math.pi * 880 * t / 16000) for t in range(16000)]

psd_calculator = PowerSpectralDensityDistance(sample_rate=16000)

psd_distance = psd_calculator.compute_psd_distance(signal1, signal2)

print("PSD Distance:", psd_distance)


PSD Distance: 513514.8559149296


In [13]:
from distancia import CrossCorrelation

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 880 * t / 16000) for t in range(16000)]

cross_corr_calculator = CrossCorrelation(sample_rate=16000)

cross_corr_value: float = cross_corr_calculator.compute_cross_correlation(signal1, signal2)

print("Cross-correlation:", cross_corr_value)


Cross-correlation: 1.1540925550293483e-15


In [15]:
from distancia import PhaseDifferenceCalculator
# Paramètres
sample_rate: int = 44100  # Hz
window_size: int = 1024   # échantillons
hop_size: int = 512       # échantillons

# Créer une instance du calculateur
calculator: PhaseDifferenceCalculator = PhaseDifferenceCalculator(sample_rate, window_size, hop_size)

# Supposons que nous ayons deux signaux signal1 et signal2
signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 880 * t / 16000) for t in range(16000)]

# Analyser les signaux
phase_differences: List[float]
time_axis: List[float]
phase_differences, time_axis = calculator.analyze_signals(signal1, signal2)

# Afficher les résultats
print("Différences de phase:", phase_differences[:10])  # Affiche les 10 premières valeurs
print("Axe temporel:", time_axis[:10])  # Affiche les 10 premières valeurs


Différences de phase: [-2.4222941361062844e-13, -3.913015744760884e-14, -8.194330630706403e-13, -0.0030679615760171794, -0.003067961575875389, -0.0030679615775843837, -0.0030679615747780456, -0.003067961575879634, -0.0030679615761320207, 1.4784289244229587e-12]
Axe temporel: [0.0]


In [17]:
from distancia import TimeLagDistance

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 440 * (t - 100) / 16000) for t in range(16000)]  # signal2 is shifted

time_lag_calculator = TimeLagDistance(sample_rate=16000)

best_lag: int = time_lag_calculator.compute_time_lag_distance(signal1, signal2, max_lag=500)

print("Optimal time lag:", best_lag)


Optimal time lag: 9


In [19]:
from distancia import PESQ

reference_signal: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
degraded_signal: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) + 0.01 * math.sin(2 * math.pi * 1000 * t / 16000) for t in range(16000)]

pesq_calculator = PESQ(sample_rate=16000)

pesq_score: float = pesq_calculator.compute_pesq(reference_signal, degraded_signal)

print("PESQ Score:", pesq_score)


PESQ Score: 4.492250820344904


In [21]:
from distancia import LogSpectralDistance

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 450 * t / 16000) for t in range(16000)]  # Slightly different frequency

lsd_calculator = LogSpectralDistance(sample_rate=16000)

lsd_value: float = lsd_calculator.compute_lsd(signal1, signal2)

print("Log Spectral Distance:", lsd_value)


Log Spectral Distance: 14.377329255918537


In [23]:
from distancia import BarkSpectralDistortion

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

bsd_calculator = BarkSpectralDistortion(sample_rate=16000)

bsd_value: float = bsd_calculator.compute_bsd(signal1, signal2)

print("Bark Spectral Distortion:", bsd_value)


Bark Spectral Distortion: 14.65614015181136


In [25]:
from distancia import ItakuraSaitoDistance

# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

isd_calculator = ItakuraSaitoDistance()

isd_value: float = isd_calculator.compute_is_distance(signal1, signal2)

print("Itakura-Saito Distance:", isd_value)



Itakura-Saito Distance: 6386946.368221848


In [27]:
from distancia import SignalToNoiseRatio
# Example usage:

signal: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
noise: List[float] = [0.01 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]  # Slight noise

snr_calculator = SignalToNoiseRatio()

snr_value: float = snr_calculator.compute_snr(signal, noise)

print("Signal-to-Noise Ratio (SNR):", snr_value)


Signal-to-Noise Ratio (SNR): 19.999999999999893


In [29]:
from distancia import PeakSignalToNoiseRatio

# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

max_signal_value: float = 1.0  # Maximum possible value for a normalized signal

psnr_calculator = PeakSignalToNoiseRatio()

psnr_value: float = psnr_calculator.compute_psnr(signal1, signal2, max_signal_value)

print("Peak Signal-to-Noise Ratio (PSNR):", psnr_value)


Peak Signal-to-Noise Ratio (PSNR): 19.999999999999936


In [30]:
from distancia import EnergyDistance

# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

energy_distance_calculator = EnergyDistance()

energy_distance_value: float = energy_distance_calculator.compute_energy_distance(signal1, signal2)

print("Energy Distance:", energy_distance_value)


Energy Distance: 9.805489753489383e-13


In [32]:
from distancia import EnvelopeCorrelation

# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

envelope_correlation_calculator = EnvelopeCorrelation()

correlation_value: float = envelope_correlation_calculator.compute_envelope_correlation(signal1, signal2)

print("Envelope Correlation:", correlation_value)


Envelope Correlation: 0.0006076026733088895


In [34]:
from distancia import ZeroCrossingRateDistance

# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

zcr_calculator = ZeroCrossingRateDistance()

zcr_distance_value: float = zcr_calculator.compute_zcr_distance(signal1, signal2)

print("Zero-Crossing Rate (ZCR) Distance:", zcr_distance_value)


Zero-Crossing Rate (ZCR) Distance: 0.0006250000000000006


In [36]:
from distancia import CochleagramDistance
# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

cochleagram_calculator = CochleagramDistance(num_bands=40)

distance_value: float = cochleagram_calculator.compute_cochleagram_distance(signal1, signal2)

print("Cochleagram Distance:", distance_value)


Cochleagram Distance: 0.000833203125000008


In [37]:
from distancia import ChromagramDistance
# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

chromagram_calculator = ChromagramDistance(num_bins=12)

distance_value: float = chromagram_calculator.compute_chromagram_distance(signal1, signal2)

print("Chromagram Distance:", distance_value)


ValueError: math domain error

In [38]:
from distancia import SpectrogramDistance
# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

spectrogram_calculator = SpectrogramDistance(window_size=256, overlap=128)

distance_value: float = spectrogram_calculator.compute_spectrogram_distance(signal1, signal2)

print("Spectrogram Distance:", distance_value)


NameError: name 'spec1_frame' is not defined

In [39]:
from distancia import CQTDistance
# Example usage:

signal1: List[float] = [0.1 * math.sin(2 * math.pi * 440 * t / 16000) for t in range(16000)]
signal2: List[float] = [0.1 * math.sin(2 * math.pi * 445 * t / 16000) for t in range(16000)]  # Slightly different frequency

cqt_calculator = CQTDistance(num_bins=24, window_size=512)

distance_value: float = cqt_calculator.compute_cqt_distance(signal1, signal2)

print("CQT Distance:", distance_value)


NameError: name 'cqt1_frame' is not defined