In [1]:
# Locate the repository root and import the project helper modules.
import git
from pathlib import Path
import os
# ROOT_DIR is the working tree of the git repository found by searching
# upward from the current directory.
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
# Temporarily switch into utilities/ for the helper imports below
# (presumably so the modules resolve from the working directory — confirm).
os.chdir(os.path.join(ROOT_DIR, "utilities"))
# NOTE(review): names used later without a visible import or definition
# (e.g. transform_list, freq_band_groupings, pd) presumably come from these
# star imports — confirm.
from transform import *
from transform_audio import *
from plotting import *
# Switch to dataset-preparation/ for the rest of the notebook.
os.chdir(os.path.join(ROOT_DIR, "dataset-preparation"))

data_dir = os.path.join(ROOT_DIR, 'raw-data', 'ravdess', 'full-ravdess-wav-processed')
# Build the names and the full paths from ONE filtered listing so that
# file_names[i] always belongs to file_list[i]. Previously only file_list
# skipped ".DS_Store", which shifted the two lists against each other.
# Sorting makes the order (and therefore any seeded sampling that follows)
# independent of the arbitrary order os.listdir returns.
file_names = sorted(filename for filename in os.listdir(data_dir) if filename != ".DS_Store")
file_list = [os.path.join(data_dir, filename) for filename in file_names]

In [2]:
# seeded random sample holding an exact share of the files
import random

p = 0.3 # share of the files to keep (an exact count, not a per-file probability)
random.seed(42)

n_to_include = int(len(file_list) * p)
# boolean mask with exactly n_to_include True entries, shuffled into random positions
random_filter = [position < n_to_include for position in range(len(file_list))]
random.shuffle(random_filter)

# apply the same kept positions to the paths and to the names
kept_positions = [position for position, include in enumerate(random_filter) if include]
filtered_file_list = [file_list[position] for position in kept_positions]
filtered_file_names = [file_names[position] for position in kept_positions]

In [5]:
# Run the transform over the sampled files.
# NOTE(review): within this notebook cwt_file is only defined in a later cell;
# presumably the version from the star-imported utilities is the one intended
# here — confirm, otherwise this cell fails on a fresh kernel.
transform_list(
    cwt_file,
    filtered_file_list,
    filtered_file_names,
    high_freq=24000,
    num_scales=50,
    compress=True,
)

Computing and exporting coefficients:   0%|          | 0/432 [00:00<?, ?it/s]

# Wavelet representation
To start, we apply a Morlet continuous wavelet transform (CWT) to a few example files.

In [4]:
import numpy as np
import pywt
import matplotlib.pyplot as plt
from scipy.io import wavfile

In [None]:
# Import pandas explicitly: no cell in this notebook imports `pd` by name, so
# it was only available if one of the star imports happened to leak it.
import pandas as pd

# Per-recording metadata table for the RAVDESS files.
ravdess_df = pd.read_csv(os.path.join(ROOT_DIR, "raw-data", "ravdess", 'full-ravdess.csv'))

In [None]:
test_file_idx = 42
test_row = ravdess_df.iloc[test_file_idx]
print(test_row)

# Load the recording named in the metadata row. Indexing file_list (directory
# listing order) with a row position from the CSV does not guarantee that the
# audio and the printed metadata describe the same file.
rate, signal = wavfile.read(os.path.join(data_dir, test_row['filename']))
if signal.ndim > 1:
    # wavfile.read returns (samples, channels) for multi-channel audio, while
    # pywt.cwt transforms along the last axis; average down to mono first.
    signal = signal.mean(axis=1)

wavelet = 'cmor1.5-1.0' # complex morlet: bandwidth = 1.5, center frequency = 1.0
low_freq, high_freq = 80, 8000  # typical range for human speech
num_scales = 100  # can be adjusted for computational efficiency

# log-spaced analysis frequencies (ascending), converted to wavelet scales;
# frequency2scale expects frequencies normalised by the sampling rate
frequencies = np.logspace(np.log10(low_freq), np.log10(high_freq), num_scales)
scales = pywt.frequency2scale(wavelet, frequencies / rate)
# passing the sampling period makes the returned freqs come back in Hz
coefs, freqs = pywt.cwt(signal, scales, wavelet, 1/rate)

In [None]:
# Rows of `coefs` follow `scales`, i.e. ascending, log-spaced frequencies.
# imshow draws row 0 at the TOP by default and spaces rows evenly, so a linear
# extent of [low_freq, high_freq] showed the spectrum upside down and with
# wrong frequency labels. Draw row 0 at the bottom and label rows from `freqs`.
n_rows = coefs.shape[0]
plt.figure(figsize=(10, 6))
plt.imshow(
    np.abs(coefs),
    aspect='auto',
    cmap='jet',
    origin='lower',
    extent=[0, len(signal)/rate, 0, n_rows]
)
plt.colorbar(label='Magnitude')
# a handful of evenly spaced rows, each labelled with its own frequency;
# row r covers [r, r + 1) on the y axis, so its centre is r + 0.5
tick_rows = np.unique(np.linspace(0, n_rows - 1, num=min(8, n_rows)).round().astype(int))
plt.yticks(tick_rows + 0.5, [f'{freqs[row]:.0f}' for row in tick_rows])
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.title('CWT with Morlet Wavelet')
plt.show()

In [None]:
# generalize above logic into function

def cwt_file(filename, wavelet='cmor1.5-1.0', low_freq=80, high_freq=8000, num_scales=100, visualize=False, title='CWT with Morlet Wavelet'):
    """Compute the continuous wavelet transform of one WAV file.

    Parameters
    ----------
    filename : str
        File name joined onto the global ``data_dir`` (an absolute path is
        used as-is, since ``os.path.join`` discards ``data_dir`` in that case).
    wavelet : str
        PyWavelets continuous wavelet name.
    low_freq, high_freq : float
        Lowest and highest analysis frequency in Hz.
    num_scales : int
        Number of log-spaced analysis frequencies between the two bounds.
    visualize : bool
        When True, also show the coefficient magnitudes as an image.
    title : str
        Title of that figure.

    Returns
    -------
    (coefs, freqs)
        Exactly what ``pywt.cwt`` returns: one row of coefficients per scale,
        ordered from ``low_freq`` up to ``high_freq``, and the frequency that
        belongs to each row.
    """
    rate, signal = wavfile.read(os.path.join(data_dir, filename)) # currently references global data_dir
    if signal.ndim > 1:
        # wavfile.read returns (samples, channels) for multi-channel audio,
        # while pywt.cwt transforms along the last axis; average to mono first.
        signal = signal.mean(axis=1)

    # log-spaced frequencies (ascending) -> scales; frequency2scale expects
    # frequencies normalised by the sampling rate
    frequencies = np.logspace(np.log10(low_freq), np.log10(high_freq), num_scales)
    scales = pywt.frequency2scale(wavelet, frequencies / rate)
    coefs, freqs = pywt.cwt(signal, scales, wavelet, 1/rate)

    if visualize:
        # Row 0 is the LOWEST frequency and rows are log-spaced. imshow puts
        # row 0 at the top by default and spaces rows evenly, so a linear
        # [low_freq, high_freq] extent drew the image upside down with wrong
        # labels. Draw row 0 at the bottom and label rows from `freqs`.
        n_rows = coefs.shape[0]
        plt.figure(figsize=(10, 6))
        plt.imshow(
            np.abs(coefs),
            aspect='auto',
            cmap='jet',
            origin='lower',
            extent=[0, len(signal)/rate, 0, n_rows]
        )
        plt.colorbar(label='Magnitude')
        # row r covers [r, r + 1) on the y axis, so its centre is r + 0.5
        tick_rows = np.unique(np.linspace(0, n_rows - 1, num=min(8, n_rows)).round().astype(int))
        plt.yticks(tick_rows + 0.5, [f'{freqs[row]:.0f}' for row in tick_rows])
        plt.ylabel('Frequency (Hz)')
        plt.xlabel('Time (s)')
        plt.title(title)
        plt.show()

    return coefs, freqs

In [None]:
# compare male versus female

from functools import reduce

def homogenize(df, *cols):
    """Keep the rows that match the first row of ``df`` on every column in ``cols``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    *cols : str
        Columns whose value must equal the value in the first row (by
        position). With no columns, every row is kept.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels. An empty frame
        is returned unchanged in content.
    """
    # start from an all-True BOOLEAN mask (a float mask would be interpreted
    # as column labels when used to index the frame)
    mask = np.ones(len(df), dtype=bool)
    if len(df) == 0:
        return df[mask]
    for col in cols:
        values = df[col]
        # .iloc[0] is positional: it works even when the index has no label 0
        # (e.g. after filtering or with a non-default index)
        mask &= (values == values.iloc[0]).to_numpy()
    return df[mask]

# hold statement, emotion and intensity fixed so the remaining rows differ
# only in the other columns (such as sex)
matched_cols = ('statement', 'emotion', 'intensity')
hom_df = homogenize(ravdess_df, *matched_cols)

# one representative row per sex (groupby.first takes the first non-null
# entry of each column within the group)
rows_by_sex = hom_df.groupby('sex')
male_vs_female = rows_by_sex.first()
male_vs_female

In [None]:
# one plot per sex: the index of male_vs_female holds the sex label and the
# 'filename' column the recording to transform
for sex, filename in male_vs_female['filename'].items():
    plot_title = f'CWT with Morlet Wavelet: {sex.capitalize()}'
    cwt_file(filename, visualize=True, title=plot_title)

## Frequency band grouping

In [2]:
import os

# Directory holding the exported CWT files. It can be overridden with the
# RAVDESS_TRANSFORMS_DIR environment variable so the notebook also runs on
# other machines; the default is the original author's local path.
transforms_dir = os.environ.get('RAVDESS_TRANSFORMS_DIR', '/Users/benrabin/Archive/ravdess-transforms')

bands = freq_band_groupings(
    coefs_npz_path=os.path.join(transforms_dir, 'cwt_coefs_30.npz'),
    freqs_npy_path=os.path.join(transforms_dir, 'cwt_freqs.npy'),
    subsample_every=12,
    batch_size=12,
    debug=True
)
bands

[0, 25) ~ [25, 50): presplit
  [0, 12) ~ [12, 25): 0.01367, 0.0
  [25, 37) ~ [37, 50): 0.23203, 0.0
    [25, 31) ~ [31, 37): 0.10239, 0.0
      [25, 28) ~ [28, 31): 0.04783, 0.0
      [31, 34) ~ [34, 37): 0.06452, 0.0
        [31, 32) ~ [32, 34): 0.02836, 0.0
        [34, 35) ~ [35, 37): 0.03642, 0.0
    [37, 43) ~ [43, 50): 0.15383, 0.0
      [37, 40) ~ [40, 43): 0.04812, 0.0
      [43, 46) ~ [46, 50): 0.13174, 0.0
        [43, 44) ~ [44, 46): 0.04310, 0.0
        [46, 48) ~ [48, 50): 0.10086, 0.0
          [46, 47) ~ [47, 48): 0.04212, 0.0
          [48, 49) ~ [49, 50): 0.08897, 0.0


[(0, 25),
 (25, 31),
 (31, 34),
 (34, 37),
 (37, 43),
 (43, 46),
 (46, 48),
 (48, 49),
 (49, 50)]