In [None]:
# assign directory
import git
from pathlib import Path
import os
# Resolve the repository root: git.Repo is asked to search parent directories
# starting from the current one, and its working tree directory becomes ROOT_DIR.
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
# Make utilities/ the working directory before the two imports below.
# NOTE(review): presumably transform and plotting live in utilities/ and are
# only importable while it is the current directory — confirm.
os.chdir(os.path.join(ROOT_DIR, "utilities"))
# NOTE(review): star-imports hide which names later cells depend on (tqdm, for
# example, is used further down without a visible import) — consider explicit imports.
from transform import *
from plotting import *
# Continue with dataset-preparation/ as the working directory for the rest of the notebook.
os.chdir(os.path.join(ROOT_DIR, "dataset-preparation"))

# Folder holding the raw RAVDESS wav recordings.
data_dir  = os.path.join(ROOT_DIR, 'raw-data', 'ravdess', 'full-ravdess-wav')
# List the directory once, drop the macOS ".DS_Store" artefact, and sort so the
# order is reproducible (os.listdir returns entries in arbitrary order).
file_names = sorted(filename for filename in os.listdir(data_dir) if filename != ".DS_Store")
# Full paths, derived from file_names so both lists stay index-aligned.
file_list = [os.path.join(data_dir, filename) for filename in file_names]

## Audio Processing using Librosa and soundfile

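* Detects audio files that contain clipping and excludes them from the processed data set
* Trims leading and trailing silence
* Shifts each recording so that its median voiced pitch matches a common target (150 Hz)
* Normalizes amplitude to zero mean and unit variance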

In [None]:
import librosa #Need to pip install librosa 
import soundfile as sf #Need to pip install soundfile
from joblib import Parallel, delayed
import numpy as np

In [None]:
# Creates new wav files that have been processed

# Absolute sample value at or above which is_clipped() reports a recording as clipped.
# NOTE(review): presumably the audio is loaded as floats scaled to roughly
# [-1, 1], which would make 1 full scale — confirm for this dataset's encoding.
CLIP_THRESHOLD = 1 
# Destination folder for the processed copies written by process_audio_file().
OUTPUT_DIR = os.path.join(ROOT_DIR, 'raw-data', 'ravdess', 'full-ravdess-wav-processed')

def is_clipped(y, threshold = CLIP_THRESHOLD):
    """Tell whether any sample of ``y`` reaches the clipping threshold.

    Parameters
    ----------
    y : array-like
        Audio samples.
    threshold : number, optional
        Magnitude at or above which a sample counts as clipped. Defaults to
        the value ``CLIP_THRESHOLD`` had when this function was defined.

    Returns
    -------
    numpy bool
        True if at least one ``abs(sample) >= threshold``, otherwise False
        (also False for an empty input).
    """
    magnitudes = np.abs(y)
    at_or_over_limit = magnitudes >= threshold
    return np.any(at_or_over_limit)

def normalize_peaks(y):
    """Standardize ``y`` to zero mean and unit standard deviation.

    Despite the name, this is a z-score: the mean is subtracted and the result
    is divided by the standard deviation. The output is therefore not limited
    to the [-1, 1] range.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.

    Returns
    -------
    np.ndarray
        The standardized signal, or ``y`` itself (same object) when its
        standard deviation is not greater than zero, e.g. a constant signal.
    """
    spread = np.std(y)
    if spread > 0:
        return (y - np.mean(y)) / spread
    # Nothing to scale by: hand the input back untouched.
    return y

def normalize_pitch_shifting(y, sr, target_hz=150.0):
    """Shift ``y`` so that its median voiced pitch lands on ``target_hz``.

    The fundamental frequency is tracked with ``librosa.pyin`` between the
    notes C2 and C7. The median f0 over the voiced frames is taken as the
    recording's current pitch, and the whole signal is shifted by the number
    of semitones that maps that median onto ``target_hz``.

    Parameters
    ----------
    y : np.ndarray
        Audio samples (presumably mono floats as returned by ``librosa.load``
        — confirm against the caller).
    sr : int
        Sampling rate of ``y`` in Hz.
    target_hz : float, default 150.0
        Frequency the median voiced f0 is moved to.

    Returns
    -------
    np.ndarray
        The pitch-shifted signal, or ``y`` unchanged when no usable pitch
        estimate exists (no voiced frames, or a non-finite / non-positive median).
    """
    # pyin falls back to its own default sampling rate unless sr is passed;
    # without it, f0 comes out in the wrong units for audio loaded at native rate.
    f0, voiced_flag, _ = librosa.pyin(y,
                                      fmin=librosa.note_to_hz('C2'),
                                      fmax=librosa.note_to_hz('C7'),
                                      sr=sr)

    voiced_f0 = f0[voiced_flag]
    voiced_f0 = voiced_f0[~np.isnan(voiced_f0)]

    # With no voiced frame the median would be NaN, which slips past a "<= 0"
    # test and would reach pitch_shift as a NaN step count.
    if voiced_f0.size == 0:
        return y

    current_pitch_hz = np.median(voiced_f0)
    if not np.isfinite(current_pitch_hz) or current_pitch_hz <= 0:
        return y

    # 12 semitones per octave; log2 of the frequency ratio gives octaves.
    n_steps = 12 * np.log2(target_hz / current_pitch_hz)

    return librosa.effects.pitch_shift(y=y, sr=sr, n_steps=n_steps)

def process_audio_file(filename):
    """Clean one recording from ``data_dir`` and write it to ``OUTPUT_DIR``.

    Steps, in order: load at the file's native sampling rate, skip the file if
    ``is_clipped`` flags it, trim leading/trailing audio more than 20 dB below
    the peak, pitch-normalize to a 150 Hz median, standardize the amplitude
    with ``normalize_peaks``, and write the result under the same file name.

    Parameters
    ----------
    filename : str
        Name of a file inside ``data_dir``. Names without a ``.wav`` extension
        (case-insensitive) are ignored.

    Returns
    -------
    None
        Nothing is returned; skipped files (wrong extension or clipped)
        simply produce no output file.
    """
    # Match the extension itself, not any name that merely ends in the letters "wav".
    if not filename.lower().endswith('.wav'):
        return

    path_in = os.path.join(data_dir, filename)
    path_out = os.path.join(OUTPUT_DIR, filename)

    # sr=None keeps the recording's own sampling rate instead of resampling.
    y, sr = librosa.load(path_in, sr=None)

    if is_clipped(y):
        return

    y_trimmed, _ = librosa.effects.trim(y, top_db=20)

    # pitch normalization
    y_normalized = normalize_pitch_shifting(y_trimmed, sr, target_hz=150.0)

    y_loudness_normalized = normalize_peaks(y_normalized)

    # normalize_peaks rescales to unit variance, so many samples lie outside
    # [-1, 1]. The default WAV subtype (16-bit PCM) cannot represent those
    # values, so store 32-bit float samples to keep the signal intact.
    sf.write(path_out, y_loudness_normalized, sr, subtype='FLOAT')

# Make sure the destination folder exists before any worker writes into it.
# exist_ok=True keeps re-running this cell harmless; makedirs also creates
# missing parent folders, and still raises if the path exists but is not a directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Announce how many directory entries are about to be submitted.
total_files = len(file_names)
print(f"Starting processing for {total_files} files...")

# Wrap the names in a progress bar, then hand one delayed call per entry to
# joblib; n_jobs=-1 asks joblib to use every available CPU.
progress = tqdm(file_names, desc="Processing audio files")
jobs = (delayed(process_audio_file)(filename) for filename in progress)
Parallel(n_jobs=-1)(jobs)

print("\nProcessing complete. All files have been handled.")
