# Add noise to data

## Goal

The goal is to add noise to data (train, test, val) to test whether model works well.

## Data

The data in this notebook is publicly available voice recordings featuring hypernasality and control groups. In the future we hope to train our model on private patient data from Vanderbilt University Medical Center (VUMC).

In [1]:
!pip install torch
!pip install datasets
!pip install librosa
!pip install transformers

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.18.0 dill-0.3.8 multiprocess-0.70.16


In [2]:
# import libraries
import datasets
from datasets import load_dataset, DatasetDict,  Audio
import pandas as pd
import os
import glob
import librosa
import io
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, accuracy_score
from transformers import WhisperModel, WhisperFeatureExtractor, AdamW
import torch
import torch.nn as nn
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset
from sklearn.metrics import f1_score, classification_report, accuracy_score

In [3]:
!pip install SoundFile
import soundfile as sf



## Load data

In [4]:
# prompt: mount google drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
data_path = "/content/drive/My Drive/vandy 24spring/SLP project/data/WAV_PUBLIC_SAMPLES" #set a path for the files
files = os.listdir(data_path)
print(files)

train_catalog = "/content/drive/My Drive/vandy 24spring/SLP project/data/train.csv"
test_catalog = "/content/drive/My Drive/vandy 24spring/SLP project/data/test.csv"

['CONTROLS_WAV', 'CASES_WAV']


In [6]:
train_metadata = pd.read_csv(train_catalog)
train_metadata #already has .wav_filename

Unnamed: 0,File_Name,Sampling_Rate_(Hz),Channels,Duration_(seconds),folder,hypernasality,original_text,OPENAI_Whisper_text,WAV_filename,WAV_folder
0,ACPA ted had a dog with white feet-3.mp3,44100.0,1.0,4.13,CASES,1.0,ted had a dog with white feet,Ted and a dog with white feet.,ACPA ted had a dog with white feet-3.wav,CASES_WAV
1,cdc 4 (and then go to school).mp3,44100.0,2.0,1.41,CONTROLS,0.0,and then go to school,and then go to school.,cdc 4 (and then go to school).wav,CONTROLS_WAV
2,Video 1_4 (and can I have some more material).mp3,44100.0,2.0,3.60,CONTROLS,0.0,and can I have some more material,And can I have some more material?,Video 1_4 (and can I have some more material).wav,CONTROLS_WAV
3,NEW - video 2 (three times).mp3,44100.0,2.0,1.28,CONTROLS,0.0,three times,Three times.,NEW - video 2 (three times).wav,CONTROLS_WAV
4,cdc 4 (and then he brushed his teeth).mp3,44100.0,2.0,1.52,CONTROLS,0.0,and then he brushed his teeth,And then he brushed his teeth.,cdc 4 (and then he brushed his teeth).wav,CONTROLS_WAV
...,...,...,...,...,...,...,...,...,...,...
142,video 1 (pizza bundt).mp3,44100.0,2.0,1.80,CONTROLS,0.0,pizza bundt,Pizza Funt!,video 1 (pizza bundt).wav,CONTROLS_WAV
143,ACPA most boys like to play football-3.mp3,48000.0,1.0,3.31,CASES,1.0,most boys like to play football,Most boys like to play football.,ACPA most boys like to play football-3.wav,CASES_WAV
144,Facebook (take a tire).mp3,44100.0,1.0,1.75,CASES,1.0,take a tire,See you next time!,Facebook (take a tire).wav,CASES_WAV
145,Video 5_1 (feet).mp3,44100.0,2.0,1.04,CASES,1.0,feet,Peace.,Video 5_1 (feet).wav,CASES_WAV


In [7]:
test_metadata = pd.read_csv(train_catalog)

test_metadata #already has .wav_filename

Unnamed: 0,File_Name,Sampling_Rate_(Hz),Channels,Duration_(seconds),folder,hypernasality,original_text,OPENAI_Whisper_text,WAV_filename,WAV_folder
0,ACPA ted had a dog with white feet-3.mp3,44100.0,1.0,4.13,CASES,1.0,ted had a dog with white feet,Ted and a dog with white feet.,ACPA ted had a dog with white feet-3.wav,CASES_WAV
1,cdc 4 (and then go to school).mp3,44100.0,2.0,1.41,CONTROLS,0.0,and then go to school,and then go to school.,cdc 4 (and then go to school).wav,CONTROLS_WAV
2,Video 1_4 (and can I have some more material).mp3,44100.0,2.0,3.60,CONTROLS,0.0,and can I have some more material,And can I have some more material?,Video 1_4 (and can I have some more material).wav,CONTROLS_WAV
3,NEW - video 2 (three times).mp3,44100.0,2.0,1.28,CONTROLS,0.0,three times,Three times.,NEW - video 2 (three times).wav,CONTROLS_WAV
4,cdc 4 (and then he brushed his teeth).mp3,44100.0,2.0,1.52,CONTROLS,0.0,and then he brushed his teeth,And then he brushed his teeth.,cdc 4 (and then he brushed his teeth).wav,CONTROLS_WAV
...,...,...,...,...,...,...,...,...,...,...
142,video 1 (pizza bundt).mp3,44100.0,2.0,1.80,CONTROLS,0.0,pizza bundt,Pizza Funt!,video 1 (pizza bundt).wav,CONTROLS_WAV
143,ACPA most boys like to play football-3.mp3,48000.0,1.0,3.31,CASES,1.0,most boys like to play football,Most boys like to play football.,ACPA most boys like to play football-3.wav,CASES_WAV
144,Facebook (take a tire).mp3,44100.0,1.0,1.75,CASES,1.0,take a tire,See you next time!,Facebook (take a tire).wav,CASES_WAV
145,Video 5_1 (feet).mp3,44100.0,2.0,1.04,CASES,1.0,feet,Peace.,Video 5_1 (feet).wav,CASES_WAV


### Process train_df, val_df, test data

In [8]:
train_df, val_df = train_test_split(train_metadata, test_size = 0.3, random_state = 42)

#### train_df

In [9]:
train_df

Unnamed: 0,File_Name,Sampling_Rate_(Hz),Channels,Duration_(seconds),folder,hypernasality,original_text,OPENAI_Whisper_text,WAV_filename,WAV_folder
93,ACPA Santa came home since the snow fell.mp3,44100.0,1.0,3.19,CONTROLS,0.0,Santa came home since the snow fell,Santa came home since the snow fell.,ACPA Santa came home since the snow fell.wav,CONTROLS_WAV
140,cdc 5 (can I play with Jack).mp3,44100.0,2.0,1.57,CONTROLS,0.0,can I play with Jack,Can I play with Jack?,cdc 5 (can I play with Jack).wav,CONTROLS_WAV
108,cdc 6 (the polar bears are dancing).mp3,44100.0,2.0,2.32,CONTROLS,0.0,the polar bears are dancing,"Um, the polar bears are dancing.",cdc 6 (the polar bears are dancing).wav,CONTROLS_WAV
0,ACPA ted had a dog with white feet-3.mp3,44100.0,1.0,4.13,CASES,1.0,ted had a dog with white feet,Ted and a dog with white feet.,ACPA ted had a dog with white feet-3.wav,CASES_WAV
73,Video 1_4 (seesaw).mp3,44100.0,2.0,1.15,CASES,1.0,seesaw,P.S.A.,Video 1_4 (seesaw).wav,CASES_WAV
...,...,...,...,...,...,...,...,...,...,...
71,Video 4_4 (well it will help me).mp3,44100.0,2.0,2.32,CASES,1.0,well it will help me,"Wow, em vừa học đĩa",Video 4_4 (well it will help me).wav,CASES_WAV
106,ACPA buy baby a bib.mp3,48000.0,1.0,1.92,CASES,1.0,buy baby a bib,"Hi, I'm Hayley Mim.",ACPA buy baby a bib.wav,CASES_WAV
14,Video 1_18 (pretend it stops running when the ...,44100.0,2.0,5.80,CONTROLS,0.0,pretend it stops running when the car is going,"When it stops running, when the car is going.",Video 1_18 (pretend it stops running when the ...,CONTROLS_WAV
92,Video 2_4 (daddy).mp3,44100.0,2.0,0.57,CASES,1.0,daddy,Fanny,Video 2_4 (daddy).wav,CASES_WAV


In [10]:
# train set
train_files = train_df["WAV_filename"].tolist() #extracts all the filenames from the column named "WAV_filename" of train_df and and converts them into a list

train_folder = train_df["WAV_folder"].tolist() #this line gets all the folder names from the column "WAV_folder" in train_df and converts them to a list

train_full_paths = [os.path.join(data_path,train_folder[i], train_files[i]) for i in range(0,len(train_files))]
#  iterate through each index i, combining data_path, the ith train_folder, and the ith train_files to create full paths to each audio file.

In [11]:
train_files

['ACPA Santa came home since the snow fell.wav',
 'cdc 5 (can I play with Jack).wav',
 'cdc 6 (the polar bears are dancing).wav',
 'ACPA ted had a dog with white feet-3.wav',
 'Video 1_4 (seesaw).wav',
 'ACPA Ted had a dog with white feet.wav',
 'cdc 7 (yellow).wav',
 'ACPA in the safe deposit department.wav',
 'Video 6_3 (puppy will pull a rope).wav',
 'ACPA opening and closing boxes.wav',
 'Video 1_5 (see the seesaw).wav',
 'ACPA Tom had ham and eggs for breakfast-2.wav',
 'Video 1_6 (good).wav',
 'cdc 8 (went to the park).wav',
 'video 1 (pizza bundt).wav',
 'cdc 4 (and then he ate breakfast).wav',
 'NEW - video 2 (bugs and spiders, I protect).wav',
 'ACPA do you have a brother or a sister-3.wav',
 'ACPA I like ice cream.wav',
 'Facebook (go get the cookie).wav',
 'video 1 (fire).wav',
 'video 1 (puppies).wav',
 'Video 3_2 (ted and todd).wav',
 'Video 1_14 (fill up the gas).wav',
 'ACPA we shouldn_t play in the street-3.wav',
 'ACPA my other son is a sophomore in high school.wav',
 

In [12]:
train_folder

['CONTROLS_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV',
 'CASES_WAV',
 'CONTROLS_WAV',
 'CASES_WAV

In [13]:
train_labels = train_df["hypernasality"].tolist()

train_labels[0:10]

[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0]

#### val_set

In [14]:
# val set
val_files = val_df["WAV_filename"].tolist()

val_folder = val_df["WAV_folder"].tolist()

val_full_paths = [os.path.join(data_path,val_folder[i], val_files[i]) for i in range(0,len(val_files))] #

val_labels = val_df["hypernasality"].tolist()

In [15]:
len(val_labels)

45

#### test

In [16]:
test_files = test_metadata["WAV_filename"].tolist()

test_folder = test_metadata["WAV_folder"].tolist()

test_full_paths = [os.path.join(data_path,test_folder[i], test_files[i]) for i in range(0,len(test_files))]

#test_full_paths

In [17]:
test_labels = test_metadata["hypernasality"].tolist()

### Create Pytorch datasets

In [18]:
train_audio_dataset = datasets.Dataset.from_dict({"audio": train_full_paths,
                                                  "labels":train_labels}
                                                 ).cast_column("audio", Audio(sampling_rate=16_000))

test_audio_dataset = datasets.Dataset.from_dict({"audio": test_full_paths,
                                                  "labels": test_labels}
                                                 ).cast_column("audio", Audio(sampling_rate=16_000))

val_audio_dataset = datasets.Dataset.from_dict({"audio": val_full_paths,
                                                 "labels": val_labels }
                                             ).cast_column("audio", Audio(sampling_rate=16_000))

In [19]:
train_audio_dataset

Dataset({
    features: ['audio', 'labels'],
    num_rows: 102
})

#### Listen

In [26]:
# Assuming 'train_audio_dataset' is your dataset
sample_audio = train_audio_dataset[0]['audio']['array']
sample_rate = train_audio_dataset[0]['audio']['sampling_rate']


In [27]:
from IPython.display import Audio

# Play the audio
Audio(sample_audio, rate=sample_rate)


### Define the Noise-Adding Function

In [22]:
def add_gaussian_noise(audio_path, noise_level):
    # Load the audio file
    signal, sr = librosa.load(audio_path, sr=None)

    # Generate Gaussian noise
    noise = np.random.normal(0, noise_level, signal.shape)

    # Add the noise to the original signal
    noisy_signal = signal + noise

    return noisy_signal, sr

### Practice: Define the Noise-Adding and Saving Function and Add Noise to One Sample

In [42]:
audio_path = '/content/drive/My Drive/vandy 24spring/SLP project/data/WAV_PUBLIC_SAMPLES/CONTROLS_WAV/Video 1_19 (tires).wav'
def add_gaussian_noise_and_save(audio_path, noise_level, save_dir):
    # Load the audio file
    signal, sr = librosa.load(audio_path, sr=None)

    # Generate Gaussian noise
    noise = np.random.normal(0, noise_level, signal.shape)

    # Add the noise to the original signal
    noisy_signal = signal + noise

    # Create the save directory if it doesn't exist
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Determine the new file name and save path
    filename = os.path.basename(audio_path)
    noisy_filename = f"NOISE-{filename}"
    noisy_file_path = os.path.join(save_dir, noisy_filename)

    # Save the noisy signal
    sf.write(noisy_file_path, noisy_signal, sr)

    return noisy_file_path

noise_level= 0.01
save_dir = 'noisy_data_practice'
noisy_audio_path = add_gaussian_noise_and_save(audio_path, noise_level, save_dir)
print(f"Noisy audio file saved at: {noisy_audio_path}")


Noisy audio file saved at: noisy_data_practice/NOISE-Video 1_19 (tires).wav


In [43]:
# Play the noisy audio
Audio(noisy_audio_path)

### Define the Noise-Adding and Saving Function and Add Noise to ALL Data

In [25]:
import os
import librosa
import numpy as np
import soundfile as sf
from datasets import Dataset, Audio

# Define the function to add Gaussian noise and save the modified audio
def add_gaussian_noise_and_save(audio_path, noise_level, save_dir):
    # Load the audio file
    signal, sr = librosa.load(audio_path, sr=None)

    # Generate Gaussian noise
    noise = np.random.normal(0, noise_level, signal.shape)

    # Add the noise to the original signal
    noisy_signal = signal + noise

    # Create the save directory if it doesn't exist
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Determine the new file name and save path
    filename = os.path.basename(audio_path)
    noisy_filename = f"NOISE-{filename}"
    noisy_file_path = os.path.join(save_dir, noisy_filename)

    # Save the noisy signal
    sf.write(noisy_file_path, noisy_signal, sr)

    return noisy_file_path

# Function to process a dataset and add Gaussian noise to each audio file
def process_dataset_and_add_noise(dataset, noise_level, save_dir):
    new_audio_paths = []
    for audio_path in dataset['audio']:
        noisy_path = add_gaussian_noise_and_save(audio_path['path'], noise_level, save_dir)
        new_audio_paths.append(noisy_path)

    # Update the dataset with new audio paths
    new_dataset = dataset.map(lambda x, idx: {"audio": {"path": new_audio_paths[idx]}}, with_indices=True)
    return new_dataset

# Example usage
noise_level = 0.01  # Adjust the noise level as needed
save_dir = "noisy_data"

# Assuming train_audio_dataset, test_audio_dataset, val_audio_dataset are defined as shown
# Apply noise and save the updated datasets
train_noisy_dataset = process_dataset_and_add_noise(train_audio_dataset, noise_level, os.path.join(save_dir, 'train'))
test_noisy_dataset = process_dataset_and_add_noise(test_audio_dataset, noise_level, os.path.join(save_dir, 'test'))
val_noisy_dataset = process_dataset_and_add_noise(val_audio_dataset, noise_level, os.path.join(save_dir, 'val'))


Map:   0%|          | 0/102 [00:00<?, ? examples/s]

Map:   0%|          | 0/147 [00:00<?, ? examples/s]

Map:   0%|          | 0/45 [00:00<?, ? examples/s]

### Download to local

In [33]:
!zip -r noisy_data.zip noisy_data/

  adding: noisy_data/ (stored 0%)
  adding: noisy_data/test/ (stored 0%)
  adding: noisy_data/test/NOISE-NEW - video 7 (pamper).wav (deflated 13%)
  adding: noisy_data/test/NOISE-Video 1_3 (its all of our birthdays).wav (deflated 14%)
  adding: noisy_data/test/NOISE-NEW - video 2 (they just sit on the table and I say hi to them).wav (deflated 18%)
  adding: noisy_data/test/NOISE-NEW - video 2 (play some games).wav (deflated 21%)
  adding: noisy_data/test/NOISE-video 1 (stop).wav (deflated 15%)
  adding: noisy_data/test/NOISE-ACPA cookies are good to eat-2.wav (deflated 16%)
  adding: noisy_data/test/NOISE-NEW - video 2 (bugs and spiders, I protect).wav (deflated 19%)
  adding: noisy_data/test/NOISE-Facebook (pick up the books).wav (deflated 11%)
  adding: noisy_data/test/NOISE-cdc 5 (can I play with Jack).wav (deflated 15%)
  adding: noisy_data/test/NOISE-Video 2_9 (she went shopping).wav (deflated 10%)
  adding: noisy_data/test/NOISE-ACPA whatever they need.wav (deflated 17%)
  adding

In [34]:
from google.colab import files
files.download('noisy_data.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>