In [1]:
!pip install SpeechRecognition
!pip install matplotlib
!pip install librosa
!pip install kagglehub



In [2]:
import kagglehub

# Fetch the most recent version of the audio-emotions dataset; `path` is the
# local folder that holds the downloaded files.
path = kagglehub.dataset_download("uldisvalainis/audio-emotions")

print(f"Path to dataset files: {path}")

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /usr4/cs350rm/wnapier/.cache/kagglehub/datasets/uldisvalainis/audio-emotions/versions/1


In [3]:
import json
import os

import librosa
import numpy as np
import pandas as pd

# Emotion labels. Each label is also the name of a sub-folder of base_directory
# (traverse_emotion joins the two to find the .wav files).
# NOTE(review): 'Suprised' is presumably spelled to match the dataset's folder
# name -- confirm against the downloaded files before "correcting" it.
emotions = ['Happy', 'Sad', 'Disgusted', 'Neutral', 'Angry', 'Suprised', 'Fearful']

# Base directory: the "Emotions" folder inside the dataset location returned by
# kagglehub.dataset_download in the download cell. Deriving it from `path`
# avoids hard-coding a per-user cache location.
base_directory = os.path.join(path, "Emotions")

In [14]:
import speech_recognition as sr
# Module-level Recognizer instance; getVectorOfWords uses it to read and transcribe audio.
recognizer = sr.Recognizer()
def getVectorOfWords(file_path):
    """Transcribe a .wav file to text.

    Despite the name, this returns the plain transcript string, not a vector;
    the text can be tokenized afterwards (e.g. with BERT, as in the homework)
    to turn it into an array for analysis.

    Args:
        file_path: Path to the .wav file to transcribe.

    Returns:
        The transcript as a string, or None when the recognizer raises
        sr.UnknownValueError.

    Note:
        Only sr.UnknownValueError is handled here. Any other exception raised
        while reading the file or running the recognition propagates to the
        caller.
    """
    with sr.AudioFile(file_path) as source:
        audio = recognizer.record(source)
    try:
        # Recognize once and reuse the result, so each file costs a single
        # recognition call and the printed and returned text always agree.
        transcription = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("UnknownValue")
        return None
    print("Transcription:", transcription)
    return transcription

In [5]:
import librosa.display
import matplotlib.pyplot as plt
def getSpectogram(file_path, emotion, count):
    """Render a .wav file's spectrogram to a PNG and return the PNG's path.

    Args:
        file_path: Path to the .wav file to load.
        emotion: Emotion label; used as the prefix of the image file name.
        count: Counter appended to the emotion label to form the file name.

    Returns:
        The path of the saved image, "./images/{emotion}{count}.png".
    """
    # sr=None preserves the file's own sample rate (replace with 16,000 if needed).
    # The local is named sample_rate so it is not confused with the
    # speech_recognition module, which the notebook imports as `sr`.
    y, sample_rate = librosa.load(file_path, sr=None)

    # Compute the spectrogram
    D = librosa.stft(y)                                     # Short-Time Fourier Transform
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)   # Convert to decibel scale

    # Make sure the output folder exists; savefig does not create directories.
    output_dir = "./images"
    os.makedirs(output_dir, exist_ok=True)
    # TODO: figure out naming conventions for the file -- either use path or
    # just have a counter that we pass in
    output_image_path = f"{output_dir}/{emotion}{count}.png"

    plt.figure(figsize=(10, 6))
    try:
        # can change the cmap to "viridis" or "plasma" for different color themes;
        # log frequency scale to mimic human audio perception
        librosa.display.specshow(S_db, sr=sample_rate, x_axis="time", y_axis="log", cmap="magma")
        plt.colorbar(format="%+2.0f dB")
        plt.title("Spectrogram")
        plt.xlabel("Time (s)")
        plt.ylabel("Frequency (Hz)")
        plt.tight_layout()
        plt.savefig(output_image_path, dpi=300)  # Save as PNG with high resolution
    finally:
        # Close the figure even if plotting or saving fails, so figures do not
        # pile up in memory when this is called in a loop.
        plt.close()

    return output_image_path

In [6]:
def clearImagesFolder():
    """Remove every .png file from the "images" folder in the working directory.

    Called before a run so that images from a previous run do not mix with new
    ones. Other files and any sub-directories are left alone. If the folder
    does not exist yet (e.g. on the first run) it is created, so there is
    nothing to delete and no error is raised.
    """
    directory = os.path.join(os.getcwd(), "images")
    # First run: the folder may not exist yet, and os.listdir would raise.
    os.makedirs(directory, exist_ok=True)
    for file_name in os.listdir(directory):
        file_path = os.path.join(directory, file_name)
        if os.path.isfile(file_path) and file_name.endswith('.png'):
            os.remove(file_path)
            print(file_path + " has been removed successfully")

In [7]:
def traverse_emotion(emotion, directory, data, limit=10):
    """Process up to `limit` .wav files of one emotion and append a record for each.

    For every processed file the transcription (getVectorOfWords) and the
    spectrogram image path (getSpectogram) are computed and appended to `data`
    as {"Transcription", "Spectogram", "Emotion"}.

    Args:
        emotion: Emotion label; also the name of the sub-folder of `directory`
            that holds the audio files.
        directory: Folder containing one sub-folder per emotion.
        data: List that is extended in place with one dict per processed file.
        limit: Maximum number of .wav files to process, so a run doesn't take
            THAT long. Defaults to 10.

    Returns:
        None. Results are delivered through `data`.
    """
    emotion_dir = os.path.join(directory, emotion)

    processed = 0
    # os.listdir returns entries in arbitrary order; sorting makes the sampled
    # subset (and the image numbering) the same on every run.
    for file_name in sorted(os.listdir(emotion_dir)):
        if processed >= limit:
            break
        file_path = os.path.join(emotion_dir, file_name)

        # Skip anything that is not a .wav file without counting it, so stray
        # entries neither use up the limit nor leave gaps in image numbering.
        if not (os.path.isfile(file_path) and file_name.endswith('.wav')):
            continue

        transcription = getVectorOfWords(file_path)
        image_path = getSpectogram(file_path, emotion, processed)
        data.append({"Transcription": transcription, "Spectogram": image_path, "Emotion": emotion})
        processed += 1

In [8]:
def traverse_emotions():
    """Build the dataset table across every label in `emotions`.

    Clears previously generated images, runs traverse_emotion for each emotion
    under `base_directory`, and collects the resulting records.

    Returns:
        A pandas DataFrame with one row per record appended by
        traverse_emotion.
    """
    clearImagesFolder()  # deletes the .png files from the images folder
    records = []
    for emotion in emotions:
        traverse_emotion(emotion, base_directory, records)
    # Build the frame once from the full list of records.
    return pd.DataFrame(records)

In [15]:
# Build the dataset table for all emotions. traverse_emotions first clears the
# images folder, then processes each emotion folder in turn.
# NOTE(review): this presumably issues one speech-recognition request per audio
# file, so the cell can take a while to finish -- confirm before running on more files.
global_df = traverse_emotions()
print(global_df)

/projectnb/ds340/students/wnapier/images/Happy8.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy9.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy6.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy4.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy7.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy2.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Sad1.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy3.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Sad0.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy5.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy1.png has been removed successfully
/projectnb/ds340/students/wnapier/images/Happy0.png has been removed successfully
Transcription: dogs 

KeyboardInterrupt: 