In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Set up plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

In [3]:
def explore_dataset_structure(dataset_path):
    """
    Explore and understand the TESS dataset organization
    
    Args:
        dataset_path: Path to the TESS dataset folder
        
    Learning Point:
        - Understanding how data is organized is the FIRST step
        - This helps us write code to load all files systematically
    """
    print("="*70)
    print("DATASET STRUCTURE EXPLORATION")
    print("="*70)
    
    # Get all subdirectories (each represents actress + emotion combination)
    folders = [f for f in os.listdir(dataset_path) 
               if os.path.isdir(os.path.join(dataset_path, f))]
    
    print(f"\nüìÅ Total folders found: {len(folders)}")
    print(f"\nFolder names (Actress_Emotion format):")
    for folder in sorted(folders):
        # Count files in each folder
        folder_path = os.path.join(dataset_path, folder)
        wav_files = [f for f in os.listdir(folder_path) if f.endswith('.wav')]
        print(f"  - {folder}: {len(wav_files)} files")
    
    # Extract unique actresses and emotions
    actresses = set([f.split('_')[0] for f in folders])
    emotions = set([f.split('_')[1] for f in folders])
    
    print(f"\nüë§ Actresses: {sorted(actresses)}")
    print(f"üòä Emotions: {sorted(emotions)}")
    
    return folders, sorted(actresses), sorted(emotions)

def create_dataset_dataframe(dataset_path):
    """
    Create a pandas DataFrame with all audio file information
    
    Learning Point:
        - DataFrames help organize metadata
        - Makes it easy to analyze distribution and select files
    
    Returns:
        DataFrame with columns: file_path, actress, emotion, word
    """
    print("\n" + "="*70)
    print("CREATING DATASET INVENTORY")
    print("="*70)
    
    data = []
    
    # Iterate through all folders
    for folder in os.listdir(dataset_path):
        folder_path = os.path.join(dataset_path, folder)
        
        if not os.path.isdir(folder_path):
            continue
            
        # Parse folder name: e.g., "OAF_angry" -> actress=OAF, emotion=angry
        parts = folder.split('_')
        actress = parts[0]
        emotion = parts[1]
        
        # Get all WAV files in this folder
        for file in os.listdir(folder_path):
            if file.endswith('.wav'):
                # Example filename: "OAF_angry_back.wav"
                # Extract the word (last part before .wav)
                word = file.replace('.wav', '').split('_')[-1]
                
                data.append({
                    'file_path': os.path.join(folder_path, file),
                    'actress': actress,
                    'emotion': emotion,
                    'word': word,
                    'filename': file
                })
    
    df = pd.DataFrame(data)
    
    print(f"\n‚úÖ Dataset DataFrame created!")
    print(f"   Total audio files: {len(df)}")
    print(f"\nFirst few entries:")
    print(df.head())
    
    # Show distribution
    print(f"\nüìä Distribution by Emotion:")
    print(df['emotion'].value_counts().sort_index())
    
    print(f"\nüìä Distribution by Actress:")
    print(df['actress'].value_counts())
    
    return df

In [4]:
def load_and_analyze_audio(file_path):
    """
    Load an audio file and extract basic information
    
    Learning Points:
        - Audio is stored as an array of amplitude values
        - Sample rate determines time resolution
        - Duration = len(audio) / sample_rate
    
    Args:
        file_path: Path to WAV file
        
    Returns:
        audio: numpy array of audio samples
        sr: sample rate (samples per second)
    """
    # Load audio file
    # sr=None means keep original sample rate
    audio, sr = librosa.load(file_path, sr=None)
    
    # Calculate basic properties
    duration = len(audio) / sr
    
    print(f"\nüéµ Audio File Analysis:")
    print(f"   File: {os.path.basename(file_path)}")
    print(f"   Sample Rate: {sr} Hz (samples per second)")
    print(f"   Total Samples: {len(audio)}")
    print(f"   Duration: {duration:.2f} seconds")
    print(f"   Amplitude Range: [{audio.min():.4f}, {audio.max():.4f}]")
    print(f"   Shape: {audio.shape}")
    
    return audio, sr


def visualize_audio_waveform(audio, sr, emotion, word):
    """
    Visualize the audio waveform (time domain representation)
    
    Learning Point:
        - Waveform shows amplitude over time
        - X-axis: time, Y-axis: amplitude
        - You can SEE differences in emotions!
    """
    plt.figure(figsize=(14, 4))
    
    # Create time axis
    time = np.linspace(0, len(audio)/sr, len(audio))
    
    plt.plot(time, audio, linewidth=0.5)
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Amplitude', fontsize=12)
    plt.title(f'Waveform: {emotion.upper()} - "{word}"', fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3)
    
    # Add some annotations
    plt.axhline(y=0, color='r', linestyle='--', alpha=0.3, linewidth=1)
    
    plt.tight_layout()
    plt.show()
    
    print("\nüí° What you're seeing:")
    print("   - The wave shows how sound pressure changes over time")
    print("   - Higher peaks = louder moments")
    print("   - Rapid oscillations = higher pitch")
    print("   - Notice how different emotions have different patterns!")


def visualize_spectrogram(audio, sr, emotion, word):
    """
    Visualize the spectrogram (time-frequency representation)
    
    Learning Point:
        - Shows WHICH frequencies are present at WHICH times
        - Y-axis: frequency (pitch), X-axis: time, Color: intensity
        - This is what we'll feed to neural networks!
    """
    plt.figure(figsize=(14, 5))
    
    # Compute spectrogram
    D = librosa.amplitude_to_db(np.abs(librosa.stft(audio)), ref=np.max)
    
    # Display
    librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', cmap='viridis')
    plt.colorbar(format='%+2.0f dB', label='Intensity (dB)')
    plt.title(f'Spectrogram: {emotion.upper()} - "{word}"', fontsize=14, fontweight='bold')
    plt.xlabel('Time (seconds)', fontsize=12)
    plt.ylabel('Frequency (Hz)', fontsize=12)
    
    plt.tight_layout()
    plt.show()
    
    print("\nüí° What you're seeing:")
    print("   - Bright areas = strong frequencies at that time")
    print("   - Horizontal bands = sustained pitches")
    print("   - Different emotions create different patterns!")
    print("   - Neural networks will learn to recognize these patterns")

In [5]:
def compare_emotions_waveforms(df, emotions_to_compare, word="back"):
    """
    Compare waveforms of different emotions for the same word
    
    Learning Point:
        - Visual comparison helps understand emotion differences
        - Same word, different emotion = different audio patterns
    """
    fig, axes = plt.subplots(len(emotions_to_compare), 1, figsize=(14, 3*len(emotions_to_compare)))
    
    if len(emotions_to_compare) == 1:
        axes = [axes]
    
    for idx, emotion in enumerate(emotions_to_compare):
        # Find a file with this emotion and word
        sample = df[(df['emotion'] == emotion) & (df['word'] == word)]
        
        if len(sample) > 0:
            file_path = sample.iloc[0]['file_path']
            audio, sr = librosa.load(file_path, sr=None)
            
            time = np.linspace(0, len(audio)/sr, len(audio))
            
            axes[idx].plot(time, audio, linewidth=0.5)
            axes[idx].set_title(f'{emotion.upper()} - "{word}"', fontsize=12, fontweight='bold')
            axes[idx].set_xlabel('Time (seconds)')
            axes[idx].set_ylabel('Amplitude')
            axes[idx].grid(True, alpha=0.3)
            axes[idx].axhline(y=0, color='r', linestyle='--', alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    print("\nüí° Notice the differences:")
    print("   - Energy levels (amplitude)")
    print("   - Duration variations")
    print("   - Pattern complexity")


In [7]:
print("\n" + "üéì"*35)
print("TESS EMOTION RECOGNITION - LEARNING JOURNEY BEGINS!")
print("üéì"*35)
    
    # STEP 1: SET YOUR DATASET PATH
print("\nüìå IMPORTANT: Update the dataset_path variable below!")
dataset_path = "/home/umairimran/OLD DISK/Univeristy/7th Semester/Intro to NLP/nlp_lab/data/TESS-data"
    
    # Example: dataset_path = "/content/TESS" or "C:/Users/YourName/TESS"
    
    # Check if path exists
if not os.path.exists(dataset_path):
    print(f"\n‚ùå Dataset path not found: {dataset_path}")
    print("Please download the dataset and update the path above.")
else:
        # STEP 2: Explore dataset structure
    folders, actresses, emotions = explore_dataset_structure(dataset_path)
        
        # STEP 3: Create organized DataFrame
    df = create_dataset_dataframe(dataset_path)
        
        # STEP 4: Analyze a single audio file
    sample_file = df.iloc[0]['file_path']
    sample_emotion = df.iloc[0]['emotion']
    sample_word = df.iloc[0]['word']
        
    print("\n" + "="*70)
    print("ANALYZING SAMPLE AUDIO FILE")
    print("="*70)
        
    audio, sr = load_and_analyze_audio(sample_file)
        
        # STEP 5: Visualize waveform
    print("\nüìä Generating Waveform Visualization...")
    visualize_audio_waveform(audio, sr, sample_emotion, sample_word)
        
        # STEP 6: Visualize spectrogram
    print("\nüìä Generating Spectrogram Visualization...")
    visualize_spectrogram(audio, sr, sample_emotion, sample_word)
        
        # STEP 7: Compare different emotions
    print("\nüìä Comparing Different Emotions...")
    emotions_to_compare = ['angry', 'happy', 'sad']  # Choose 3 emotions
    compare_emotions_waveforms(df, emotions_to_compare, word="back")
        
    print("\n" + "‚úÖ"*35)
    print("STEP 1 COMPLETE! You now understand:")
    print("  ‚úì Dataset structure")
    print("  ‚úì How audio is represented as data")
    print("  ‚úì Waveforms vs Spectrograms")
    print("  ‚úì How emotions differ in audio signals")
    print("‚úÖ"*35)
        
    print("\nüéØ NEXT STEP: Feature Extraction!")
    print("   We'll extract MFCCs, Mel spectrograms, and more!")


üéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéì
TESS EMOTION RECOGNITION - LEARNING JOURNEY BEGINS!
üéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéìüéì

üìå IMPORTANT: Update the dataset_path variable below!
DATASET STRUCTURE EXPLORATION

üìÅ Total folders found: 14

Folder names (Actress_Emotion format):
  - OAF_angry: 200 files
  - OAF_disgust: 200 files
  - OAF_fear: 200 files
  - OAF_happy: 200 files
  - OAF_neutral: 200 files
  - OAF_pleasant_surprise: 200 files
  - OAF_sad: 200 files
  - YAF_angry: 200 files
  - YAF_disgust: 200 files
  - YAF_fear: 200 files
  - YAF_happy: 200 files
  - YAF_neutral: 200 files
  - YAF_pleasant_surprised: 200 files
  - YAF_sad: 200 files

üë§ Actresses: ['OAF', 'YAF']
üòä Emotions: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'pleasant', 'sad']

CREATING DATASE

AttributeError: module 'coverage.types' has no attribute 'Tracer'