In [1]:
from hmmaudio.utils import load_all_data, predict_label, train_hmm
from hmmaudio.eval import evaluate_models


# HMMAudio Demo

This notebook demonstrates how to train and evaluate Hidden Markov Models for audio emotion classification using diagonal covariance matrices.

In [2]:
# Dataset locations, relative to the notebook's working directory.
TRAIN_DATA_PATH = "./data/train"
TEST_DATA_PATH = "./data/test"

# Value passed to load_all_data as `limit`; kept small so the demo runs quickly.
FILES_LIMIT = 10

# Feature-extraction settings shared by BOTH splits. Defining them once
# guarantees that training and test features are computed identically;
# previously the same seven arguments were repeated in each call.
FEATURE_OPTIONS = dict(
    include_mfcc=True,
    include_delta=True,
    include_delta2=True,
    include_energy=True,
    num_cepstral=13,
    # NOTE(review): presumably seconds (25 ms window, 10 ms stride) — confirm
    # against load_all_data.
    frame_size=0.025,
    frame_stride=0.01,
)

# Load training and test data
print("Loading training data...")
train_features, train_files = load_all_data(
    TRAIN_DATA_PATH, limit=FILES_LIMIT, **FEATURE_OPTIONS
)
print("\nLoading test data...")
test_features, test_files = load_all_data(
    TEST_DATA_PATH, limit=FILES_LIMIT, **FEATURE_OPTIONS
)

Loading training data...


Processing Happy: 100%|██████████| 10/10 [00:00<00:00, 80.07it/s]


Happy: Loaded 10 files


Processing Sad: 100%|██████████| 10/10 [00:00<00:00, 90.76it/s]


Sad: Loaded 10 files


Processing Fear: 100%|██████████| 10/10 [00:00<00:00, 92.68it/s]


Fear: Loaded 10 files


Processing Neutral: 100%|██████████| 10/10 [00:00<00:00, 89.29it/s]


Neutral: Loaded 10 files


Processing Anger: 100%|██████████| 10/10 [00:00<00:00, 94.13it/s]


Anger: Loaded 10 files


Processing Disgust: 100%|██████████| 10/10 [00:00<00:00, 92.15it/s]


Disgust: Loaded 10 files

Loading test data...


  sample_rate, audio = wavfile.read(file_path)
Processing Happy: 100%|██████████| 10/10 [00:00<00:00, 56.97it/s]


Happy: Loaded 10 files


Processing Sad: 100%|██████████| 10/10 [00:00<00:00, 56.70it/s]


Sad: Loaded 10 files


Processing Fear: 100%|██████████| 10/10 [00:00<00:00, 57.00it/s]


Fear: Loaded 10 files


Processing Neutral: 100%|██████████| 10/10 [00:00<00:00, 57.94it/s]


Neutral: Loaded 10 files


Processing Anger: 100%|██████████| 10/10 [00:00<00:00, 54.37it/s]


Anger: Loaded 10 files


Processing Disgust: 100%|██████████| 10/10 [00:00<00:00, 53.89it/s]

Disgust: Loaded 10 files





## Train Continuous HMMs with Diagonal Covariance

We'll use diagonal covariance matrices instead of full covariance matrices for faster training.

In [3]:
# Set HMM parameters
n_states = 5
max_iter = 10  # Number of Baum-Welch iterations

# Feature dimensionality: number of columns of the first sequence of the first
# emotion in train_features. Falls back to 1 when train_features is empty,
# which is the same default the cell used before.
first_class_sequences = next(iter(train_features.values()), None)
n_features = 1 if first_class_sequences is None else first_class_sequences[0].shape[1]

# Train HMMs with diagonal covariance (faster)
hmm_models = train_hmm(
    train_features,
    n_states=n_states,
    n_symbols=n_features,  # the feature dimension is passed through n_symbols
    max_iter=max_iter,
    continuous=True,  # Use continuous HMM
    diagonal_covariance=True,  # Use diagonal covariance for speed
)

Training HMM for Happy
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.52it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.74it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.76it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.72it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.49it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.25it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.70it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.71it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.53it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.70it/s]


Training HMM for Sad
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.08it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.21it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.23it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.18it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.16it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.19it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.78it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.04it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.15it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.10it/s]


Training HMM for Fear
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.73it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.13it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.97it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.96it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.08it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.11it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.74it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.09it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.10it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.03it/s]


Training HMM for Neutral
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.62it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.58it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.46it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.64it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.62it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.61it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.64it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.54it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.40it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.46it/s]


Training HMM for Anger
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.12it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.16it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.18it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.09it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.24it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.96it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.14it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.26it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.23it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 12.13it/s]


Training HMM for Disgust
Iteration 1/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.76it/s]


Iteration 2/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.80it/s]


Iteration 3/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.87it/s]


Iteration 4/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.55it/s]


Iteration 5/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.79it/s]


Iteration 6/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.65it/s]


Iteration 7/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.59it/s]


Iteration 8/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.78it/s]


Iteration 9/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.79it/s]


Iteration 10/10


Baum-Welch Training Progress: 100%|██████████| 10/10 [00:00<00:00, 11.55it/s]


## Evaluate Models

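Now we'll evaluate the trained HMM models on both the training and test sets.
We use the score method to compute the log-likelihood of each sequence and normalize it by sequence length, so that audio clips of different durations can be compared fairly.

**Note on the saved results:** this run uses only 10 files per emotion. It reaches 0.80 accuracy on the training set but only 0.17 on the test set, which is roughly chance level for six classes (1/6 ≈ 0.17), and the test confusion matrix shows almost every clip assigned to a single class. Treat these numbers as a smoke test of the pipeline, not as a benchmark.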

In [4]:
# --- Training split ---------------------------------------------------------
# Scores are normalized by sequence length to handle variable-length audio.
# Only the first two of the five values returned by evaluate_models are used.
print("\nEvaluating on training set...")
train_accuracy, train_cm, _, _, _ = evaluate_models(
    hmm_models, train_features, normalize_by_length=True
)
print(f"Train Accuracy: {train_accuracy:.2f}")
print("Train Confusion Matrix:", train_cm, sep="\n")

# --- Test split -------------------------------------------------------------
print("\nEvaluating on test set...")
test_accuracy, test_cm, _, _, _ = evaluate_models(
    hmm_models, test_features, normalize_by_length=True
)
print(f"Test Accuracy: {test_accuracy:.2f}")
print("Test Confusion Matrix:", test_cm, sep="\n")


Evaluating on training set...
Evaluating HMMs...


Evaluating Happy: 100%|██████████| 10/10 [00:01<00:00,  5.59it/s]
Evaluating Sad: 100%|██████████| 10/10 [00:01<00:00,  5.78it/s]
Evaluating Fear: 100%|██████████| 10/10 [00:01<00:00,  5.79it/s]
Evaluating Neutral: 100%|██████████| 10/10 [00:01<00:00,  5.56it/s]
Evaluating Anger: 100%|██████████| 10/10 [00:01<00:00,  5.84it/s]
Evaluating Disgust: 100%|██████████| 10/10 [00:01<00:00,  5.66it/s]


Evaluation complete.
Train Accuracy: 0.80
Train Confusion Matrix:
[[ 5  1  0  0  4  0]
 [ 0 10  0  0  0  0]
 [ 0  0  5  1  2  2]
 [ 0  0  0 10  0  0]
 [ 0  0  0  0 10  0]
 [ 0  0  0  2  0  8]]

Evaluating on test set...
Evaluating HMMs...


Evaluating Happy: 100%|██████████| 10/10 [00:02<00:00,  4.20it/s]
Evaluating Sad: 100%|██████████| 10/10 [00:02<00:00,  4.23it/s]
Evaluating Fear: 100%|██████████| 10/10 [00:02<00:00,  4.25it/s]
Evaluating Neutral: 100%|██████████| 10/10 [00:02<00:00,  4.25it/s]
Evaluating Anger: 100%|██████████| 10/10 [00:02<00:00,  4.02it/s]
Evaluating Disgust: 100%|██████████| 10/10 [00:02<00:00,  3.98it/s]

Evaluation complete.
Test Accuracy: 0.17
Test Confusion Matrix:
[[ 0  0  0  0 10  0]
 [ 0  0  0  0 10  0]
 [ 0  0  0  0 10  0]
 [ 2  0  0  0  8  0]
 [ 0  0  0  0 10  0]
 [ 1  0  0  0  9  0]]





## Example: Classify a Single Audio Sample

Let's classify a single clip from the test set and print its per-frame log-likelihood under each emotion model (higher is better).

In [6]:
# Pick one clip from the test split; any valid emotion/index pair works.
true_emotion = "Anger"
sample_index = 5
sample_features = test_features[true_emotion][sample_index]

# predict_label returns the predicted label and a per-emotion score mapping.
predicted_emotion, scores = predict_label(hmm_models, sample_features)

# Report the outcome
print(f"True emotion: {true_emotion}")
print(f"Predicted emotion: {predicted_emotion}")
print("\nScores for each emotion (log-likelihood/frame, higher is better):")

# Divide every score by the length of the sample so values are per frame.
n_frames = len(sample_features)
normalized_scores = {}
for emotion, score in scores.items():
    normalized_scores[emotion] = score/n_frames

# Rank emotions from best (highest) to worst (lowest) score and print them.
ranking = sorted(normalized_scores, key=normalized_scores.get, reverse=True)
for emotion in ranking:
    score = normalized_scores[emotion]
    print(f"{emotion}: {score:8.2f}")

True emotion: Anger
Predicted emotion: Anger

Scores for each emotion (log-likelihood/frame, higher is better):
Anger:    -5.74
Happy:   -13.44
Disgust:   -15.74
Fear:   -17.78
Neutral:   -23.44
Sad:   -31.11
