In [1]:
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, random_split, DataLoader, Subset
import string
import re
import os
import torch.nn.utils.rnn as rnn_utils
from transformers.feature_extraction_utils import BatchFeature
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import numpy as np
!pip install python-Levenshtein
import Levenshtein
from collections import defaultdict

Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-Levenshtein)
  Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-Levenshtein)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collect

In [None]:
df = pd.read_csv('/kaggle/input/mlpr-data/torgo_vectors_transcripts.csv')
df['FeaturePath'] = df['FeaturePath'].str.replace("E:\\MLPR Data\\Features\\", "/kaggle/input/mlpr-data/Features/Features/")

# mask to filter out entries with 'input' and 'jpg'
mask1 = ~(df['transcipt'].str.contains('input', case=False, na=False) & 
         df['transcipt'].str.contains('jpg', case=False, na=False))

# mask to filter out entries with 'say' and 'repeatedly'
mask2 = ~(df['transcipt'].str.contains('say', case=False, na=False) & 
         df['transcipt'].str.contains('repeatedly', case=False, na=False))

mask = mask1 & mask2

df = df[mask]
speakers = df["Speaker"].unique() 
df.to_csv('mlpr-torgo-kaggle.csv', index=False)

In [None]:
class TorgoASRDataset(Dataset):
    def __init__(self, csv_file):
        self.df = pd.read_csv(csv_file)

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        feature_path = row["FeaturePath"]
        transcript = row["transcipt"]
        speaker = row["Speaker"]
    
        try:
            features = torch.load(feature_path, map_location='cpu')
        except Exception as e:
            raise RuntimeError(f"Failed to load feature from '{feature_path}': {e}")
        
        if isinstance(features, dict):
            input_values = features.get("input_values")
            if input_values is None:
                raise ValueError(f"'input_values' key not found in features loaded from {feature_path}")
        elif hasattr(features, "input_values"):
            input_values = features.input_values
        else:
            input_values = features
    
        if not isinstance(input_values, torch.Tensor):
            input_values = torch.tensor(input_values)
    
        if input_values.dim() == 3:
            input_values = input_values.squeeze(0)  
    
        seq_length = input_values.size(0)
        
        return {
            "input_values": input_values,
            "seq_length": seq_length,
            "transcript": transcript,
            "speaker": speaker
        }

In [None]:
def collate_fn(batch):
    input_values_list = []
    seq_lengths = []
    transcripts = []
    speakers = []
    
    for sample in batch:
        
        x = sample["input_values"]
        sample_seq_length = x.size(0)
        
        input_values_list.append(x)
        seq_lengths.append(sample_seq_length)
        transcripts.append(sample["transcript"])
        speakers.append(sample["speaker"])
    

    padded_inputs = torch.nn.utils.rnn.pad_sequence(input_values_list, batch_first=True, padding_value=0)
    
    
    padded_inputs = padded_inputs.contiguous()
    
    return {
        "input_values": padded_inputs, 
        "seq_lengths": torch.tensor(seq_lengths),
        "transcripts": transcripts,
        "speakers": speakers
    }


def transcript_to_indices(transcript, char_to_idx):
    return [char_to_idx[char] for char in transcript if char in char_to_idx]

In [None]:
class AttentionLayer(nn.Module):
    def __init__(self, hidden_dim):
        super(AttentionLayer, self).__init__()
        self.query = nn.Linear(hidden_dim, hidden_dim)
        self.key = nn.Linear(hidden_dim, hidden_dim)
        self.value = nn.Linear(hidden_dim, hidden_dim)
        self.scale = torch.sqrt(torch.tensor(hidden_dim, dtype=torch.float32))
        
    def forward(self, x):
        # x shape: [batch_size, seq_len, hidden_dim]
        batch_size, seq_len, hidden_dim = x.size()
        
        q = self.query(x) 
        k = self.key(x)   
        v = self.value(x)  
        
        # attention scores (scaled dot-product attention)
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.scale  
        
        # softmax to get attention weights
        attn_weights = torch.softmax(scores, dim=-1) 
        
        context = torch.matmul(attn_weights, v) 
        
        # Combine with residual connection
        output = context + x  
        
        return output

class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, vocab_size, num_layers=3, dropout_rate=0.3):
        super(Model, self).__init__()
        
        self.input_projection = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout_rate),
            nn.ReLU()
        )
        
        self.lstm_layers = nn.ModuleList([
            nn.LSTM(
                input_size=hidden_dim if i==0 else hidden_dim*2,
                hidden_size=hidden_dim,
                batch_first=True,
                bidirectional=True
            ) for i in range(num_layers)
        ])
        
        self.dropouts = nn.ModuleList([
            nn.Dropout(dropout_rate) for _ in range(num_layers)
        ])
        
        self.layer_norms = nn.ModuleList([
            nn.LayerNorm(hidden_dim*2) for _ in range(num_layers)
        ])
        
        self.attention = AttentionLayer(hidden_dim*2)
        
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim*2, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout_rate),
            nn.ReLU(),
            nn.Linear(hidden_dim, vocab_size)
        )
        
    def forward(self, x):
        if x.dim() == 3 and x.size(2) == 1:  
            x = x.squeeze(2)
            x = x.unsqueeze(2)
            
        batch_size, seq_len = x.size(0), x.size(1)
        

        x = self.input_projection(x)
        
        residual = None
        for i, (lstm, dropout, layer_norm) in enumerate(zip(self.lstm_layers, self.dropouts, self.layer_norms)):
            lstm_out, _ = lstm(x)
            lstm_out = dropout(lstm_out)
            
            if residual is not None and lstm_out.size() == residual.size():
                lstm_out = lstm_out + residual
                
            lstm_out = layer_norm(lstm_out)
            
            # attention after the final LSTM layer
            if i == len(self.lstm_layers) - 1:
                lstm_out = self.attention(lstm_out)
                
            residual = lstm_out
            x = lstm_out
        
        logits = self.fc(x)
        
        logits = logits.transpose(0, 1)
        
        return logits
    
    def decode(self, x, seq_lengths):
        """Generate text predictions for the input batch"""
        logits = self.forward(x) 
        

        predictions = torch.argmax(logits, dim=2)  
        predictions = predictions.transpose(0, 1)  
        
        return predictions



In [None]:
def calculate_cer(reference, prediction):
    distance = Levenshtein.distance(reference, prediction)
    return distance / max(len(reference), 1)

def calculate_wer(reference, prediction):
    ref_words = reference.split()
    pred_words = prediction.split()
    distance = Levenshtein.distance(ref_words, pred_words)
    return distance / max(len(ref_words), 1)

def trainModel(model, train_loader, val_loader, char_to_idx, num_epochs=10, learning_rate=1e-4, patience=3, min_delta=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model.to(device)
    ctc_loss = nn.CTCLoss(blank=0, zero_infinity=True)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

    best_val_loss = float('inf')
    epochs_without_improvement = 0
    best_model_state = None
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")
        
        for batch in train_pbar:
            inputs = batch["input_values"].to(device)
            input_lengths = batch["seq_lengths"].to(device)
            
            targets_list = [torch.tensor(transcript_to_indices(t, char_to_idx), dtype=torch.long)
                            for t in batch["transcripts"]]
            targets_list = [t if len(t) > 0 else torch.tensor([0], dtype=torch.long) for t in targets_list]
            
            targets = torch.cat(targets_list).to(device)
            target_lengths = torch.tensor([len(t) for t in targets_list], dtype=torch.long).to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            log_probs = torch.nn.functional.log_softmax(outputs, dim=2)
            loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            
            optimizer.step()
            running_loss += loss.item()
            train_pbar.set_postfix({"loss": f"{loss.item():.4f}"})
            
        avg_train_loss = running_loss / len(train_loader)
        
        model.eval()
        val_loss = 0.0
        val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
        
        with torch.no_grad():
            for batch in val_pbar:
                inputs = batch["input_values"].to(device)
                input_lengths = batch["seq_lengths"].to(device)
                
                targets_list = [torch.tensor(transcript_to_indices(t, char_to_idx), dtype=torch.long)
                                for t in batch["transcripts"]]
                targets_list = [t if len(t) > 0 else torch.tensor([0], dtype=torch.long) for t in targets_list]
                targets = torch.cat(targets_list).to(device)
                target_lengths = torch.tensor([len(t) for t in targets_list], dtype=torch.long).to(device)
                
                outputs = model(inputs)
                log_probs = torch.nn.functional.log_softmax(outputs, dim=2)
                loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)
                val_loss += loss.item()
                val_pbar.set_postfix({"val_loss": f"{loss.item():.4f}"})
                
        avg_val_loss = val_loss / len(val_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}] Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")
        
        scheduler.step(avg_val_loss)
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Current learning rate: {current_lr:.6f}")
        
        if avg_val_loss < best_val_loss - min_delta:
            best_val_loss = avg_val_loss
            epochs_without_improvement = 0
            best_model_state = model.state_dict().copy()
            print(f"New best validation loss: {best_val_loss:.4f}")
        else:
            epochs_without_improvement += 1
            print(f"No improvement for {epochs_without_improvement} epochs")
            
        if epochs_without_improvement >= patience:
            print(f"Early stopping after {epoch+1} epochs")
            model.load_state_dict(best_model_state)
            break
    
    if best_model_state is not None and epochs_without_improvement < patience:
        model.load_state_dict(best_model_state)
        
    print("Training complete.")
    return best_val_loss


def evaluateModel(model, test_loader, char_to_idx, idx_to_char, output_csv="evaluation_results.csv"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    ctc_loss = nn.CTCLoss(blank=0, zero_infinity=True)
    test_loss = 0.0
    all_predictions = []
    all_transcripts = []
    all_speakers = []
    
    with torch.no_grad():
        test_pbar = tqdm(test_loader, desc="Evaluating")
        for batch in test_pbar:
            inputs = batch["input_values"].to(device)
            input_lengths = batch["seq_lengths"].to(device)
            transcripts = batch["transcripts"]
            speakers = batch["speakers"]
            
            targets_list = [torch.tensor(transcript_to_indices(t, char_to_idx), dtype=torch.long)
                            for t in transcripts]
            targets_list = [t if len(t) > 0 else torch.tensor([0], dtype=torch.long) for t in targets_list]
            targets = torch.cat(targets_list).to(device)
            target_lengths = torch.tensor([len(t) for t in targets_list], dtype=torch.long).to(device)
            
            outputs = model(inputs)
            log_probs = torch.nn.functional.log_softmax(outputs, dim=2)
            loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)
            test_loss += loss.item()
            
            predictions = torch.argmax(outputs, dim=2).transpose(0, 1)  # [batch, time]
            
            batch_texts = []
            for pred in predictions:
                pred_collapsed = []
                prev = None
                for p in pred:
                    if p.item() != prev:
                        pred_collapsed.append(p.item())
                        prev = p.item()
                
                text = ''.join([idx_to_char.get(p, '') for p in pred_collapsed if p > 0])
                batch_texts.append(text)
            
            all_predictions.extend(batch_texts)
            all_transcripts.extend(transcripts)
            all_speakers.extend(speakers)
            
            test_pbar.set_postfix({"loss": f"{loss.item():.4f}"})
            
        avg_test_loss = test_loss / len(test_loader)
        print(f"Final Test Loss: {avg_test_loss:.4f}")
        
        # overall WER and CER
        total_cer = 0.0
        total_wer = 0.0
        for ref, pred in zip(all_transcripts, all_predictions):
            total_cer += calculate_cer(ref, pred)
            total_wer += calculate_wer(ref, pred)
        
        avg_cer = total_cer / len(all_predictions)
        avg_wer = total_wer / len(all_predictions)
        print(f"Overall Character Error Rate: {avg_cer:.4f}")
        print(f"Overall Word Error Rate: {avg_wer:.4f}")
        
        # per-speaker metrics
        speaker_predictions = defaultdict(list)
        speaker_references = defaultdict(list)
        
        for speaker, ref, pred in zip(all_speakers, all_transcripts, all_predictions):
            speaker_predictions[speaker].append(pred)
            speaker_references[speaker].append(ref)
        
        print("\nPer-Speaker Metrics:")
        for speaker in sorted(speaker_predictions.keys()):
            preds = speaker_predictions[speaker]
            refs = speaker_references[speaker]
            
            speaker_cer = sum(calculate_cer(r, p) for r, p in zip(refs, preds)) / len(preds)
            speaker_wer = sum(calculate_wer(r, p) for r, p in zip(refs, preds)) / len(preds)
            
            print(f"Speaker {speaker} (samples: {len(preds)})")
            print(f"  - Character Error Rate: {speaker_cer:.4f}")
            print(f"  - Word Error Rate: {speaker_wer:.4f}")
        
        for i in range(min(15, len(all_predictions))):
            print(f"Example {i+1} (Speaker: {all_speakers[i]}):\nReference: '{all_transcripts[i]}'\nPrediction: '{all_predictions[i]}'")
        
        # Save results to CSV file
        results_df = pd.DataFrame({
            'speaker': all_speakers,
            'reference': all_transcripts,
            'prediction': all_predictions,
            'cer': [calculate_cer(ref, pred) for ref, pred in zip(all_transcripts, all_predictions)],
            'wer': [calculate_wer(ref, pred) for ref, pred in zip(all_transcripts, all_predictions)]
        })
        
        results_df.to_csv(output_csv, index=False)
        print(f"\nEvaluation results saved to {output_csv}")
        
        return avg_test_loss, all_predictions, all_transcripts

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vocab = "abcdefghijklmnopqrstuvwxyz " #output chars
char_to_idx = {char: i+1 for i, char in enumerate(vocab)}
idx_to_char = {i+1: char for i, char in enumerate(vocab)}
vocab_size = len(vocab) + 1


csv_file = "/kaggle/working/mlpr-torgo-kaggle.csv"
full_dataset = TorgoASRDataset(csv_file)

train_idx, test_idx = train_test_split(
    range(len(full_dataset)), 
    test_size=0.2, 
    random_state=42 
)


train_idx, val_idx = train_test_split(
    train_idx, 
    test_size=0.25,  
    random_state=42
)

train_dataset = Subset(full_dataset, train_idx)
val_dataset = Subset(full_dataset, val_idx)
test_dataset = Subset(full_dataset, test_idx)


train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

input_dim = 1024
hidden_dim = 256
vocab_size = len(vocab) + 1

model = Model(input_dim=input_dim, hidden_dim=hidden_dim, vocab_size=vocab_size, num_layers=3, dropout_rate=0.3)
ckpt = torch.load("/kaggle/input/libri-pretrained-model/libri_asr_model.pt", map_location=device)
model.load_state_dict(ckpt["model_state_dict"])
model.to(device)

best_val_loss = trainModel(
    model, 
    train_loader, 
    val_loader, 
    char_to_idx, 
    num_epochs=30, 
    learning_rate=5e-4, 
    patience=5, 
    min_delta=0.01
)

print(f"Best validation loss: {best_val_loss:.4f}")
print("Evaluating best model on test set:")
test_loss, predictions, references = evaluateModel(model, test_loader, char_to_idx, idx_to_char)

Using device: cuda


Epoch 1/30 [Train]: 100%|██████████| 81/81 [01:29<00:00,  1.11s/it, loss=1.8415]
Epoch 1/30 [Val]: 100%|██████████| 27/27 [00:23<00:00,  1.13it/s, val_loss=1.2794]


Epoch [1/30] Training Loss: 1.3898, Validation Loss: 1.3069
Current learning rate: 0.000500
New best validation loss: 1.3069


Epoch 2/30 [Train]: 100%|██████████| 81/81 [00:46<00:00,  1.75it/s, loss=1.0414]
Epoch 2/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.82it/s, val_loss=1.2053]


Epoch [2/30] Training Loss: 1.2135, Validation Loss: 1.2833
Current learning rate: 0.000500
New best validation loss: 1.2833


Epoch 3/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=1.4629]
Epoch 3/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.78it/s, val_loss=1.1633]


Epoch [3/30] Training Loss: 1.1374, Validation Loss: 1.1832
Current learning rate: 0.000500
New best validation loss: 1.1832


Epoch 4/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.71it/s, loss=1.0847]
Epoch 4/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.80it/s, val_loss=1.1470]


Epoch [4/30] Training Loss: 1.0870, Validation Loss: 1.2149
Current learning rate: 0.000500
No improvement for 1 epochs


Epoch 5/30 [Train]: 100%|██████████| 81/81 [00:46<00:00,  1.74it/s, loss=0.7069]
Epoch 5/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.80it/s, val_loss=1.1511]


Epoch [5/30] Training Loss: 1.0277, Validation Loss: 1.2081
Current learning rate: 0.000500
No improvement for 2 epochs


Epoch 6/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=0.8190]
Epoch 6/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.81it/s, val_loss=1.0924]


Epoch [6/30] Training Loss: 1.0000, Validation Loss: 1.1791
Current learning rate: 0.000500
No improvement for 3 epochs


Epoch 7/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=0.7155]
Epoch 7/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.86it/s, val_loss=1.1175]


Epoch [7/30] Training Loss: 0.9585, Validation Loss: 1.1811
Current learning rate: 0.000500
No improvement for 4 epochs


Epoch 8/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=1.1346]
Epoch 8/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.83it/s, val_loss=1.0692]


Epoch [8/30] Training Loss: 0.9233, Validation Loss: 1.1411
Current learning rate: 0.000500
New best validation loss: 1.1411


Epoch 9/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=1.1313]
Epoch 9/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.86it/s, val_loss=1.1293]


Epoch [9/30] Training Loss: 0.8946, Validation Loss: 1.1777
Current learning rate: 0.000500
No improvement for 1 epochs


Epoch 10/30 [Train]: 100%|██████████| 81/81 [00:46<00:00,  1.73it/s, loss=0.8709]
Epoch 10/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.83it/s, val_loss=1.0920]


Epoch [10/30] Training Loss: 0.8459, Validation Loss: 1.1895
Current learning rate: 0.000500
No improvement for 2 epochs


Epoch 11/30 [Train]: 100%|██████████| 81/81 [00:46<00:00,  1.72it/s, loss=0.9172]
Epoch 11/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.81it/s, val_loss=1.1326]


Epoch [11/30] Training Loss: 0.8244, Validation Loss: 1.1797
Current learning rate: 0.000250
No improvement for 3 epochs


Epoch 12/30 [Train]: 100%|██████████| 81/81 [00:46<00:00,  1.74it/s, loss=0.7721]
Epoch 12/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.86it/s, val_loss=1.0749]


Epoch [12/30] Training Loss: 0.7404, Validation Loss: 1.1735
Current learning rate: 0.000250
No improvement for 4 epochs


Epoch 13/30 [Train]: 100%|██████████| 81/81 [00:47<00:00,  1.72it/s, loss=0.7569]
Epoch 13/30 [Val]: 100%|██████████| 27/27 [00:09<00:00,  2.87it/s, val_loss=1.0251]


Epoch [13/30] Training Loss: 0.7003, Validation Loss: 1.1316
Current learning rate: 0.000250
No improvement for 5 epochs
Early stopping after 13 epochs
Training complete.
Best validation loss: 1.1411
Evaluating best model on test set:


Evaluating: 100%|██████████| 27/27 [00:36<00:00,  1.35s/it, loss=1.0196]

Final Test Loss: 1.0385
Overall Character Error Rate: 0.2662
Overall Word Error Rate: 0.4875

Per-Speaker Metrics:
Speaker F01 (samples: 22)
  - Character Error Rate: 0.6014
  - Word Error Rate: 0.9331
Speaker F03 (samples: 128)
  - Character Error Rate: 0.4322
  - Word Error Rate: 0.7225
Speaker F04 (samples: 89)
  - Character Error Rate: 0.1840
  - Word Error Rate: 0.3938
Speaker FC01 (samples: 42)
  - Character Error Rate: 0.3069
  - Word Error Rate: 0.5347
Speaker FC02 (samples: 244)
  - Character Error Rate: 0.1776
  - Word Error Rate: 0.4225
Speaker FC03 (samples: 192)
  - Character Error Rate: 0.2677
  - Word Error Rate: 0.4635
Speaker M01 (samples: 13)
  - Character Error Rate: 0.6358
  - Word Error Rate: 1.0000
Speaker M02 (samples: 79)
  - Character Error Rate: 0.6662
  - Word Error Rate: 0.9491
Speaker M03 (samples: 96)
  - Character Error Rate: 0.1804
  - Word Error Rate: 0.3495
Speaker M04 (samples: 89)
  - Character Error Rate: 0.6250
  - Word Error Rate: 0.9287
Speaker M


