In [45]:
from datasets import load_dataset
from transformers import pipeline
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Seed handed to train_test_split so the train/test partition is reproducible.
SEED = 1

## Loading the data

In [46]:
# Label names used to restrict the evaluation to a subset of the classes.
special_labels = [
    'yes', 'no',
    'up', 'down',
    'left', 'right',
    'on', 'off',
    'stop', 'go',
    'silence',
]

In [47]:
# Load the "train/audio" dataset and immediately hold out 20% of it as the
# test split; the seed keeps the partition stable between runs.
data = load_dataset("train/audio", name="en-US", split='train').train_test_split(
    test_size=0.2,
    seed=SEED,
)

Resolving data files:   0%|          | 0/65123 [00:00<?, ?it/s]

In [48]:
# Class names of the "label" feature; a name's position in this list is used as its id.
all_labels = data["train"].features["label"].names

# Two-way lookup between class names and ids. Ids are stored as strings.
label2id = {name: str(index) for index, name in enumerate(all_labels)}
id2label = {str(index): name for index, name in enumerate(all_labels)}

# Integer ids of the labels listed in special_labels.
special_ids = [int(label2id[name]) for name in special_labels]

In [49]:
def get_confusion_matrix(classifier, audio, true_labels, all_labels):
    """Classify ``audio`` and tabulate the first prediction of each item against the truth.

    Args:
        classifier: Callable applied to ``audio`` in a single call; every element
            of its result is indexed as ``[0]["label"]`` to get the predicted label.
        audio: Inputs forwarded to ``classifier``.
        true_labels: Ground-truth label of each input.
        all_labels: Label order passed to ``confusion_matrix`` as ``labels``.

    Returns:
        The value of ``confusion_matrix(true_labels, predicted, labels=all_labels)``.
    """
    top_predictions = []
    for candidates in classifier(audio):
        # Only the first candidate of each result is kept.
        top_predictions.append(candidates[0]["label"])

    return confusion_matrix(true_labels, top_predictions, labels=all_labels)

def plot_confusion_matrix(matrix, labels, model_name):
    """Display ``matrix`` as an annotated heatmap titled with ``model_name``.

    Args:
        matrix: Confusion matrix of integer counts (cells are formatted with ``"d"``).
        labels: Tick labels used on both axes.
        model_name: Name inserted into the figure title.
    """
    _, axis = plt.subplots(figsize=(14, 7))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        xticklabels=labels,
        yticklabels=labels,
        ax=axis,
    )
    axis.set_xlabel('Predicted Labels')
    axis.set_ylabel('True Labels')
    axis.set_title(f'Confusion Matrix for model {model_name}')
    plt.show()

In [50]:
def get_true_labels(input):
    """Translate the ids in ``input["label"]`` into label names using ``id2label``."""
    names = []
    for label_id in input["label"]:
        # id2label is keyed by the string form of the id.
        names.append(id2label[str(label_id)])
    return names

In [None]:
def test_one(data, classifier, model_name):
    """Evaluate ``classifier`` on every class of both splits.

    Shows the test-split confusion matrix, then a grouped bar chart comparing
    per-class accuracy on the test and train splits, and finally prints the
    overall accuracy of each split.

    Args:
        data: Mapping with ``"train"`` and ``"test"`` splits, each exposing
            ``"audio"`` and ``"label"`` columns.
        classifier: Model callable forwarded to ``get_confusion_matrix``.
        model_name: Name shown in the confusion-matrix title.
    """
    test_split = data["test"]
    train_split = data["train"]

    test_conf_matrix = get_confusion_matrix(classifier, test_split["audio"], get_true_labels(test_split), all_labels)
    plot_confusion_matrix(test_conf_matrix, all_labels, model_name)

    train_conf_matrix = get_confusion_matrix(classifier, train_split["audio"], get_true_labels(train_split), all_labels)

    def per_class_accuracy(conf_matrix):
        # Diagonal (correct predictions) over row sums (samples per true class).
        # A class with no samples yields NaN instead of a divide-by-zero warning.
        row_totals = np.sum(conf_matrix, axis=1)
        return np.divide(
            np.diag(conf_matrix),
            row_totals,
            out=np.full(len(row_totals), np.nan),
            where=row_totals > 0,
        )

    accuracies_test = per_class_accuracy(test_conf_matrix)
    accuracies_train = per_class_accuracy(train_conf_matrix)

    # Draw the two series side by side; plotting both at the same x position
    # would let the train bars cover the test bars.
    positions = np.arange(len(all_labels))
    bar_width = 0.4

    plt.figure(figsize=(10, 6))
    plt.bar(positions - bar_width / 2, accuracies_test, width=bar_width, color='skyblue', label="Test")
    plt.bar(positions + bar_width / 2, accuracies_train, width=bar_width, color='red', label="Train")
    plt.xticks(positions, all_labels, rotation=90)
    plt.xlabel('Classes')
    plt.ylabel('Accuracy')
    plt.title('Accuracy Per Class')
    plt.ylim(0, 1)
    plt.legend()  # without this the label= arguments above are never shown
    plt.show()

    train_accuracy = np.trace(train_conf_matrix) / np.sum(train_conf_matrix)
    test_accuracy = np.trace(test_conf_matrix) / np.sum(test_conf_matrix)

    print(f'Overall model train accuracy: {train_accuracy:.2f}')
    print(f'Overall model test accuracy: {test_accuracy:.2f}')

def _select_special_examples(split):
    """Collect the examples of ``split`` whose label name is in ``special_labels``.

    Returns:
        An ``(audio, label_names)`` pair of equally long lists.
    """
    wanted = set(special_labels)
    audio, label_names = [], []
    for example in split:
        label_name = id2label[str(example["label"])]
        if label_name in wanted:
            audio.append(example["audio"])
            label_names.append(label_name)
    return audio, label_names


def test_two(data, classifier, model_name):
    """Evaluate ``classifier`` only on examples labelled with one of ``special_labels``.

    Shows the test-split confusion matrix, then a grouped bar chart comparing
    per-class accuracy of the selected labels on the test and train splits, and
    finally prints the overall accuracy of each filtered split.

    Args:
        data: Mapping with ``"train"`` and ``"test"`` splits whose examples
            expose ``"audio"`` and ``"label"``.
        classifier: Model callable forwarded to ``get_confusion_matrix``.
        model_name: Name shown in the confusion-matrix title.
    """
    # Each split fills its own lists. Previously the train loop appended to the
    # test lists, leaving the train set empty and polluting the test set.
    filtered_audio_test, filtered_true_labels_test = _select_special_examples(data["test"])
    filtered_audio_train, filtered_true_labels_train = _select_special_examples(data["train"])

    test_conf_matrix = get_confusion_matrix(classifier, filtered_audio_test, filtered_true_labels_test, all_labels)
    plot_confusion_matrix(test_conf_matrix, all_labels, model_name)

    train_conf_matrix = get_confusion_matrix(classifier, filtered_audio_train, filtered_true_labels_train, all_labels)

    # Rows of the selected labels, in all_labels order.
    selected_indices = [index for index, label in enumerate(all_labels) if label in special_labels]
    selected_label_names = [all_labels[index] for index in selected_indices]

    def selected_accuracy(conf_matrix):
        # Only the selected rows are divided: rows of the other classes are
        # all-zero after filtering and would produce 0/0 warnings.
        correct = np.diag(conf_matrix)[selected_indices]
        totals = np.sum(conf_matrix, axis=1)[selected_indices]
        return np.divide(
            correct,
            totals,
            out=np.full(len(totals), np.nan),
            where=totals > 0,
        )

    selected_accuracies_test = selected_accuracy(test_conf_matrix)
    selected_accuracies_train = selected_accuracy(train_conf_matrix)

    # x positions follow the selected labels so they match the bar heights in
    # length, and the two series sit side by side instead of overlapping.
    positions = np.arange(len(selected_label_names))
    bar_width = 0.4

    plt.figure(figsize=(8, 5))
    plt.bar(positions - bar_width / 2, selected_accuracies_test, width=bar_width, color='skyblue', label="Test")
    plt.bar(positions + bar_width / 2, selected_accuracies_train, width=bar_width, color='red', label="Train")
    plt.xticks(positions, selected_label_names, rotation=45)
    plt.xlabel('Classes')
    plt.ylabel('Accuracy')
    plt.title('Accuracy Per Selected Class')
    plt.ylim(0, 1)
    plt.legend()  # without this the label= arguments above are never shown
    plt.show()

    train_accuracy = np.trace(train_conf_matrix) / np.sum(train_conf_matrix)
    test_accuracy = np.trace(test_conf_matrix) / np.sum(test_conf_matrix)

    print(f'Selected model train accuracy: {train_accuracy:.2f}')
    print(f'Selected model test accuracy: {test_accuracy:.2f}')

## Model Wav2Vec

In [51]:
# Build an audio-classification pipeline around the Wav2Vec checkpoint.
model_name = "wojtek2288/Wav2Vec"
classifier = pipeline(task="audio-classification", model=model_name)

### Testing the model on all possible classes

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)

### Testing the model only on data with labels defined in the task

In [None]:
# Evaluate the currently loaded classifier on the special_labels subset only.
test_two(data=data, classifier=classifier, model_name=model_name)

## Model HUBERT

In [None]:
# Build an audio-classification pipeline around the HUBERT checkpoint.
model_name = "wojtek2288/HUBERT"
classifier = pipeline(task="audio-classification", model=model_name)

### Testing the model on all possible classes

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)

### Testing the model only on data with labels defined in the task

In [None]:
# Evaluate the currently loaded classifier on the special_labels subset only.
test_two(data=data, classifier=classifier, model_name=model_name)

## Model Wav2Vec-LR

In [None]:
# Build an audio-classification pipeline around the Wav2Vec-LR checkpoint.
model_name = "wojtek2288/Wav2Vec-LR"
classifier = pipeline(task="audio-classification", model=model_name)

### Testing the model on all possible classes

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)

### Testing the model only on data with labels defined in the task

In [None]:
# Evaluate the currently loaded classifier on the special_labels subset only.
test_two(data=data, classifier=classifier, model_name=model_name)

## Model HUBERT-LR

In [None]:
# Build an audio-classification pipeline around the HUBERT-LR checkpoint.
model_name = "wojtek2288/HUBERT-LR"
classifier = pipeline(task="audio-classification", model=model_name)

### Testing the model on all possible classes

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)

### Testing the model only on data with labels defined in the task

In [None]:
# Evaluate the currently loaded classifier on the special_labels subset only.
test_two(data=data, classifier=classifier, model_name=model_name)

## Model Wav2Vec-Unknown

In [None]:
# Build an audio-classification pipeline around the Wav2Vec-Unknown checkpoint.
model_name = "wojtek2288/Wav2Vec-Unknown"
classifier = pipeline(task="audio-classification", model=model_name)

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)

## Model HUBERT-Unknown

In [None]:
# Build an audio-classification pipeline around the HUBERT-Unknown checkpoint.
model_name = "wojtek2288/HUBERT-Unknown"
classifier = pipeline(task="audio-classification", model=model_name)

In [None]:
# Evaluate the currently loaded classifier on every class.
test_one(data=data, classifier=classifier, model_name=model_name)