<a href="https://colab.research.google.com/github/yaya-sy/speechscorer/blob/main/demo/speechscorer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation

## Install torch

In [None]:
# Remove whichever torch / torchaudio / torchvision builds are currently installed.
# %pip (rather than !pip3) runs pip for the interpreter that backs this kernel.
%pip uninstall -y torch torchaudio torchvision

In [None]:
# Install the PyTorch packages from the cu118 wheel index.
# %pip (rather than !pip3) installs into the interpreter that backs this kernel.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
# %pip (rather than !pip3) installs into the interpreter that backs this kernel.
%pip install tensorboardX

## Install speechscorer

In [None]:
# Install speechscorer straight from its GitHub repository
# (presumably this is what provides the `speechscore` command used below).
# %pip (rather than !pip3) installs into the interpreter that backs this kernel.
%pip install git+https://github.com/yaya-sy/speechscorer.git

# Scoring a simple utterance

In [None]:
# -y pre-answers apt's confirmation prompt so the cell can run unattended.
!sudo apt-get install -y subversion

In [None]:
# GitHub shut down its Subversion bridge in January 2024, so `svn export` no longer
# works against github.com. Download the single WAV file over HTTPS instead;
# HEAD resolves to the repository's default branch, and -nc skips the download
# when the file is already present.
!wget -nc https://github.com/jimbozhang/speechocean762/raw/HEAD/WAVE/SPEAKER0001/000010011.WAV

In [None]:
# Score the single WAV file fetched in the previous cell (handed to the CLI through -a).
!speechscore -a 000010011.WAV

# Scoring English learners' utterances

For this experiment, we will use the HuBERT-Large model, so we first need to download its checkpoint:

In [None]:
# -c resumes an interrupted download and does nothing when the file is already complete,
# so re-running the cell neither restarts the transfer nor leaves a `.pt.1` duplicate.
!wget -c https://dl.fbaipublicfiles.com/hubert/hubert_large_ll60k.pt

## Use the speechocean762 dataset.

In [None]:
# Clone the dataset repository into ./speechocean762; the cells below read
# test/wav.scp and test/all-info.json from that folder.
!git clone https://github.com/jimbozhang/speechocean762.git

In [None]:
from pathlib import Path
from shutil import copy

# Paths listed in wav.scp are resolved against the cloned repository folder.
dataset_root = Path("speechocean762")
wav_scp = "speechocean762/test/wav.scp"
output_folder = Path("test_wavs")
output_folder.mkdir(exist_ok=True, parents=True)

# Every non-empty line of wav.scp holds two fields: an utterance id followed by
# the path of its WAV file. Gather all listed files into one flat folder.
with open(wav_scp, "r") as utterance_ids:
    for line in utterance_ids:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line carries no entry; skip it instead of
            # failing on the two-field unpacking below.
            continue
        # Split on the first run of whitespace so both tab- and space-separated
        # listings are accepted.
        _, path = line.split(maxsplit=1)
        copy(dataset_root / path, output_folder)

## Run the scorer

In [None]:
# Print the scorer's command-line help (assuming -h is the conventional help flag).
!speechscore -h

In [None]:
# Score everything under test_wavs/ (filled by the copy cell above), passing the
# HuBERT-Large checkpoint downloaded earlier through -m.
# NOTE(review): the meaning of -s, -d and -p is not visible in this notebook — check
# the `speechscore -h` output; --use-gpu presumably needs a GPU runtime. TODO confirm.
!speechscore -a test_wavs/ -m hubert_large_ll60k.pt -s hubert-mlm -d longest -p facebook/hubert-large-ls960-ft --use-gpu

## Results

In [None]:
import json
from pathlib import Path

import pandas as pd

# Fields of each utterance entry that are left out of the flat score table.
SKIPPED_FIELDS = {"words", "text"}

with open("speechocean762/test/all-info.json", "r") as json_file:
    targets = json.load(json_file)

# One flat record per utterance: its id plus every field that is not skipped.
records = [
    {
        "utterance_id": utterance_id,
        **{name: value for name, value in info.items() if name not in SKIPPED_FIELDS},
    }
    for utterance_id, info in targets.items()
]

dataframe = pd.DataFrame(records)

# Create the output folder if needed so this cell does not depend on an earlier
# step having created it (presumably the scorer run does — TODO confirm).
scores_csv = Path("results/scores.csv")
scores_csv.parent.mkdir(parents=True, exist_ok=True)
dataframe.to_csv(scores_csv, index=False)

In [None]:
# Human annotations written by the previous cell.
scores = pd.read_csv("results/scores.csv")
# NOTE(review): results.csv is presumably produced by the `speechscore` run above — confirm.
preds = pd.read_csv("results/results.csv")

# Join both tables on the utterance id, then average every numeric column per age.
# numeric_only=True keeps the aggregation from raising on any leftover text column
# (pandas >= 2.0 no longer drops such columns silently).
merged = (
    preds
    .merge(scores, on="utterance_id")
    .drop(columns=["gender"])
    .groupby(by=["age"])
    .mean(numeric_only=True)
)

In [None]:
from matplotlib import pyplot as plt

# Apply the "ggplot" style sheet to every figure created from here on.
plt.style.use("ggplot")

In [None]:
# Explicit figure/axes pair; `fig` is reused by the cell that saves the plot.
fig, ax = plt.subplots()

# x: the "total" column (labelled as the human-assigned score),
# y: the "entropy" column (labelled as the HuBERT-assigned score).
ax.scatter(
    x=merged["total"],
    y=merged["entropy"],
    color="lightgreen",
    ec="black",
    marker="o",
)

ax.set_title("English language learners scores as assigned by HuBERT and humans\n(Dataset: speechocean762)")
ax.set_ylabel("HuBERT-assigned score (entropy)")
ax.set_xlabel("Human-assigned score")
plt.show()

In [None]:
# Write the figure created in the plotting cell to an SVG file in the working directory.
fig.savefig(
    "hubert-mlm-scorer.svg",
    format="svg",
    dpi=420,
)