In [None]:
from jiwer import wer
import os
import pandas as pd
import whisper
import zeno_client
import dotenv

# Load variables from a local .env file into the process environment before
# anything reads them (ZENO_API_KEY is read via os.environ further down).
# override=True lets values from the .env file replace variables that are
# already set in the environment.
dotenv.load_dotenv(override=True)

In [None]:
# Read the dataset table from the working directory. Later cells rely on its
# "id" and "label" columns.
df = pd.read_csv(filepath_or_buffer="speech_accent_archive.csv")


In [None]:
# Derive each row's hosted audio URL by prefixing its id with the base URL;
# the result is stored in the "data" column used for upload and transcription.
AUDIO_BASE_URL = "https://zenoml.s3.amazonaws.com/accents/"
df["data"] = AUDIO_BASE_URL + df["id"]

In [None]:
# Connect to Zeno, create the project, and upload the base dataset.
#
# Fail fast with a clear message when the API key is missing: passing None to
# ZenoClient would only surface later as a harder-to-diagnose error.
if not os.environ.get("ZENO_API_KEY"):
    raise RuntimeError(
        "ZENO_API_KEY is not set; export it or add it to your .env file "
        "before running this cell."
    )

client = zeno_client.ZenoClient(os.environ["ZENO_API_KEY"])

project = client.create_project(
    name="Transcription Test",
    view="audio-transcription",
    description="Test of audio transcription",
    metrics=[
        # Mean of the "wer" column; that column is supplied with each system
        # upload in the transcription loop below.
        zeno_client.ZenoMetric(name="avg wer", type="mean", columns=["wer"])
    ],
)

# "id" identifies each row, "data" holds the audio URL, and "label" holds the
# reference transcript that system outputs are scored against.
project.upload_dataset(df, id_column="id", data_column="data", label_column="label")

In [None]:
# Whisper model names to evaluate; each one is passed to whisper.load_model()
# and becomes the name of an uploaded system.
models = [
    "tiny",
    "tiny.en",
    "base",
    "base.en",
    "large",
]

In [None]:
# Transcribe the dataset with every model in `models`, score each transcript
# against its label with word error rate, and upload the results to Zeno.
# Results are cached per model in cache/<model>.parquet so that re-running the
# notebook does not repeat the expensive transcription step.
os.makedirs("cache", exist_ok=True)

for model in models:
    cache_path = f"cache/{model}.parquet"

    # Test for the cache file explicitly rather than relying on a bare
    # `except:`, which would also swallow KeyboardInterrupt and hide genuine
    # read errors behind a silent (and slow) recomputation.
    if os.path.isfile(cache_path):
        df_system = pd.read_parquet(cache_path)
    else:
        whisper_model = whisper.load_model(model)
        df_system = df[["id", "data", "label"]].copy()

        # Transcribe each audio URL and keep only the recognised text.
        df_system["output"] = df_system["data"].apply(
            lambda audio: whisper_model.transcribe(audio)["text"]
        )
        # Word error rate of the model output against the reference label.
        df_system["wer"] = df_system.apply(
            lambda row: wer(row["label"], row["output"]), axis=1
        )
        df_system.to_parquet(cache_path, index=False)

    project.upload_system(
        df_system[["id", "output", "wer"]],
        name=model,
        id_column="id",
        output_column="output",
    )
