In [4]:
from deep_learning_features_audio import *
from deep_learning_dict_api import AudioAnalysisAPI
from deep_learning_dict_datasets import Datasets
import evaluate
from evaluate import load
import pandas as pd
from pathlib import Path

In [5]:
def asr_evaluate_metric_with_model_on_commonvoice(task, dataset, model, metrics, n_test):
    """Benchmark one ASR model on the test table of a CommonVoice-style dataset.

    Reads the table at ``Datasets[task][dataset]["test_file"]``, transcribes the
    ``.wav`` counterpart (under ``<dataset path>/wavs``) of up to ``n_test`` rows
    with ``AudioAnalysisAPI[model]['function']``, scores the lower-cased
    transcripts against the lower-cased ``sentence`` column with every requested
    metric, and stores the outcome under ``./results/`` via ``evaluate.save``.

    Args:
        task: Key of the task in ``Datasets``.
        dataset: Key of the dataset in ``Datasets[task]``.
        model: Key of the model in ``AudioAnalysisAPI``.
        metrics: Iterable of metric names accepted by ``evaluate.load``.
        n_test: Maximum number of table rows to attempt. ``None`` or a negative
            value means "no limit" (the whole table is processed).

    Returns:
        dict: ``{"evaluation": {metric_name: score}}``. A score is ``None`` when
        no usable transcript was produced, so there was nothing to score.
    """
    # Explicit local import: the notebook otherwise only gets ``os`` indirectly
    # through ``from deep_learning_features_audio import *``.
    import os

    test_done = 0
    errors = 0
    predictions = []
    references = []
    result = {"evaluation": {}}

    dataset_info = Datasets[task][dataset]
    test_table = pd.read_table(dataset_info["test_file"])
    wav_dir = os.path.join(dataset_info["path"], "wavs")
    tot_sample = test_table.shape[0]
    unlimited = n_test is None or n_test < 0

    for i, (_, row) in enumerate(test_table.iterrows()):
        # Check the limit BEFORE doing any work and OUTSIDE the try block, so
        # that exactly ``n_test`` rows are attempted and a failing row can never
        # cause the stop condition to be skipped.
        if not unlimited and i >= n_test:
            break
        print("Benchmarking: {}/{}".format(i, tot_sample))
        try:
            reference = row["sentence"]
            print("reference:{}".format(reference))
            wav_name = os.path.splitext(row["path"])[0] + '.wav'
            audio_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(audio_path):
                print(audio_path, " file doesn't exist")
                errors += 1
                continue
            prediction = AudioAnalysisAPI[model]['function'](audiofile_path=audio_path)
            if prediction is None or prediction == "":
                # Count unusable transcripts instead of dropping them silently,
                # so that test_done + errors equals the number of rows attempted.
                print(audio_path, " produced no transcription")
                errors += 1
                continue
            predictions.append(prediction.lower())
            references.append(reference.lower())
            print("audiofile_path: {}".format(audio_path))
            print("reference: {}".format(reference.lower()))
            print("prediction:{}\n".format(prediction.lower()))
            test_done += 1
        except Exception as e:
            # Best-effort benchmark: one bad sample (malformed row, decoding or
            # model failure) must not abort the whole run.
            print(e)
            errors += 1

    if not predictions:
        print("No usable prediction was produced; metrics are reported as None")
    for metric in metrics:
        if predictions:
            calculated_metric = load(metric).compute(
                predictions=predictions, references=references
            )
        else:
            # Nothing to score: skip the metric backend rather than feeding it
            # empty lists.
            calculated_metric = None
        print("{}: {}".format(metric, calculated_metric))
        result['evaluation'][metric] = calculated_metric

    params = {
        "model": model,
        "dataset": dataset,
        "n_test": n_test,
        "test_done": test_done,
        "errors": errors,
        "tot_sample": tot_sample,
    }
    evaluate.save(path_or_file="./results/", **result, **params)
    return result
    


In [6]:
# --- Benchmark configuration -------------------------------------------------
# Task / dataset keys, the metrics to compute, and how many test rows to try.
task = "Automatic Speech Recognition"
dataset = "CommonVoice-FR-10.0"
metrics = ["wer", "cer"]
n_test = 3

# API routes of the ASR models to compare.
models = [
    '/api/automatic_speech_recognition/asr_wav2vec2_commonvoice_fr',
    '/api/automatic_speech_recognition/asr_wav2vec2_voxpopuli_fr',
    '/api/automatic_speech_recognition/asr_crdnn_commonvoice_fr',
]

# --- Run every model on the same dataset --------------------------------------
for model in models:
    # The last path component of the route is the short model name.
    print("===== Benchmark of model: {} -- dataset: {} ".format(model.rsplit("/", 1)[-1], dataset))
    asr_evaluate_metric_with_model_on_commonvoice(
        task=task, dataset=dataset, model=model, metrics=metrics, n_test=n_test
    )

===== Benchmark of model: asr_wav2vec2_voxpopuli_es -- dataset: CommonVoice-ES-10.0 
Benchmarking: 0/15459
reference:Habita en aguas poco profundas y rocosas.
Waveform shape: torch.Size([1, 176256]) - Sample Rate: 48000
Sample Rate: 16000
Labels: ('-', '|', 'e', 'a', 'o', 's', 'n', 'r', 'i', 'l', 'd', 'c', 't', 'u', 'p', 'm', 'b', 'q', 'y', 'g', 'v', 'h', 'ó', 'f', 'í', 'á', 'j', 'z', 'ñ', 'é', 'x', 'ú', 'k', 'w', 'ü')
Class labels: ('-', '|', 'e', 'a', 'o', 's', 'n', 'r', 'i', 'l', 'd', 'c', 't', 'u', 'p', 'm', 'b', 'q', 'y', 'g', 'v', 'h', 'ó', 'f', 'í', 'á', 'j', 'z', 'ñ', 'é', 'x', 'ú', 'k', 'w', 'ü')
audiofile_path: /storage/data_8T/datasets/audio/common-voice-corpus-10.0-2022-07-04/es/wavs/common_voice_es_19698530.wav
reference: habita en aguas poco profundas y rocosas.
prediction:abintaen aguas poco profinas quedo costes

Benchmarking: 1/15459
reference:Opera principalmente vuelos de cabotaje y regionales de carga.
Waveform shape: torch.Size([1, 329472]) - Sample Rate: 48000
Sam