In [2]:
### Benchmark f_whisper Model for English-Chinese Speech-to-Text Translation using BLEU

In [24]:
import torch
import torchaudio
import librosa
import sacrebleu
import numpy as np
from faster_whisper import WhisperModel
from datasets import load_dataset
from tqdm import tqdm

In [4]:
silence_timeout = 1.2

In [5]:
dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="test")

In [6]:
model_size="tiny"
model = WhisperModel(model_size, device="cpu", compute_type="int8")

In [27]:
predictions = []
references = []

In [8]:
def prepare_audio(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
        speech_array = resampler(speech_array)
    batch["speech"] = speech_array.squeeze(0)
    return batch

In [9]:
if "path" in dataset.column_names:
    dataset = dataset.map(prepare_audio)

In [28]:
for i in tqdm(range(len(dataset)), desc="Transcribing"):
    sample = dataset[i]
    # print(sample.keys())
    # print(np.asarray(sample["speech"]))
    if "speech" in sample:
        waveform = np.asarray(sample["speech"])
        segments, info = model.transcribe(waveform, 
                                          language="en", 
                                          beam_size=3, 
                                          vad_parameters={"threshold": 0.5,                          # VAD sensitivity
                                                          "min_silence_duration_ms": silence_timeout * 1000,
                                                          "min_speech_duration_ms": 250,            # Filter short bursts
                                                          }
        )
        transcription = "".join([segment.text for segment in segments]).strip()
    
        predictions.append(transcription)
        references.append(sample.get("sentence", ""))
        # print(predictions)
        # print(references)

Transcribing:   0%|          | 1/16354 [00:01<8:52:11,  1.95s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.']


Transcribing:   0%|          | 2/16354 [00:03<7:07:15,  1.57s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright."]
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right."]


Transcribing:   0%|          | 3/16354 [00:04<6:20:21,  1.40s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six']


Transcribing:   0%|          | 4/16354 [00:05<6:04:18,  1.34s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well."]


Transcribing:   0%|          | 5/16354 [00:07<6:38:04,  1.46s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.']


Transcribing:   0%|          | 6/16354 [00:09<7:54:09,  1.74s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.']


Transcribing:   0%|          | 7/16354 [00:10<7:18:30,  1.61s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?']


Transcribing:   0%|          | 8/16354 [00:12<7:19:40,  1.61s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is less invasive than the old one, but still causes regressions.']


Transcribing:   0%|          | 9/16354 [00:14<7:36:39,  1.68s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is less invasive than the old one, but still causes regressions.', 'How is Mozilla going to handle ambiguities like queue and cue?']


Transcribing:   0%|          | 10/16354 [00:15<7:01:32,  1.55s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.', 'Thank you very much.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is less invasive than the old one, but still causes regressions.', 'How is Mozilla going to handle ambiguities like queue and cue?', "I guess you must think I'm kinda b

Transcribing:   0%|          | 11/16354 [00:17<7:01:50,  1.55s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.', 'Thank you very much.', 'No one near the remote machine you could ring.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is less invasive than the old one, but still causes regressions.', 'How is Mozilla going to handle ambiguities like qu

Transcribing:   0%|          | 12/16354 [00:18<7:13:04,  1.59s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.', 'Thank you very much.', 'No one near the remote machine you could ring.', 'Sauce for the goose is sauce for the gonder.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is less invasive than the old one, but still causes regressions.', 'Ho

Transcribing:   0%|          | 13/16354 [00:20<7:29:58,  1.65s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.', 'Thank you very much.', 'No one near the remote machine you could ring.', 'Sauce for the goose is sauce for the gonder.', 'Graphs is thought of writing songs when she was four years old.']
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their squad for the season.', 'Do you mean it?', 'The new patch is 

Transcribing:   0%|          | 14/16354 [00:22<7:49:17,  1.72s/it]

['Joel Keaton disapproved of films and buster also had reservations about the medium.', "She'd be alright.", '6.', 'All as well, that ends well.', 'It is a busy market town that serves a large surrounded area.', 'The team had all of the champion, Carol later maddened in this quads for this season.', 'Do you mean it?', 'The new patch is less invasive than the whole one, but still causes regression.', 'How is most of you are going to handle and be with this? Thank you and Q.', 'Thank you very much.', 'No one near the remote machine you could ring.', 'Sauce for the goose is sauce for the gonder.', 'Graphs is thought of writing songs when she was four years old.', "Isn't Kirk Mike Schula was the former H coach at Alabama?"]
['Joe Keaton disapproved of films, and Buster also had reservations about the medium.', "She'll be all right.", 'six', "All's well that ends well.", 'It is a busy market town that serves a large surrounding area.', 'The team had Olympic Champion Carolina Marin in their 

Transcribing:   0%|          | 14/16354 [00:23<7:42:32,  1.70s/it]


KeyboardInterrupt: 

In [14]:
print(predictions)

[]


In [12]:
bleu = sacrebleu.corpus_bleu(predictions, [references])
print(f"\n\ud83d\udcc8 BLEU Score: {bleu.score:.5f}")

IndexError: list index out of range

In [None]:
for i in range(5):
    print(f"\nExample {i+1}:")
    print(f"Reference English: {references[i]}")
    print(f"Model Prediction: {predictions[i]}")