In [None]:
!python asr-1.py record myvoice.wav 30

In [None]:
!python asr-1.py play myvoice.wav 30

In [None]:
!python asr-1.py transcribe myvoice.wav

In [None]:
!python asr-1.py evaluate myvoice_transcription.txt ground-truth.txt

In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load pre-trained spelling correction model and tokenizer
spelling_correction = 'oliverguhr/spelling-correction-english-base'
spelling_model = AutoModelForSeq2SeqLM.from_pretrained(spelling_correction)
spelling_tokenizer = AutoTokenizer.from_pretrained(spelling_correction)

In [2]:
# Load pre-trained grammar correction model and tokenizer
grammar_correction = 'vennify/t5-base-grammar-correction'
grammar_model = AutoModelForSeq2SeqLM.from_pretrained(grammar_correction)
grammar_tokenizer = AutoTokenizer.from_pretrained(grammar_correction)

In [12]:
# Function to correct spelling
def correct_spelling(input_text):
    sentences = sent_tokenize(input_text)
    corrected_sentences = []
    for sentence in sentences:
        tokens = spelling_tokenizer(sentence, return_tensors="pt", truncation=True, max_length=spelling_tokenizer.model_max_length)
        outputs = spelling_model.generate(**tokens, max_length=spelling_tokenizer.model_max_length)
        corrected_sentence = spelling_tokenizer.decode(outputs[0], skip_special_tokens=True)
        corrected_sentences.append(corrected_sentence)
    return ' '.join(corrected_sentences)

# Function to correct grammar
def correct_grammar(input_text):
    tokens = grammar_tokenizer(input_text, return_tensors="pt", truncation=True, max_length=grammar_tokenizer.model_max_length)
    outputs = grammar_model.generate(**tokens, max_length=grammar_tokenizer.model_max_length)
    return grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Function to correct both spelling and grammar
def correct_spelling_and_grammar(input_text):
    spelling_corrected = correct_spelling(input_text)
    return correct_grammar(spelling_corrected)

In [None]:
# Read input text from file
with open('../Data/myvoice_transcription.txt', 'r', encoding='utf-8') as file:
    input_text = file.read().strip()

# Apply corrections
spelling_corrected_text = correct_spelling(input_text)
grammar_corrected_text = correct_grammar(input_text)
combined_corrected_text = correct_spelling_and_grammar(input_text)

In [13]:
print("Original text:\n", input_text)
print("\nSpelling corrected text:\n", spelling_corrected_text)
print("\nGrammar corrected text:\n", grammar_corrected_text)
print("\nCombined spelling and grammar corrected text:\n", combined_corrected_text)

Original text:
 there don't sien to be any firm ruse above houseman demands correspond to follower accounts is the grantel taquon that the pogram is his early stage and that is still experimenting with the foremad we are continuing to test the payments as we roll up to more creatious and except the twillian san chentel flacture v

Spelling corrected text:
 There don't seem to be any firm rules above houseman demands correspond to follower accounts. is the grantel taquon that the program is his early stage, and that is still experimenting with the foremad. We are continuing to test the payments as we roll up to more creative and except the twillian san chentel flacture. .

Grammar corrected text:
 There don't seem to be any firm ruse above houseman demands correspond to follower accounts is the grantel taquon that the program is his early stage and that is still experimenting with the foremad we are continuing to test the payments as we roll up to more creative and except the twillian s

In [14]:
import numpy as np

def calculate_wer(reference, hypothesis):
    r = reference.split()
    h = hypothesis.split()
    # Building the matrix
    d = np.zeros((len(r)+1)*(len(h)+1), dtype=np.uint8)
    d = d.reshape((len(r)+1, len(h)+1))
    for i in range(len(r)+1):
        for j in range(len(h)+1):
            if i == 0: 
                d[0][j] = j
            elif j == 0: 
                d[i][0] = i
    # Calculation
    for i in range(1, len(r)+1):
        for j in range(1, len(h)+1):
            if r[i-1] == h[j-1]:
                d[i][j] = d[i-1][j-1]
            else:
                substitute = d[i-1][j-1] + 1
                insert = d[i][j-1] + 1
                delete = d[i-1][j] + 1
                d[i][j] = min(substitute, insert, delete)
    return d[len(r)][len(h)] / float(len(r))


In [15]:
def calculate_cer(reference, hypothesis):
    r = reference
    h = hypothesis
    # Building the matrix
    d = np.zeros((len(r)+1)*(len(h)+1), dtype=np.uint8)
    d = d.reshape((len(r)+1, len(h)+1))
    for i in range(len(r)+1):
        for j in range(len(h)+1):
            if i == 0:
                d[0][j] = j
            elif j == 0:
                d[i][0] = i
    # Calculation
    for i in range(1, len(r)+1):
        for j in range(1, len(h)+1):
            if r[i-1] == h[j-1]:
                d[i][j] = d[i-1][j-1]
            else:
                substitute = d[i-1][j-1] + 1
                insert = d[i][j-1] + 1
                delete = d[i-1][j] + 1
                d[i][j] = min(substitute, insert, delete)
    return d[len(r)][len(h)] / float(len(r))


In [16]:
# Read ground truth text from file
with open('../Data/ground-truth.txt', 'r', encoding='utf-8') as file:
    ground_truth_text = file.read().strip()

# Calculate WER and CER for spelling-corrected text
spelling_wer = calculate_wer(ground_truth_text, spelling_corrected_text)
spelling_cer = calculate_cer(ground_truth_text, spelling_corrected_text)

# Calculate WER and CER for grammar-corrected text
grammar_wer = calculate_wer(ground_truth_text, grammar_corrected_text)
grammar_cer = calculate_cer(ground_truth_text, grammar_corrected_text)

# Calculate WER and CER for combined corrected text
combined_wer = calculate_wer(ground_truth_text, combined_corrected_text)
combined_cer = calculate_cer(ground_truth_text, combined_corrected_text)

print("Spelling Corrected Text - Word Error Rate:", spelling_wer)
print("Spelling Corrected Text - Character Error Rate:", spelling_cer)
print("Grammar Corrected Text - Word Error Rate:", grammar_wer)
print("Grammar Corrected Text - Character Error Rate:", grammar_cer)
print("Combined Corrected Text - Word Error Rate:", combined_wer)
print("Combined Corrected Text - Character Error Rate:", combined_cer)


Spelling Corrected Text - Word Error Rate: 0.5333333333333333
Spelling Corrected Text - Character Error Rate: 0.3159340659340659
Grammar Corrected Text - Word Error Rate: 0.55
Grammar Corrected Text - Character Error Rate: 0.33516483516483514
Combined Corrected Text - Word Error Rate: 0.5333333333333333
Combined Corrected Text - Character Error Rate: 0.31868131868131866
