In [30]:
import os

import pandas as pd
from flask import request, jsonify
from pytesseract import pytesseract
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [31]:
# Directory holding the saved T5 model and tokenizer files.
# Set the T5_GEC_MODEL_PATH environment variable to point at a different
# location; the original local path is used when the variable is unset.
model_path = os.environ.get('T5_GEC_MODEL_PATH', r'D:\Personal\Edu\OCR-Expresso\t5_gec_model')

In [32]:
# Restore the tokenizer and the T5 model stored under model_path.
# legacy=False is passed through to the tokenizer loader unchanged.
tokenizer = T5Tokenizer.from_pretrained(model_path, legacy=False)
loaded_model = T5ForConditionalGeneration.from_pretrained(model_path)

In [33]:
def grammar_corrector(input_texts, num_return_sequences, max_length=64, num_beams=4,
                      temperature=1.5, do_sample=True):
    """Generate corrected versions of ``input_texts`` with the notebook's T5 model.

    Uses the module-level ``tokenizer`` and ``loaded_model``.

    Args:
        input_texts: Text (or list of texts) passed straight to the tokenizer.
        num_return_sequences: Number of candidate outputs requested from
            ``generate`` for each input.
        max_length: Token limit applied both when tokenizing the inputs
            (truncation and padding) and when generating the outputs.
        num_beams: Beam width passed to ``generate``.
        temperature: Sampling temperature passed to ``generate``.
        do_sample: Whether ``generate`` samples instead of decoding greedily.
            No seed is set here, so with sampling enabled repeated calls may
            return different texts.

    Returns:
        The decoded outputs as returned by ``tokenizer.batch_decode``, with
        special tokens stripped.
    """
    # Every input is padded/truncated to exactly max_length tokens.
    batch = tokenizer(
        input_texts,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="pt",
    )

    # Generate candidate corrections with the loaded model.
    translated = loaded_model.generate(
        **batch,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=temperature,
        do_sample=do_sample,
    )

    # Turn the generated token ids back into plain text.
    return tokenizer.batch_decode(translated, skip_special_tokens=True)

In [34]:
# Load the sentence-pair dataset. Set the GEC_DATASET_CSV environment variable
# to read the CSV from another location; the original local path is the default.
df = pd.read_csv(os.environ.get('GEC_DATASET_CSV', 'D:/Personal/Edu/Study/FYP/NLP/data_set/c4_200m_550k.csv'))
df.shape

(550000, 2)

In [36]:
# Ordered hold-out: with shuffle=False the rows keep their file order,
# so the test set is the final 10% of the frame.
train_df, test_df = train_test_split(
    df,
    test_size=0.1,
    shuffle=False,
)
(train_df.shape, test_df.shape)

((495000, 2), (55000, 2))

In [37]:
def evaluate_model(model, tokenizer, dataset, batch_size=16):
    """Generate corrections for ``dataset`` in batches and score them against the references.

    Generated and reference sentences are handed to the scikit-learn
    classification metrics as opaque labels, so a prediction only counts as
    correct when it equals the reference string exactly.

    Args:
        model: Model used for generation; must provide ``generate``.
        tokenizer: Tokenizer paired with ``model``; it is called on the list
            of input texts and must provide ``batch_decode``.
        dataset: Sliceable sequence of mappings that each have an ``'input'``
            and an ``'output'`` key.
        batch_size: Number of examples sent to the model per generation call.

    Returns:
        Tuple ``(cm, accuracy, precision, recall, f1)``. Precision, recall and
        F1 use ``average='weighted'`` with ``zero_division=1``.
    """
    all_predictions = []
    all_labels = []

    # Ceiling division, computed once for the progress message.
    total_batches = (len(dataset) + batch_size - 1) // batch_size

    for batch_number, start_idx in enumerate(range(0, len(dataset), batch_size), start=1):
        # Slicing already clamps at the end of the sequence.
        batch_examples = dataset[start_idx:start_idx + batch_size]

        input_texts = [example['input'] for example in batch_examples]
        ground_truths = [example['output'] for example in batch_examples]

        # Generate with the model/tokenizer that were passed in (previously the
        # arguments were ignored in favour of the notebook-level globals).
        # The settings mirror grammar_corrector(..., num_return_sequences=1).
        # NOTE(review): do_sample=True with no seed set here means the
        # predictions, and therefore the metrics, may differ between runs.
        encoded = tokenizer(input_texts, truncation=True, padding='max_length', max_length=64, return_tensors="pt")
        generated = model.generate(**encoded, max_length=64, num_beams=4, num_return_sequences=1, temperature=1.5, do_sample=True)
        predictions = tokenizer.batch_decode(generated, skip_special_tokens=True)

        all_predictions.extend(predictions)
        all_labels.extend(ground_truths)

        print(f"Processed batch {batch_number}/{total_batches}")

    # NOTE(review): every distinct sentence becomes its own class, so the
    # confusion matrix is (n_distinct_sentences x n_distinct_sentences) and can
    # become very large on big test sets - confirm this is intended.
    cm = confusion_matrix(all_labels, all_predictions)

    # Exact-match rate between reference and generated sentences.
    accuracy = accuracy_score(all_labels, all_predictions)

    # Support-weighted averages over the sentence "classes".
    precision = precision_score(all_labels, all_predictions, average='weighted', zero_division=1)
    recall = recall_score(all_labels, all_predictions, average='weighted', zero_division=1)
    f1 = f1_score(all_labels, all_predictions, average='weighted', zero_division=1)

    return cm, accuracy, precision, recall, f1

In [38]:
# Turn the held-out frame into a list with one {column: value} dict per row,
# the shape evaluate_model indexes into.
test_data = test_df.to_dict('records')

In [None]:
# Score the loaded model on the held-out records and unpack the five results.
conf_matrix, accuracy, precision, recall, f1 = evaluate_model(
    model=loaded_model,
    tokenizer=tokenizer,
    dataset=test_data,
)

In [45]:

# Accuracy from evaluate_model: accuracy_score over reference vs. generated
# sentences, i.e. the share of rows where the two strings are identical.
print("Accuracy:", accuracy)


Accuracy: 0.76157


In [46]:
# F1 from evaluate_model: f1_score with average='weighted', zero_division=1,
# computed over whole sentences treated as class labels.
print("F1 Score:", f1)

F1 Score: 0.83636


In [47]:
# Precision from evaluate_model: precision_score with average='weighted',
# zero_division=1, computed over whole sentences treated as class labels.
print("Precision:", precision)


Precision: 0.68452


In [48]:
# Recall from evaluate_model: recall_score with average='weighted',
# zero_division=1, computed over whole sentences treated as class labels.
# NOTE(review): for single-label multiclass input, support-weighted recall is
# expected to equal accuracy; the recorded outputs of this notebook differ, and
# the evaluation cell has no execution count - verify that the printed metrics
# all come from the same run.
print("Recall:", recall)


Recall: 0.56147
