In [None]:
import matplotlib.pyplot as plt

# Per-epoch values for the ten epochs plotted below (hard-coded in this notebook).
epochs = list(range(1, 11))
training_loss = [3.726500, 2.443600, 2.121000, 1.893800, 1.568600, 1.327100, 1.201800, 1.108600, 1.030000, 0.982400]
validation_loss = [2.590547, 2.388471, 2.294731, 2.243684, 1.953684, 1.864233, 1.815732, 1.844709, 1.797269, 1.770485]
cer = [0.671199, 0.659956, 0.623345, 0.643769, 0.563482, 0.532992, 0.533522, 0.534817, 0.517276, 0.514097]

# The whole figure is built in ONE cell. With the inline backend a figure is
# rendered and closed when its cell finishes, so creating the figure in one
# cell and the subplots in later cells loses the 14x6 canvas and scatters the
# panels over separate figures.
fig, (loss_ax, cer_ax) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: training vs. validation loss
loss_ax.plot(epochs, training_loss, label='Training Loss', marker='o')
loss_ax.plot(epochs, validation_loss, label='Validation Loss', marker='o')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()
loss_ax.grid(True)

# Right panel: character error rate
cer_ax.plot(epochs, cer, label='CER', marker='o', color='r')
cer_ax.set_xlabel('Epochs')
cer_ax.set_ylabel('CER')
cer_ax.set_title('Character Error Rate')
cer_ax.legend()
cer_ax.grid(True)

# Show the figure
fig.tight_layout()
plt.show()

# Đánh giá mô hình 

In [None]:
# Install the required libraries (uncomment to run)
# %pip install nltk
# %pip install rouge_score

In [None]:
import os
import torch
import evaluate
import pandas as pd
from PIL import Image
from dataclasses import dataclass
from torch.utils.data import Dataset
from transformers import (
    VisionEncoderDecoderModel,
    TrOCRProcessor,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    default_data_collator
)
from nltk.translate.bleu_score import sentence_bleu
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### Gọi mô hình đã tinh chỉnh

In [None]:
# Checkpoint directory of the fine-tuned model; passed to from_pretrained()
# further down for both the model and its processor.
finetuned_model_path = "CheckPoints/SaveModel/saved_model_03"

In [None]:
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable training hyper-parameters, read as class attributes in this notebook."""

    # Used for both the per-device train and eval batch sizes.
    BATCH_SIZE: int = 48
    # Passed as num_train_epochs.
    EPOCHS: int = 10
    LEARNING_RATE: float = 5e-5

@dataclass(frozen=True)
class DatasetConfig:
    """Immutable dataset location settings."""

    # Base directory joined with the annotation file names and image folders.
    DATA_ROOT: str = "scut_data"

@dataclass(frozen=True)
class ModelConfig:
    """Immutable model selection settings."""

    # Checkpoint identifier handed to TrOCRProcessor.from_pretrained().
    MODEL_NAME: str = "microsoft/trocr-small-printed"

In [None]:
# Annotation files are read as header-less fixed-width text; the first two
# columns are then named 'file_name' and 'text'.
# NOTE(review): read_fwf infers column boundaries itself, so transcriptions
# containing spaces may spill into extra columns — confirm against the files.
train_df = pd.read_fwf(
    os.path.join(DatasetConfig.DATA_ROOT, 'scut_train.txt'),
    header=None,
).rename(columns={0: 'file_name', 1: 'text'})

test_df = pd.read_fwf(
    os.path.join(DatasetConfig.DATA_ROOT, 'scut_test.txt'),
    header=None,
).rename(columns={0: 'file_name', 1: 'text'})

#### Tăng cường dữ liệu

In [None]:
# Augmentation pipeline: a colour jitter (brightness and hue) followed by a
# Gaussian blur, applied in that order.
train_transforms = transforms.Compose(
    [
        transforms.ColorJitter(brightness=0.5, hue=0.3),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    ]
)

#### Mã hóa dữ liệu

In [None]:
# Processor for the checkpoint named in ModelConfig; the datasets below use it
# to produce pixel values and to tokenise the label text.
processor = TrOCRProcessor.from_pretrained(ModelConfig.MODEL_NAME)

In [None]:
class CustomOCRDataset(Dataset):
    """Dataset pairing text images with tokenised transcription labels.

    Args:
        root_dir: Directory that contains the image files.
        df: DataFrame with a 'file_name' and a 'text' column.
        processor: Callable that converts an image into ``pixel_values`` and
            exposes a ``tokenizer`` attribute (a TrOCRProcessor in this notebook).
        max_target_length: Length every label sequence is padded or truncated to.
        augment: When True (the default, which keeps the previous behaviour)
            the module-level ``train_transforms`` pipeline is applied to every
            image. Pass False for evaluation data so metrics are computed on
            unaltered images.
    """

    def __init__(self, root_dir, df, processor, max_target_length=128, augment=True):
        self.root_dir = root_dir
        self.df = df
        self.processor = processor
        self.max_target_length = max_target_length
        self.augment = augment

    def __len__(self):
        """Return the number of rows in the annotation frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``{"pixel_values": tensor, "labels": tensor}`` for row ``idx``."""
        # Positional lookup, so a frame whose index is not 0..n-1 (filtered,
        # shuffled, concatenated) still works.
        file_name = self.df['file_name'].iloc[idx]
        text = self.df['text'].iloc[idx]

        # The context manager closes the file handle as soon as the RGB copy
        # exists, instead of leaving it open until garbage collection.
        with Image.open(os.path.join(self.root_dir, file_name)) as raw_image:
            image = raw_image.convert('RGB')
        if self.augment:
            image = train_transforms(image)

        pixel_values = self.processor(image, return_tensors='pt').pixel_values
        labels = self.processor.tokenizer(
            text,
            padding='max_length',
            max_length=self.max_target_length,
            # Without truncation an over-long text yields a longer label list
            # than its neighbours and the batch can no longer be stacked.
            truncation=True,
        ).input_ids

        # Padding positions are replaced by -100 in the label sequence.
        pad_token_id = self.processor.tokenizer.pad_token_id
        labels = [-100 if token == pad_token_id else token for token in labels]

        return {
            # Drop only the leading batch dimension added by the processor.
            "pixel_values": pixel_values.squeeze(0),
            "labels": torch.tensor(labels),
        }

In [None]:

# Build one dataset per split; each pairs an image folder under DATA_ROOT with
# the matching annotation frame and shares the same processor.
# NOTE(review): CustomOCRDataset.__getitem__ applies `train_transforms` to every
# sample, so the validation images are jittered/blurred as well — confirm that
# evaluating on augmented images is intended.
train_dataset, valid_dataset = (
    CustomOCRDataset(
        root_dir=os.path.join(DatasetConfig.DATA_ROOT, split_dir),
        df=split_df,
        processor=processor,
    )
    for split_dir, split_df in (('scut_train/', train_df), ('scut_test/', test_df))
)

In [None]:
# Report how many samples each split contains.
print(f"Number of training examples: {len(train_dataset)}")
print(f"Number of validation examples: {len(valid_dataset)}")

### BLEU

In [None]:
# def compute_bleu(pred):
#     labels_ids = pred.label_ids
#     pred_ids = pred.predictions

#     pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
#     labels_ids[labels_ids == -100] = processor.tokenizer.pad_token_id
#     label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)

#     bleu_scores = [sentence_bleu([ref.split()], pred.split()) for ref, pred in zip(label_str, pred_str)]
#     avg_bleu = sum(bleu_scores) / len(bleu_scores)

#     return {"bleu": avg_bleu}

### ROUGE

In [None]:
# rouge_metric = evaluate.load('rouge')

# def compute_rouge(pred):
#     labels_ids = pred.label_ids
#     pred_ids = pred.predictions

#     pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
#     labels_ids[labels_ids == -100] = processor.tokenizer.pad_token_id
#     label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)

#     rouge = rouge_metric.compute(predictions=pred_str, references=label_str)

#     return rouge

### CER

In [None]:
# Load the 'cer' metric through the `evaluate` library; compute_cer below calls
# its compute() method.
cer_metric = evaluate.load('cer')

def compute_cer(pred):
    """Compute the character error rate for one evaluation pass.

    Args:
        pred: Object exposing ``predictions`` (generated token ids) and
            ``label_ids`` (reference token ids, with -100 at ignored
            positions), both as arrays supporting ``.copy()`` and boolean-mask
            assignment.

    Returns:
        dict: ``{"cer": value}`` where ``value`` is whatever
        ``cer_metric.compute`` returns for the decoded strings.
    """
    pad_token_id = processor.tokenizer.pad_token_id

    # Work on copies: the same `pred` object is handed to several metric
    # functions, so its arrays must not be rewritten in place.
    labels_ids = pred.label_ids.copy()
    pred_ids = pred.predictions.copy()

    # -100 is a masking value, not a vocabulary id, so it cannot be decoded.
    # Map it to the pad token in both arrays before decoding.
    labels_ids[labels_ids == -100] = pad_token_id
    pred_ids[pred_ids == -100] = pad_token_id

    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)

    cer = cer_metric.compute(predictions=pred_str, references=label_str)

    return {"cer": cer}

### WER

In [None]:
# Load the "wer" metric through the `evaluate` library; compute_wer below calls
# its compute() method.
wer_metric = evaluate.load("wer")

def compute_wer(pred):
    """Compute the word error rate for one evaluation pass.

    Args:
        pred: Object exposing ``predictions`` (generated token ids) and
            ``label_ids`` (reference token ids, with -100 at ignored
            positions), both as arrays supporting ``.copy()`` and boolean-mask
            assignment.

    Returns:
        dict: ``{"wer": value}`` where ``value`` is whatever
        ``wer_metric.compute`` returns for the decoded strings.
    """
    pad_token_id = processor.tokenizer.pad_token_id

    # Work on copies: the same `pred` object is handed to several metric
    # functions, so its arrays must not be rewritten in place.
    labels_ids = pred.label_ids.copy()
    pred_ids = pred.predictions.copy()

    # -100 is a masking value, not a vocabulary id, so it cannot be decoded.
    # Map it to the pad token in both arrays before decoding.
    labels_ids[labels_ids == -100] = pad_token_id
    pred_ids[pred_ids == -100] = pad_token_id

    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)

    wer = wer_metric.compute(predictions=pred_str, references=label_str)

    return {"wer": wer}

In [None]:
# Arguments shared by the trainers in this notebook; evaluation generates text
# (predict_with_generate) so the metric functions receive token ids.
training_args = Seq2SeqTrainingArguments(
    predict_with_generate=True,
    evaluation_strategy='epoch',
    per_device_train_batch_size=TrainingConfig.BATCH_SIZE,
    per_device_eval_batch_size=TrainingConfig.BATCH_SIZE,
    # Request half precision only when a CUDA device exists. The notebook
    # falls back to the CPU for `device`, and a hard-coded fp16=True makes
    # this constructor fail on such a machine.
    fp16=torch.cuda.is_available(),
    output_dir='CheckPoints/Seq2seq/seq2seq_model_printed_03/',
    logging_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    report_to='tensorboard',
    num_train_epochs=TrainingConfig.EPOCHS
)

In [None]:
def combined_metrics(pred):
    """Return the CER and WER results for ``pred`` merged into one dict.

    BLEU and ROUGE are not included; their helper functions are commented out
    in this notebook.
    """
    # compute_cer runs first and compute_wer second, so "cer" precedes "wer".
    return {**compute_cer(pred), **compute_wer(pred)}

In [None]:
# Load the original (not fine-tuned) checkpoint and move it to `device`.
pretrained_model_path = "microsoft/trocr-small-printed"
pretrained_model = VisionEncoderDecoderModel.from_pretrained(pretrained_model_path)
pretrained_model = pretrained_model.to(device)

In [None]:
# Rebinds `processor` (first created earlier from ModelConfig.MODEL_NAME) using
# pretrained_model_path; both hold the same checkpoint string. The datasets
# built above keep their reference to the earlier processor instance.
processor = TrOCRProcessor.from_pretrained(pretrained_model_path)

In [None]:
# Copy the special-token ids from the tokenizer into the model config.
pretrained_model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
# NOTE(review): the -100 fallback only applies when the tokenizer has no pad
# token; -100 is the label-masking value used elsewhere in this notebook, not
# a vocabulary id — confirm this branch is never taken.
pretrained_model.config.pad_token_id = (
    -100
    if processor.tokenizer.pad_token_id is None
    else processor.tokenizer.pad_token_id
)

In [None]:
# Same settings as the `training_args` defined earlier in this notebook,
# re-created here right before the trainers are built.
training_args = Seq2SeqTrainingArguments(
    predict_with_generate=True,
    evaluation_strategy='epoch',
    per_device_train_batch_size=TrainingConfig.BATCH_SIZE,
    per_device_eval_batch_size=TrainingConfig.BATCH_SIZE,
    # Request half precision only when a CUDA device exists. The notebook
    # falls back to the CPU for `device`, and a hard-coded fp16=True makes
    # this constructor fail on such a machine.
    fp16=torch.cuda.is_available(),
    output_dir='CheckPoints/Seq2seq/seq2seq_model_printed_03/',
    logging_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    report_to='tensorboard',
    num_train_epochs=TrainingConfig.EPOCHS
)

In [None]:
# Trainer wrapping the original checkpoint; in this notebook it is only used
# to call evaluate().
pretrained_trainer = Seq2SeqTrainer(
    model=pretrained_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=default_data_collator,
    compute_metrics=combined_metrics,
    # The processor's feature extractor is passed in the `tokenizer` slot.
    tokenizer=processor.feature_extractor,
)

In [None]:
# Score the original checkpoint on the evaluation dataset.
pretrained_eval_results = pretrained_trainer.evaluate()
print(f"Original Model Evaluation Results: {pretrained_eval_results}")

In [None]:
# Load the fine-tuned checkpoint (model and processor) from the same directory
# and move the model to `device`.
finetuned_model = VisionEncoderDecoderModel.from_pretrained(finetuned_model_path)
finetuned_model = finetuned_model.to(device)
finetuned_processor = TrOCRProcessor.from_pretrained(finetuned_model_path)

In [None]:
# Evaluation-only trainer for the fine-tuned checkpoint (no train_dataset is
# supplied); it reuses the same arguments, metrics and evaluation data as the
# pretrained trainer.
finetuned_trainer = Seq2SeqTrainer(
    model=finetuned_model,
    args=training_args,
    eval_dataset=valid_dataset,
    data_collator=default_data_collator,
    compute_metrics=combined_metrics,
    # The processor's feature extractor is passed in the `tokenizer` slot.
    tokenizer=finetuned_processor.feature_extractor,
)

In [None]:
# Score the fine-tuned checkpoint on the evaluation dataset.
finetuned_eval_results = finetuned_trainer.evaluate()
print(f"Finetuned Model Evaluation Results: {finetuned_eval_results}")

In [None]:
# Pull the metrics to compare out of both result dicts; the values are looked
# up under the metric name prefixed with "eval_".
metrics = ["wer", "cer"]
pretrained_scores = [pretrained_eval_results["eval_" + name] for name in metrics]
finetuned_scores = [finetuned_eval_results["eval_" + name] for name in metrics]

In [None]:
# Grouped bar chart: one pair of bars (pretrained, fine-tuned) per metric.
fig, ax = plt.subplots(figsize=(6, 6))

bar_width = 0.2
index = range(len(metrics))

# The fine-tuned bars are shifted right by one bar width so each pair sits
# side by side.
bar1 = ax.bar(index, pretrained_scores, bar_width, label='Pretrained Model')
bar2 = ax.bar(
    [pos + bar_width for pos in index],
    finetuned_scores,
    bar_width,
    label='Finetuned Model',
)

ax.set(
    xlabel='Metrics',
    ylabel='Scores',
    title='Graphs evaluate pre-trained and fine-tuned model results',
)
# Put each tick midway between the two bar positions of its metric.
ax.set_xticks([pos + bar_width / 2 for pos in index])
ax.set_xticklabels(metrics)
ax.legend()

plt.show()