# 🔍 Test Model Accuracy
ทดสอบความแม่นยำของโมเดล OCR เพื่อหารูปภาพที่ให้ผลลัพธ์ดีที่สุด

In [None]:
import os
import sys
import torch
from PIL import Image
import sentencepiece as spm
from transformers import VisionEncoderDecoderModel, TrOCRProcessor, ViTImageProcessor
from IPython.display import display, HTML
import glob

# Project locations: the model checkpoint and the SentencePiece model are
# both resolved relative to BASE_DIR.
BASE_DIR = r'e:\TrOCR_Antigravity'
MODEL_PATH = os.path.join(BASE_DIR, 'Model', 'best_model.pt')
TOKENIZER_PATH = os.path.join(BASE_DIR, 'Model_Implement', 'thai_sp_30000.model')

# Report the resolved paths and whether each one is present on disk.
print(
    f'Model path: {MODEL_PATH}',
    f'Tokenizer path: {TOKENIZER_PATH}',
    f'Model exists: {os.path.exists(MODEL_PATH)}',
    f'Tokenizer exists: {os.path.exists(TOKENIZER_PATH)}',
    sep='\n',
)

In [None]:
# Thai Tokenizer Class
class ThaiTokenizer:
    """Small wrapper around a SentencePiece model for encoding and decoding text.

    Attributes set by ``__init__``:
        sp: the loaded ``SentencePieceProcessor``.
        vocab_size: number of pieces in the model.
        bos_token_id, eos_token_id, pad_token_id, unk_token_id: ids looked up
            for the pieces ``<s>``, ``</s>``, ``<pad>`` and ``<unk>``.
    """

    def __init__(self, model_path):
        """Load the SentencePiece model at ``model_path`` and resolve special ids.

        Prints the vocabulary size and the BOS/EOS/PAD ids as a sanity check.
        """
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(model_path)
        self.vocab_size = self.sp.GetPieceSize()

        # Special tokens
        self.bos_token_id = self.sp.PieceToId('<s>')
        self.eos_token_id = self.sp.PieceToId('</s>')
        # Fall back to id 0 when the '<pad>' lookup reports -1.
        # NOTE(review): PieceToId may return the unknown-piece id rather than
        # -1 for a piece that is not in the vocabulary -- confirm against the
        # SentencePiece documentation for the installed version.
        pad_id = self.sp.PieceToId('<pad>')
        self.pad_token_id = pad_id if pad_id != -1 else 0
        self.unk_token_id = self.sp.PieceToId('<unk>')

        print(f'Vocab size: {self.vocab_size}')
        print(f'BOS: {self.bos_token_id}, EOS: {self.eos_token_id}, PAD: {self.pad_token_id}')

    def encode(self, text):
        """Return the list of piece ids for ``text``."""
        return self.sp.EncodeAsIds(text)

    def decode(self, ids, skip_special_tokens=True):
        """Turn a sequence of ids back into text.

        Args:
            ids: a ``torch.Tensor`` or any iterable of ids.
            skip_special_tokens: when true (the default), BOS, EOS and PAD ids
                are removed before decoding.

        Returns:
            The decoded string.
        """
        if isinstance(ids, torch.Tensor):
            ids = ids.tolist()
        ids = list(ids)
        if skip_special_tokens:
            # Built once per call instead of once per element.
            special_ids = (self.bos_token_id, self.eos_token_id, self.pad_token_id)
            ids = [i for i in ids if i not in special_ids]
        return self.sp.DecodeIds(ids)

    def batch_decode(self, batch_ids, skip_special_tokens=True):
        """Decode every sequence in ``batch_ids``; returns a list of strings.

        ``skip_special_tokens`` is forwarded to :meth:`decode` for each row.
        """
        return [
            self.decode(ids, skip_special_tokens=skip_special_tokens)
            for ids in batch_ids
        ]

# Build the tokenizer instance used by the rest of the notebook from the
# SentencePiece model file.
tokenizer = ThaiTokenizer(model_path=TOKENIZER_PATH)
print('Tokenizer loaded!')

In [None]:
# Load model
print('Loading model...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

# Read the checkpoint from disk.
# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code. Only load checkpoints from a trusted source.
checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)

# Start from the pretrained TrOCR model.
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')

# Resize the decoder's token embeddings to the tokenizer's vocabulary size
# (presumably so the fine-tuned checkpoint tensors match in shape).
model.decoder.resize_token_embeddings(tokenizer.vocab_size)

# The checkpoint is either a dict holding the weights under
# 'model_state_dict' or the state dict itself.
state_dict = checkpoint['model_state_dict'] if 'model_state_dict' in checkpoint else checkpoint

# strict=False tolerates key mismatches, so surface them explicitly: a
# checkpoint whose keys do not line up would otherwise leave the pretrained
# weights in place without any indication.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys:
    print(f'Warning: {len(load_result.missing_keys)} missing keys, '
          f'e.g. {load_result.missing_keys[:5]}')
if load_result.unexpected_keys:
    print(f'Warning: {len(load_result.unexpected_keys)} unexpected keys, '
          f'e.g. {load_result.unexpected_keys[:5]}')

model.to(device)
model.eval()

# Load image processor
image_processor = ViTImageProcessor.from_pretrained('microsoft/trocr-base-handwritten')

print('Model loaded!')

In [None]:
# Prediction function
def predict(image_path, *, max_length=128, num_beams=4):
    """Predict the text contained in an image file.

    Uses the module-level ``image_processor``, ``model``, ``tokenizer`` and
    ``device``.

    Args:
        image_path: path of the image to read.
        max_length: ``max_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded string for the first generated sequence.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the RGB copy has been made.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    pixel_values = image_processor(image, return_tensors='pt').pixel_values.to(device)

    # Generate without tracking gradients.
    with torch.no_grad():
        generated_ids = model.generate(
            pixel_values,
            max_length=max_length,
            num_beams=num_beams,
            decoder_start_token_id=tokenizer.bos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode the first (only) sequence of the batch.
    return tokenizer.decode(generated_ids[0])


print('Prediction function ready!')

In [None]:
# Test with existing sample images
sample_dir = os.path.join(BASE_DIR, 'webapp', 'static', 'sample_images')
print(f'Sample directory: {sample_dir}')
print(f'Exists: {os.path.exists(sample_dir)}')

# isdir (not exists) so a stray file at that path does not reach os.listdir.
if os.path.isdir(sample_dir):
    # sorted() gives a reproducible order; os.listdir order is arbitrary.
    for img_file in sorted(os.listdir(sample_dir)):
        if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img_path = os.path.join(sample_dir, img_file)
        print(f'\n--- {img_file} ---')

        # One unreadable file should not stop the remaining samples, matching
        # the error handling of the batch test further down.
        try:
            # Show image; the context manager closes the file afterwards.
            with Image.open(img_path) as img:
                display(img)

            # Predict
            result = predict(img_path)
            print(f'Prediction: {result}')
        except Exception as e:
            print(f'Error: {e}')

## 📁 ทดสอบรูปภาพจาก folder ที่กำหนด
เปลี่ยน `TEST_FOLDER` เป็น path ที่มีรูปภาพที่ต้องการทดสอบ

In [None]:
# ========================================
# Folder that holds the images to test
# ========================================
TEST_FOLDER = r'e:\TrOCR_Antigravity\test_images'  # <-- change this

# Report the folder if it is already there; otherwise create it and ask the
# user to drop test images into it.
if os.path.exists(TEST_FOLDER):
    print(f'Test folder: {TEST_FOLDER}')
else:
    os.makedirs(TEST_FOLDER)
    print(f'Created folder: {TEST_FOLDER}')
    print('‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡πÉ‡∏™‡πà‡∏£‡∏π‡∏õ‡∏†‡∏≤‡∏û‡∏ó‡∏µ‡πà‡∏ï‡πâ‡∏≠‡∏á‡∏Å‡∏≤‡∏£‡∏ó‡∏î‡∏™‡∏≠‡∏ö‡πÉ‡∏ô folder ‡∏ô‡∏µ‡πâ')

In [None]:
# Run the model on every image in TEST_FOLDER and collect the predictions.
# Each entry of `results` is a dict with keys 'file', 'prediction' and 'path';
# on failure 'prediction' holds 'ERROR: <message>'.
results = []

image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.webp')
# sorted() gives a reproducible order; os.listdir order is arbitrary.
image_files = sorted(
    f for f in os.listdir(TEST_FOLDER) if f.lower().endswith(image_extensions)
)

print(f'Found {len(image_files)} images\n')

# Widest preview shown inline; larger images are scaled down for display only.
max_width = 600

for img_file in image_files:
    img_path = os.path.join(TEST_FOLDER, img_file)

    print(f'\n{"="*50}')
    print(f'File: {img_file}')
    print(f'{"="*50}')

    # Opening and displaying happen inside the try as well, so a corrupt or
    # unreadable file is recorded as an error instead of aborting the run.
    try:
        # Show image; the context manager closes the file afterwards.
        with Image.open(img_path) as img:
            if img.width > max_width:
                ratio = max_width / img.width
                # max(1, ...) keeps the height valid for extremely wide images.
                img_display = img.resize((max_width, max(1, int(img.height * ratio))))
            else:
                img_display = img
            display(img_display)

        # Predict
        result = predict(img_path)
        print(f'\nüî§ Prediction: {result}')
        results.append({'file': img_file, 'prediction': result, 'path': img_path})
    except Exception as e:
        print(f'Error: {e}')
        results.append({'file': img_file, 'prediction': f'ERROR: {e}', 'path': img_path})

## 📊 สรุปผลลัพธ์

In [None]:
# Print a numbered summary of every entry collected in `results`:
# the file name followed by its prediction.
print('\n' + '=' * 60)
print('‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î')
print('=' * 60)

for i, r in enumerate(results, start=1):
    print(f"\n{i}. {r['file']}", f"   ‚Üí {r['prediction']}", sep='\n')

## 🎯 ทดสอบรูปภาพเดี่ยว
ใส่ path รูปภาพที่ต้องการทดสอบ

In [None]:
# ========================================
# Test a single image
# ========================================
SINGLE_IMAGE = r''  # <-- put the image path here

if not SINGLE_IMAGE:
    # No path given yet: ask the user to fill in SINGLE_IMAGE.
    print('‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡πÉ‡∏™‡πà path ‡∏£‡∏π‡∏õ‡∏†‡∏≤‡∏û‡πÉ‡∏ô SINGLE_IMAGE')
elif not os.path.exists(SINGLE_IMAGE):
    # A path was given but does not exist: say so instead of showing the
    # "please enter a path" prompt. Same wording as the sample-copy cell.
    print(f'Not found: {SINGLE_IMAGE}')
else:
    print(f'Testing: {SINGLE_IMAGE}')

    # Show image; the context manager closes the file afterwards.
    with Image.open(SINGLE_IMAGE) as img:
        display(img)

    # Predict
    result = predict(SINGLE_IMAGE)
    print(f'\nüî§ Prediction: {result}')

## 📋 เลือกรูปที่ดีที่สุดเป็น Sample
หลังจากทดสอบแล้ว ให้ copy รูปที่แม่นยำไปที่ `webapp/static/sample_images/`

In [None]:
import shutil

# ========================================
# Select the images to use as samples
# ========================================
SELECTED_IMAGES = [
    # Put the paths of the chosen images here, e.g.:
    # r'e:\TrOCR_Antigravity\test_images\good_image1.jpg',
    # r'e:\TrOCR_Antigravity\test_images\good_image2.jpg',
]

SAMPLE_DIR = os.path.join(BASE_DIR, 'webapp', 'static', 'sample_images')

# shutil.copy2 does not create missing directories, so make sure the
# destination exists before copying anything.
if SELECTED_IMAGES:
    os.makedirs(SAMPLE_DIR, exist_ok=True)

for i, img_path in enumerate(SELECTED_IMAGES, 4):  # numbering starts at sample4
    # isfile (not exists) so a directory path is reported rather than crashing
    # inside shutil.copy2.
    if not os.path.isfile(img_path):
        print(f'Not found: {img_path}')
        continue

    ext = os.path.splitext(img_path)[1]
    dest = os.path.join(SAMPLE_DIR, f'sample{i}{ext}')
    # The numbering is fixed, so an earlier sample may already occupy this
    # name; make the replacement visible.
    if os.path.exists(dest):
        print(f'Overwriting existing sample: {dest}')
    shutil.copy2(img_path, dest)
    print(f'Copied: {img_path} -> {dest}')

print('\nDone! ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö sample_images folder')