# Step Tracker OCR Pipeline
Pipeline untuk melakukan OCR pada screenshot aplikasi step tracker menggunakan Tesseract.

In [1]:
import cv2
import pytesseract
from PIL import Image
import numpy as np

In [2]:
def preprocess_image(image_path):
    """Load an image from disk and binarize it to improve OCR accuracy.

    The image is converted to grayscale, upscaled 2x with cubic
    interpolation, smoothed with a bilateral filter and finally binarized
    with Otsu thresholding.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        The thresholded image produced by ``cv2.threshold``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the image
            (missing file, unreadable file, or unsupported format).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the failure surfaces later as an opaque
        # OpenCV assertion inside cvtColor.
        raise FileNotFoundError(
            f"Could not read image (missing or not a valid image file): {image_path!r}"
        )
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Upscale so the text is larger before it is handed to the OCR engine.
    scale = 2
    gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

    # Bilateral filter: smooths the image while keeping edges.
    filtered = cv2.bilateralFilter(gray, 9, 75, 75)

    # Otsu thresholding; the threshold argument (0) is ignored because
    # THRESH_OTSU selects the threshold automatically.
    _, thresh = cv2.threshold(filtered, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return thresh

In [3]:
def is_valid_text(text, width, height):
    """Decide whether an OCR token looks like real text rather than an icon.

    A token is rejected when any of the following holds:
      * it is shorter than two characters and is not a digit,
      * its bounding box is narrower than 20 or lower than 15,
      * it equals one of the known junk tokens or starts with one of the
        known junk prefixes.

    Args:
        text: The recognized token.
        width: Width of the token's bounding box.
        height: Height of the token's bounding box.

    Returns:
        True if the token passes every filter, False otherwise.
    """
    junk_tokens = ('@', '()', '(%)', 'G)', 'Qd', 'ks', 'ey', 'il')
    junk_prefixes = ('@', 'G)', 'Qd')

    # Single characters are only kept when they are digits.
    long_enough = len(text) >= 2 or text.isdigit()

    # The checks short-circuit in the order: length, box size, junk lists.
    return (
        long_enough
        and not (width < 20 or height < 15)
        and text not in junk_tokens
        and not text.startswith(junk_prefixes)
    )

def extract_steps_ocr(image_path):
    """Run Tesseract on a step-tracker screenshot and collect accepted tokens.

    The image is preprocessed with ``preprocess_image`` and passed to
    ``pytesseract.image_to_data``. A token is kept when it is non-blank,
    its confidence is strictly greater than 50 and ``is_valid_text``
    accepts it.

    Args:
        image_path: Path of the screenshot to process.

    Returns:
        A list of dicts with keys ``'text'`` and ``'confidence'``, in the
        order in which Tesseract reported the tokens.
    """
    binarized = preprocess_image(image_path)

    # Whitelist restricts recognition to digits, letters and common punctuation.
    tess_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,:/- '

    data = pytesseract.image_to_data(
        binarized,
        config=tess_config,
        output_type=pytesseract.Output.DICT,
    )

    accepted = []
    for idx, token in enumerate(data['text']):
        # Skip entries with no recognized text.
        if not token.strip():
            continue

        score = float(data['conf'][idx])
        box_w = data['width'][idx]
        box_h = data['height'][idx]

        # Drop low-confidence tokens first, then icon-like tokens.
        if not score > 50:
            continue
        if not is_valid_text(token, box_w, box_h):
            continue

        accepted.append({'text': token, 'confidence': score})

    return accepted

In [4]:
# Example usage: run the OCR pipeline on one screenshot and print each
# accepted token next to its confidence.
image_path = 'WhatsApp Image 2025-11-06 at 11.15.40.jpeg'

results = extract_steps_ocr(image_path)

# Header and separator emitted in one call; the output is the same two lines.
print("OCR Results:", "-" * 50, sep="\n")
for item in results:
    print(f"Text: {item['text']:<20} | Confidence: {item['confidence']:.2f}%")

OCR Results:
--------------------------------------------------
Text: 11.12                | Confidence: 89.00%
Text: Lo                   | Confidence: 85.00%
Text: Duration             | Confidence: 89.00%
Text: GD                   | Confidence: 53.00%
Text: 001938               | Confidence: 79.00%
Text: 147                  | Confidence: 72.00%
Text: kcal                 | Confidence: 90.00%
Text: 1                    | Confidence: 68.00%
Text: 137                  | Confidence: 81.00%
Text: steps/min            | Confidence: 91.00%
Text: 82                   | Confidence: 83.00%
Text: cm                   | Confidence: 90.00%
Text: Steps                | Confidence: 91.00%
Text: 2697                 | Confidence: 76.00%
Text: steps                | Confidence: 92.00%
Text: 179                  | Confidence: 79.00%
Text: bpm                  | Confidence: 88.00%
Text: Performance          | Confidence: 88.00%
Text: CD                   | Confidence: 68.00%
Text: 2,6               