In [None]:
import os
import json
from PIL import Image
import numpy as np
import zipfile

"""
Each .json label file contains a list of items with the following format:
"label": "drugname"
"text": "..."
"box" : [x1, y1, x2, y2]
This cell converts each .json file, which contains the label class and bounding box of every object (such as "drugname", "usage", "date", ...),
into a .txt file following the YOLO format (c, x_center, y_center, width, height).
"""
# Mapping from a label name (as stored in the .json files) to its YOLO class index
class_mapping = {
    "drugname": 0
}

# Root folder of the prescription training data; images are read from its 'image' subfolder
DATA_DIR = "/kaggle/input/vaipe-dataset/public_train/prescription"
LABEL_DIR = os.path.join(DATA_DIR, "label")  # Assumes label files are in the 'label' folder

# Function to convert label files to YOLO format and save output
def convert_label():
    """Convert every .json label file in LABEL_DIR into a YOLO-format .txt file.

    For each label file, a .txt file with the same stem is written to
    ``/kaggle/working``. Every exported item becomes one line of the form
    ``class x_center y_center width height`` (space-separated), where the
    class index is looked up in ``class_mapping`` and the four box values are
    produced by ``convert_yolo``.

    Items whose label is not a key of ``class_mapping`` are skipped, so a
    label file without any matching item yields an empty .txt file.
    """
    # Get all .json files in the label directory
    labels_name = [f for f in os.listdir(LABEL_DIR) if f.endswith('.json')]

    for label_name in labels_name:
        # Read the JSON content from the label file
        with open(os.path.join(LABEL_DIR, label_name), 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Swap only the trailing extension; str.replace would also rewrite a
        # '.json' that happens to appear in the middle of the file name.
        output_name = os.path.splitext(label_name)[0] + '.txt'
        output_file_path = os.path.join("/kaggle/working", output_name)

        with open(output_file_path, "w") as file:
            for item in data:
                label = item['label']
                # class_mapping is the single source of truth for which labels
                # are exported; a separate hard-coded list could drift from it.
                if label not in class_mapping:
                    continue
                c = [class_mapping[label]]
                c.extend(convert_yolo(item['box'], label_name))
                file.write(" ".join(map(str, c)))  # Write in YOLO format (space-separated values)
                file.write("\n")

# Function to convert bounding box coordinates to YOLO format
def convert_yolo(box, label_name, image_size=None):
    """Convert a pixel-space bounding box into normalized YOLO coordinates.

    Args:
        box: Corner coordinates ``[x1, y1, x2, y2]`` in pixels.
        label_name: File name of the label file (``*.json``). Unless
            ``image_size`` is given, the image with the same stem and a
            ``.png`` extension is opened from ``DATA_DIR/image`` to read its
            dimensions.
        image_size: Optional ``(width, height)`` of the image in pixels. When
            provided, no image file is opened, which lets a caller that
            converts many boxes of the same image look the size up only once.

    Returns:
        list: ``[x_center, y_center, width, height]`` where the x values are
        divided by the image width and the y values by the image height.
    """
    if image_size is None:
        # Replace the .json extension with .png for the corresponding image file
        image_name = os.path.splitext(label_name)[0] + '.png'

        # The context manager closes the underlying file as soon as the size
        # is read; without it one file handle stays open per converted box.
        with Image.open(os.path.join(DATA_DIR, "image", image_name)) as img:
            w_img, h_img = img.size  # Get image dimensions
    else:
        w_img, h_img = image_size

    # Extract box coordinates
    x1, y1, x2, y2 = box

    # Convert box to YOLO format
    x_center = ((x1 + x2) / 2) / w_img
    y_center = ((y1 + y2) / 2) / h_img
    w_yolo = (x2 - x1) / w_img
    h_yolo = (y2 - y1) / h_img

    return [x_center, y_center, w_yolo, h_yolo]

# Function to zip all .txt files into a single zip file
def zip_output():
    """Bundle every .txt file found under /kaggle/working into output.zip.

    The working directory is walked recursively. Each matching file is stored
    in the archive under its bare file name, so the archive contains no
    sub-folders.
    """
    with zipfile.ZipFile("/kaggle/working/output.zip", 'w') as archive:
        for folder, _subdirs, names in os.walk("/kaggle/working"):
            for name in names:
                # Anything that is not a .txt file stays out of the archive
                if not name.endswith('.txt'):
                    continue
                source_path = os.path.join(folder, name)
                archive.write(source_path, arcname=name)

# Entry point of this cell: write the YOLO .txt files first, then archive them
if __name__ == "__main__":
    convert_label()
    zip_output()  # Zip all the .txt files after conversion


In [None]:
%pip install ultralytics
import ultralytics
ultralytics.checks()

!mkdir -p /kaggle/working/images/train
!mkdir -p /kaggle/working/labels/train

from distutils.dir_util import copy_tree
fromDir = '/kaggle/input/vaipe-dataset/public_train/prescription/image'
toDirectory = '/kaggle/working/images/train'
copy_tree(fromDir, toDirectory)

fromDir = '/kaggle/input/label-yolo'
toDirectory = '/kaggle/working/labels/train'
copy_tree(fromDir, toDirectory)

from ultralytics import YOLO

model = YOLO("yolo11n.yaml")
results = model.train(data="/kaggle/input/yaml11/model.yaml", epochs=5)
model.export()

In [None]:
! ls /kaggle/working/runs/detect/train/weights
!mv /kaggle/runs/detect/train4/weights/best.pt /kaggle/working/


## Prepare the dataset for drug-name OCR

In [None]:
import json
import os

# Folder whose files are parsed as JSON label files by get_drugnames()
LABEL_DIR = "/kaggle/input/vaipe-dataset/public_train/prescription/label"

def get_drugnames(label_dir=None, output_path="/kaggle/working/drugnames.txt"):
    """Collect the unique drug names from the label files and save them to disk.

    Args:
        label_dir: Folder containing the .json label files. Defaults to the
            module-level ``LABEL_DIR``.
        output_path: Text file that receives one drug name per line, in
            sorted order.

    Returns:
        set: The unique ``text`` values of all items labelled "drugname".
    """
    if label_dir is None:
        label_dir = LABEL_DIR

    drugnames = set()
    for label_name in sorted(os.listdir(label_dir)):
        # Only .json files are label files; anything else in the folder would
        # make json.load fail.
        if not label_name.endswith('.json'):
            continue
        with open(os.path.join(label_dir, label_name), 'r', encoding="utf-8") as f:
            data = json.load(f)

        drugnames.update(item['text'] for item in data if item['label'] == "drugname")

    # Written as UTF-8 to match how the labels are read (the platform default
    # encoding may not cover every character), and sorted because set
    # iteration order differs between runs.
    with open(output_path, 'w', encoding="utf-8") as f:
        for drugname in sorted(drugnames):
            f.write(f"{drugname}\n")

    return drugnames

if __name__ == "__main__":
    # Build the drug-name vocabulary, then report how many unique names it holds
    drugnames = get_drugnames()
    print(len(drugnames))

In [None]:
import os
import json
import cv2
import numpy as np
import zipfile

# Root folder of the prescription data; its "label" and "image" subfolders are read below
DATA_DIR = "/kaggle/input/vaipe-dataset/public_train/prescription"
IMAGE_DIR = "/kaggle/working/output_image/"  # Directory to save cropped images
ZIP_PATH = "/kaggle/working/cropped_images.zip"  # Path to save the zip file

def train_generator():
    """Build the OCR training set: cropped drug-name images plus their texts.

    For every item labelled "drugname" in the label files under
    ``DATA_DIR/label``, the box is cropped out of the matching image by
    ``crop_image`` (which also adds the crop to the zip archive at
    ``ZIP_PATH``), and one line ``<cropped image name>: <text>`` is written
    to ``/kaggle/working/training_generator.txt``.
    """
    label_dir = os.path.join(DATA_DIR, "label")
    # Only .json files are label files (anything else would make json.load
    # fail); sorted so the lines of the output file come out in a
    # reproducible order.
    labels_name = sorted(f for f in os.listdir(label_dir) if f.endswith('.json'))

    with open("/kaggle/working/training_generator.txt", 'w', encoding="utf-8") as file:
        with zipfile.ZipFile(ZIP_PATH, 'w') as zipf:
            for label_name in labels_name:
                with open(os.path.join(label_dir, label_name), 'r', encoding="utf-8") as f:
                    data = json.load(f)

                # The index of the item within the label file keeps the crop
                # file names unique per image.
                for index, item in enumerate(data):
                    if item['label'] == "drugname":
                        image_path = crop_image(label_name, index, item['box'], zipf)
                        file.write(f"{image_path}: {item['text']}\n")

def crop_image(label_name, index, box, zipf):
    """Crop one bounding box out of its image, save it, and add it to the zip.

    Args:
        label_name: Label file name (``*.json``); the image with the same stem
            and a ``.png`` extension is read from ``DATA_DIR/image``.
        index: Position of the item inside its label file; appended to the
            image stem so every crop gets a unique file name.
        box: ``[x1, y1, x2, y2]`` corner coordinates in pixels.
        zipf: Open, writable ``zipfile.ZipFile`` that receives the crop.

    Returns:
        str: File name (not the full path) of the cropped image, which is
        saved in ``IMAGE_DIR`` and stored in the zip under the same name.

    Raises:
        FileNotFoundError: If the source image is missing or unreadable.
        ValueError: If the box selects no pixels of the image.
        OSError: If the cropped image cannot be written to ``IMAGE_DIR``.
    """
    stem = os.path.splitext(label_name)[0]
    source_path = os.path.join(DATA_DIR, "image", f"{stem}.png")

    # cv2.imread reports failure by returning None instead of raising, so
    # check explicitly rather than failing later with an obscure indexing error.
    img = cv2.imread(source_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {source_path}")

    # Clamp to 0: a negative value would be treated as an index counted from
    # the end of the array and select the wrong region. Values beyond the
    # image size are already clipped by slicing.
    x1, y1, x2, y2 = (max(int(v), 0) for v in box)
    cropped_img = img[y1:y2, x1:x2]
    if cropped_img.size == 0:
        raise ValueError(f"Box {list(box)} selects no pixels in {source_path}")

    # Path to save the cropped image
    cropped_image_name = f"{stem}_{index}.png"
    cropped_image_path = os.path.join(IMAGE_DIR, cropped_image_name)

    # cv2.imwrite returns False on failure instead of raising
    if not cv2.imwrite(cropped_image_path, cropped_img):
        raise OSError(f"Could not write cropped image: {cropped_image_path}")
    zipf.write(cropped_image_path, arcname=cropped_image_name)

    return cropped_image_name

if __name__ == "__main__":
    # Ensure the output directory exists before crop_image saves crops into it
    os.makedirs(IMAGE_DIR, exist_ok=True)
    train_generator()
