# Variables

In [1]:
import sys
sys.path.append('..')
print(sys.path)

from dotenv import load_dotenv
from typing import TypedDict, Annotated
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
from langchain.agents import create_tool_calling_agent, AgentExecutor
from pydantic import BaseModel, Field
from langchain.tools import Tool
from firecrawl import FirecrawlApp
import pandas as pd
import pprint
import os

from tools.fetching_description_from_huggingface import fetching_description_from_huggingface
# Load variables from a .env file into the process environment before any
# os.getenv call below; override=True is passed so that values from the file
# take precedence over variables that are already set (see python-dotenv docs).
load_dotenv(override = True)

# Name of the chat model, taken from the MODEL environment variable.
# NOTE(review): os.getenv returns None when MODEL is unset — confirm the
# variable is always provided, otherwise ChatOpenAI receives model=None.
MODEL = os.getenv("MODEL")
# Shared chat-model client; the agent functions in this notebook call
# llm.invoke(...) on it.
llm = ChatOpenAI(
    model=MODEL,
    temperature=0.2
)


['/usr/local/lib/python312.zip', '/usr/local/lib/python3.12', '/usr/local/lib/python3.12/lib-dynload', '', '/home/hieuclc/ise_chenla/venv/lib/python3.12/site-packages', '..']


# Testing

In [2]:
# Read the public task sheet and the model-zoo sheet from CSV files under
# data/ (paths are relative to the current working directory).
df_task = pd.read_csv("data/ISE - AutoCode Challenge 2 Public - Public task.csv")
df_model = pd.read_csv("data/ISE - AutoCode Challenge 2 Public - Model zoo.csv")

In [3]:
# Pick the entry labelled 2 from the "Task" column as the sample task
# (with the default integer index produced by read_csv this is the third row).
task = df_task["Task"][2]

In [4]:
def format_model_list(df):
    """Render every row of *df* as one numbered, Markdown-style line.

    Each line has the form ``"<n> - <description> [More info](<url>)"``. The
    description is taken from the row's second column (position 1) and the
    URL from its third column (position 2). Curly braces in the description
    are doubled, presumably so the text can later be embedded in a
    ``str.format``-style template without being parsed as placeholders.

    Args:
        df: DataFrame with at least three columns. The number printed for a
            row is its index label plus one, so an integer index (such as
            the default one created by ``pd.read_csv``) is expected.

    Returns:
        The rendered lines joined with newline characters; an empty string
        when *df* has no rows.
    """
    model_strings = []
    for i, row in df.iterrows():
        # Use .iloc for positional access: plain ``row[1]`` on a Series whose
        # labels are column names relies on a deprecated positional fallback.
        desc = str(row.iloc[1]).replace("{", "{{").replace("}", "}}")
        url = str(row.iloc[2])
        model_strings.append(f"{i + 1} - {desc} [More info]({url})")
    return "\n".join(model_strings)


## Tools

In [7]:
class AgentCodingState(TypedDict):
    """State dictionary consumed and returned by ``coding_agent``.

    The first five entries are substituted into ``base_prompt``; ``code`` is
    filled in by ``coding_agent`` with the content of the LLM response.
    """
    # Text substituted for {problem_description} in the prompt.
    problem_description: str
    # Text substituted for {model_description} in the prompt.
    model_description: str
    # Text substituted for {input_description} in the prompt.
    input_description: str
    # Text substituted for {output_description} in the prompt.
    output_description: str
    # Text substituted for {output_classes}; may be None.
    output_classes: str | None
    # Generated code as returned by the LLM; may be None.
    code: str | None

In [30]:
# Prompt template for the code-generation step. It is filled with str.format,
# so it expects exactly these placeholders: problem_description,
# model_description, input_description, output_description, output_classes.
# NOTE(review): because this is a raw string, the backslashes in front of the
# backticks near the end are sent to the model literally — confirm that is
# intended.
base_prompt = r"""
    You are a specialist in Machine Learning. Your task is to generate a **fully functional with all necessary imports and dependencies** codebase in **Python** that can be executed flawlessly.

    You will be provided with:
    - A problem description
    - An input specification
    - An output specification
    - A model description

    ### Input:
    - Problem description: {problem_description}
    - Model description: {model_description}
    - Input specification: {input_description}
    - Output specification: {output_description}
    - Output classes: {output_classes}

    ### Guidelines

    You **must** strictly follow the following guidelines:
    - The preprocessing step should be suitable for the data type.
    - The postprocessing step should notices the differences between the data returned by the model and the output requirements. You must extract and use the exact class labels as defined in the output specification.
    - Do **not invent new labels or translate** the class names. Use them exactly as given.

    Your implementation **must strictly follow** the structure below:
    1. **Imports**: All required libraries.
    2. **Preprocessing**: Handle and transform the input as defined.
    3. **Inference logic**: Use the described model for prediction. You **must** use `tqdm` or similar logging library to track progress.
    4. **Postprocessing**: Format or transform the raw output into the final result as described.

    You must **not** include any explanations, markdown, or logging outside what is required by the problem.

    Return **only** the complete Python codebase. Wrap it with:
    \`\`\`python
    # code here
    \`\`\`
    """

In [31]:
class OutputState(TypedDict):
    output_classes: str | None

In [32]:
def output_agent(state: OutputState) -> OutputState:
    """Ask the LLM for the class labels named in the output description.

    The instruction template is filled with ``state["problem_description"]``
    and ``state["output_description"]`` and sent to the module-level ``llm``.

    Returns:
        A copy of *state* whose ``"output_classes"`` entry holds the content
        of the LLM response.
    """
    template = r"""
        Your are a specialist in machine learning. Your task is to identify the absolute classes of the given problem description, following with an output description.
        ### Input:
        - Problem description: {problem_description}
        - Output description: {output_description}

        You must return an array, strictly following these guidelines:
        - Understand the context from the given problem description.
        - Extract the class names from the output description. You **must not** invent new labels or translate the class names. Use them exactly as given in the output description.
        - Create an array containing the classes.

        You must return only the array containing those classes, without any formatting.
    """

    # Look the fields up in the same order as the placeholders appear.
    placeholder_names = ("problem_description", "output_description")
    filled = template.format(**{name: state[name] for name in placeholder_names})

    reply = llm.invoke(filled)

    updated = dict(state)
    updated["output_classes"] = reply.content
    return updated

In [33]:
# Hand-written sample input for output_agent: "problem_description" holds the
# full task text and "output_description" repeats its output-format section.
# NOTE(review): the non-ASCII text below looks mis-encoded (mojibake) —
# confirm against the original data before relying on it.
# NOTE(review): every `""""` inside these triple-quoted literals closes the
# current literal and opens a new one; the pieces are joined by implicit
# string concatenation, so the double quotes around the labels do not appear
# in the resulting text.
output_state = {
    "problem_description": """B·ªëi c·∫£nh c·ªßa v·∫•n ƒë·ªÅ:
        Nh·∫≠n d·∫°ng ch·ªØ vi·∫øt tay l√† m·ªôt b√†i to√°n c∆° b·∫£n trong lƒ©nh v·ª±c h·ªçc m√°y v√† x·ª≠ l√Ω ·∫£nh, v·ªõi nhi·ªÅu ·ª©ng d·ª•ng th·ª±c ti·ªÖn nh∆∞ nh·∫≠n d·∫°ng ch·ªØ s·ªë tr√™n phi·∫øu kh·∫£o s√°t, h√≥a ƒë∆°n hay b√†i thi t·ª± ƒë·ªông. Vi·ªác ph√¢n lo·∫°i ch·ªØ s·ªë vi·∫øt tay th√†nh s·ªë nguy√™n t·ªë ho·∫∑c kh√¥ng gi√∫p m·ªü r·ªông kh·∫£ nƒÉng ·ª©ng d·ª•ng trong c√°c b√†i to√°n to√°n h·ªçc t·ª± ƒë·ªông, ki·ªÉm tra b√†i t·∫≠p, ho·∫∑c c√°c ·ª©ng d·ª•ng gi√°o d·ª•c.

        Y√™u c·∫ßu c·ª• th·ªÉ c·∫ßn ƒë·∫°t ƒë∆∞·ª£c:
        X√¢y d·ª±ng m·ªôt h·ªá th·ªëng c√≥ kh·∫£ nƒÉng nh·∫≠n di·ªán ch·ªØ s·ªë vi·∫øt tay t·ª´ ·∫£nh v√† x√°c ƒë·ªãnh xem s·ªë ƒë√≥ c√≥ ph·∫£i l√† s·ªë nguy√™n t·ªë hay kh√¥ng.
        ƒê·∫ßu v√†o l√† ·∫£nh ch·ª©a m·ªôt ch·ªØ s·ªë vi·∫øt tay.
        ƒê·∫ßu ra l√† nh√£n ph√¢n lo·∫°i nh·ªã ph√¢n: """"nguy√™n t·ªë"""" ho·∫∑c """"kh√¥ng nguy√™n t·ªë"""".

        ƒê·ªãnh d·∫°ng d·ªØ li·ªáu ƒë·∫ßu v√†o cho b√†i to√°n t·ªïng th·ªÉ:
        M·ªôt th∆∞ m·ª•c c√≥ t√™n l√† """"images"""" ch·ª©a c√°c ·∫£nh grayscale 28x28 pixel, m·ªói ·∫£nh ch·ª©a m·ªôt ch·ªØ s·ªë vi·∫øt tay t·ª´ 0 ƒë·∫øn 9.

        ƒê·ªãnh d·∫°ng k·∫øt qu·∫£ ƒë·∫ßu ra mong mu·ªën cho b√†i to√°n t·ªïng th·ªÉ:
        File output.csv m·ªói h√†ng l√† k·∫øt qu·∫£ d·ª± ƒëo√°n m·ªói ·∫£nh
        c√≥ c√°c c·ªôt:
        file_name: t√™n file ·∫£nh
        prediction: nh√£n c·ªßa ·∫£nh:
        """"s·ªë nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh l√† s·ªë nguy√™n t·ªë.
        """"kh√¥ng nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh kh√¥ng ph·∫£i s·ªë nguy√™n t·ªë.""",
    "output_description": """ƒê·ªãnh d·∫°ng k·∫øt qu·∫£ ƒë·∫ßu ra mong mu·ªën cho b√†i to√°n t·ªïng th·ªÉ:
        File output.csv m·ªói h√†ng l√† k·∫øt qu·∫£ d·ª± ƒëo√°n m·ªói ·∫£nh
        c√≥ c√°c c·ªôt:
        file_name: t√™n file ·∫£nh
        prediction: nh√£n c·ªßa ·∫£nh:
        """"s·ªë nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh l√† s·ªë nguy√™n t·ªë.
        """"kh√¥ng nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh kh√¥ng ph·∫£i s·ªë nguy√™n t·ªë."""
}

In [34]:
# Run the class-extraction step on the sample input (invokes the LLM).
new_state = output_agent(output_state)

In [35]:
# Echo the extracted class list as the cell's result.
new_state["output_classes"]

'["số nguyên tố", "không nguyên tố"]'

In [36]:
def coding_agent(state: AgentCodingState) -> AgentCodingState:
    """Generate code for the task described by *state*.

    Substitutes the five description fields of *state* into ``base_prompt``
    and sends the result to the module-level ``llm``.

    Returns:
        A copy of *state* whose ``"code"`` entry holds the content of the
        LLM response.
    """
    # Fields are looked up in the same order as the placeholders are listed.
    prompt_fields = (
        "problem_description",
        "model_description",
        "input_description",
        "output_description",
        "output_classes",
    )
    filled_prompt = base_prompt.format(
        **{name: state[name] for name in prompt_fields}
    )

    reply = llm.invoke(filled_prompt)

    result = dict(state)
    result["code"] = reply.content
    return result

In [37]:
# Hand-written sample input for coding_agent. It provides every field that
# base_prompt substitutes; "output_classes" reuses the value produced by
# output_agent (new_state).
# NOTE(review): the non-ASCII text below looks mis-encoded (mojibake) —
# confirm against the original data before relying on it.
# NOTE(review): every `""""` inside these triple-quoted literals closes the
# current literal and opens a new one; the pieces are joined by implicit
# string concatenation, so the double quotes around the labels do not appear
# in the resulting text.
prev_state = {
    "problem_description": """B·ªëi c·∫£nh c·ªßa v·∫•n ƒë·ªÅ:
        Nh·∫≠n d·∫°ng ch·ªØ vi·∫øt tay l√† m·ªôt b√†i to√°n c∆° b·∫£n trong lƒ©nh v·ª±c h·ªçc m√°y v√† x·ª≠ l√Ω ·∫£nh, v·ªõi nhi·ªÅu ·ª©ng d·ª•ng th·ª±c ti·ªÖn nh∆∞ nh·∫≠n d·∫°ng ch·ªØ s·ªë tr√™n phi·∫øu kh·∫£o s√°t, h√≥a ƒë∆°n hay b√†i thi t·ª± ƒë·ªông. Vi·ªác ph√¢n lo·∫°i ch·ªØ s·ªë vi·∫øt tay th√†nh s·ªë nguy√™n t·ªë ho·∫∑c kh√¥ng gi√∫p m·ªü r·ªông kh·∫£ nƒÉng ·ª©ng d·ª•ng trong c√°c b√†i to√°n to√°n h·ªçc t·ª± ƒë·ªông, ki·ªÉm tra b√†i t·∫≠p, ho·∫∑c c√°c ·ª©ng d·ª•ng gi√°o d·ª•c.

        Y√™u c·∫ßu c·ª• th·ªÉ c·∫ßn ƒë·∫°t ƒë∆∞·ª£c:
        X√¢y d·ª±ng m·ªôt h·ªá th·ªëng c√≥ kh·∫£ nƒÉng nh·∫≠n di·ªán ch·ªØ s·ªë vi·∫øt tay t·ª´ ·∫£nh v√† x√°c ƒë·ªãnh xem s·ªë ƒë√≥ c√≥ ph·∫£i l√† s·ªë nguy√™n t·ªë hay kh√¥ng.
        ƒê·∫ßu v√†o l√† ·∫£nh ch·ª©a m·ªôt ch·ªØ s·ªë vi·∫øt tay.
        ƒê·∫ßu ra l√† nh√£n ph√¢n lo·∫°i nh·ªã ph√¢n: """"nguy√™n t·ªë"""" ho·∫∑c """"kh√¥ng nguy√™n t·ªë"""".

        ƒê·ªãnh d·∫°ng d·ªØ li·ªáu ƒë·∫ßu v√†o cho b√†i to√°n t·ªïng th·ªÉ:
        M·ªôt th∆∞ m·ª•c c√≥ t√™n l√† """"images"""" ch·ª©a c√°c ·∫£nh grayscale 28x28 pixel, m·ªói ·∫£nh ch·ª©a m·ªôt ch·ªØ s·ªë vi·∫øt tay t·ª´ 0 ƒë·∫øn 9.

        ƒê·ªãnh d·∫°ng k·∫øt qu·∫£ ƒë·∫ßu ra mong mu·ªën cho b√†i to√°n t·ªïng th·ªÉ:
        File output.csv m·ªói h√†ng l√† k·∫øt qu·∫£ d·ª± ƒëo√°n m·ªói ·∫£nh
        c√≥ c√°c c·ªôt:
        file_name: t√™n file ·∫£nh
        prediction: nh√£n c·ªßa ·∫£nh:
        """"s·ªë nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh l√† s·ªë nguy√™n t·ªë.
        """"kh√¥ng nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh kh√¥ng ph·∫£i s·ªë nguy√™n t·ªë.""",
    "model_description": """Image Classification Model - Handwritten Digit Recognition (Color Images)
        üìù M√¥ t·∫£
        ƒê√¢y l√† m·ªôt m√¥ h√¨nh ph√¢n lo·∫°i ·∫£nh ƒë∆∞·ª£c hu·∫•n luy·ªán ƒë·ªÉ nh·∫≠n di·ªán c√°c ch·ªØ s·ªë vi·∫øt tay (t·ª´ 0 ƒë·∫øn 9) trong ·∫£nh m√†u. M√¥ h√¨nh s·ª≠ d·ª•ng ki·∫øn tr√∫c Vision Transformer (ViT) v√† ƒë∆∞·ª£c hu·∫•n luy·ªán v·ªõi t·∫≠p d·ªØ li·ªáu g·ªìm c√°c ch·ªØ s·ªë vi·∫øt tay ƒë∆∞·ª£c ch·ª•p ho·∫∑c scan d∆∞·ªõi d·∫°ng ·∫£nh m√†u.
        üìå Chi ti·∫øt c√°c m√¥ h√¨nh
        Ph√¢n lo·∫°i ·∫£nh ch·ªØ s·ªë vi·∫øt tay t·ª´ 0 ƒë·∫øn 9
        üì• ƒê·∫ßu v√†o
        ƒê·ªãnh d·∫°ng: ·∫¢nh m√†u (RGB)
        Ki·ªÉu d·ªØ li·ªáu: PIL.Image.Image
        K√≠ch th∆∞·ªõc ·∫£nh: M√¥ h√¨nh s·∫Ω t·ª± resize theo c·∫•u h√¨nh c·ªßa ViTImageProcessor (th∆∞·ªùng l√† 224x224)
        Gi√° tr·ªã pixel: [0, 255] (chu·∫©n h√≥a t·ª± ƒë·ªông b·ªüi processor)
        üì§ ƒê·∫ßu ra
        M·ªôt s·ªë nguy√™n t·ª´ 0 ƒë·∫øn 9, l√† k·∫øt qu·∫£ ph√¢n lo·∫°i t∆∞∆°ng ·ª©ng v·ªõi ch·ªØ s·ªë xu·∫•t hi·ªán trong ·∫£nh.
        üõ† Y√™u c·∫ßu th∆∞ vi·ªán
        C√†i ƒë·∫∑t c√°c th∆∞ vi·ªán sau b·∫±ng pip:
        pip install transformers torch
        üß™ S·ª≠ d·ª•ng m√¥ h√¨nh
        D∆∞·ªõi ƒë√¢y l√† ƒëo·∫°n m√£ m·∫´u ƒë·ªÉ s·ª≠ d·ª•ng m√¥ h√¨nh:
        from transformers import ViTForImageClassification, ViTImageProcessor
        from PIL import Image
        import torch
        model_name = "thanhtlx/image_classification_01"
        model = ViTForImageClassification.from_pretrained(model_name)
        processor = ViTImageProcessor.from_pretrained(model_name)
        # ƒê·ªçc ·∫£nh c·∫ßn ph√¢n lo·∫°i
        image = Image.open("images.png")
        # X·ª≠ l√Ω ·∫£nh ƒë·∫ßu v√†o
        inputs = processor(images=image, return_tensors="pt")
        # D·ª± ƒëo√°n v·ªõi m√¥ h√¨nh
        with torch.no_grad():
            outputs = model(**inputs)
        # L·∫•y k·∫øt qu·∫£ d·ª± ƒëo√°n
        predicted_class = outputs.logits.argmax(-1).item()
        print(f"prediction: {predicted_class}")""",
    "input_description": """ƒê·ªãnh d·∫°ng d·ªØ li·ªáu ƒë·∫ßu v√†o cho b√†i to√°n t·ªïng th·ªÉ:
        M·ªôt th∆∞ m·ª•c c√≥ t√™n l√† """"images"""" ch·ª©a c√°c ·∫£nh grayscale 28x28 pixel, m·ªói ·∫£nh ch·ª©a m·ªôt ch·ªØ s·ªë vi·∫øt tay t·ª´ 0 ƒë·∫øn 9.""",
    "output_description": """ƒê·ªãnh d·∫°ng k·∫øt qu·∫£ ƒë·∫ßu ra mong mu·ªën cho b√†i to√°n t·ªïng th·ªÉ:
        File output.csv m·ªói h√†ng l√† k·∫øt qu·∫£ d·ª± ƒëo√°n m·ªói ·∫£nh
        c√≥ c√°c c·ªôt:
        file_name: t√™n file ·∫£nh
        prediction: nh√£n c·ªßa ·∫£nh:
        """"s·ªë nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh l√† s·ªë nguy√™n t·ªë.
        """"kh√¥ng nguy√™n t·ªë"""" n·∫øu s·ªë trong ·∫£nh kh√¥ng ph·∫£i s·ªë nguy√™n t·ªë.""",
    "output_classes": new_state["output_classes"]
}

In [38]:
# Run the code-generation step on the sample input (invokes the LLM).
state = coding_agent(prev_state)

In [39]:
# state

In [40]:
# Show the generated code returned by coding_agent.
print(state["code"])

```python
import os
import csv
from tqdm import tqdm
from PIL import Image
import torch
from transformers import ViTForImageClassification, ViTImageProcessor

# Load the pre-trained model and processor
model_name = "thanhtlx/image_classification_01"
model = ViTForImageClassification.from_pretrained(model_name)
processor = ViTImageProcessor.from_pretrained(model_name)
model.eval()

# Define the directory containing images
images_dir = "images"
# Define output CSV file
output_csv = "output.csv"

# Function to check if a number is prime
def is_prime(n):
    """Return True when *n* is a prime number, False otherwise.

    Values below 2 are never prime and 2 is the only even prime; any other
    candidate is tested by trial division with the odd numbers from 3 up to
    the integer part of its square root.
    """
    if n < 2:
        return False
    if n % 2 == 0:
        # Among even numbers only 2 itself is prime.
        return n == 2
    upper_bound = int(n ** 0.5) + 1
    return all(n % divisor != 0 for divisor in range(3, upper_bound, 2))

# Prepare list to hold results
results = []

# List all image files in the directory
image_files = [f for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]

# Process each image