In [2]:
from vlmx.agent import Agent, AgentConfig
from dotenv import load_dotenv
import os
from PIL import Image

# Pull variables from a local .env file into the process environment first,
# so the lookup below can see them.
load_dotenv()
API_KEY = os.getenv("API_KEY")  # None when the variable is not set

# Comma-separated list of the object classes we want to keep.
class_name = "tree, ficus, fern"

# Base system prompt: the accept/reject task plus the expected JSON reply shape.
# NOTE(review): this is a plain (non-f) string, so `{class_name}` below is a
# literal placeholder token that still has to be substituted before use.
# str.format() is not an option as-is because of the JSON example's braces.
SYSTEM_INSTRUCTION = """
We need to select some images of the classes: {class_name}. We will provide you some image rendered from the 3d model. You need to either return True or False. 
Return False to reject the image as inappropriate for the video game development. Some common reasons for rejection:
- The image doesn't clearly depict the object class
- The image contains other things in addition to the object. REMEMBER, we only want images that depict ONE SINGLE OBJECT belong to one of the classes.

The return format is
```json
{
"is_appropriate": true (or false),
"reason": "reason for the decision"
}
```
"""

# Extra context appended to the base prompt: the selected models feed a
# physics-parameter learning / simulation pipeline.
ADDITIONAL_INSTRUCTION = """
We'll be using the 3d models to learn physic parameters like material and young modulus to simulate the physics of the object.
E.g., the tree swaying in the wind. Therefore, you need to decide if the image depicts an object that is likely to be used in a physics simulation.
"""

class HelperAgent(Agent):
    """Agent that asks the model to accept or reject one rendered image.

    The system prompt is assembled from the module-level SYSTEM_INSTRUCTION
    and ADDITIONAL_INSTRUCTION strings; the user prompt is a short text lead-in
    followed by the image itself.
    """

    # NOTE(review): presumably read by the Agent base class as the result file
    # name -- nothing in this class uses it directly; confirm against vlmx.
    OUT_RESULT_PATH = "test.txt"

    def _make_system_instruction(self) -> str:
        """Return the system prompt with the target class list filled in."""
        # The template carries a literal `{class_name}` token. Substitute it
        # with str.replace rather than str.format: the template also embeds a
        # JSON example whose braces str.format would try to interpret.
        filled = SYSTEM_INSTRUCTION.replace("{class_name}", class_name)
        return filled + ADDITIONAL_INSTRUCTION

    def _make_prompt_parts(self, image_path: str) -> list:
        """Return the prompt parts: a text lead-in and the opened PIL image.

        Args:
            image_path: Path of the image file to open with PIL.
        """
        question = ["The image is :", Image.open(image_path)]
        return question

    def parse_response(self, response) -> None:
        """Print the text of the model response.

        Args:
            response: Object exposing the reply as a ``.text`` attribute.
        """
        print("response:", response.text)
        # TODO: persist response.text to a file instead of only printing it.


# Build the agent configuration separately, then construct the agent from it.
agent_config = AgentConfig(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    out_dir="test_results",
    api_key=API_KEY,
)
agent = HelperAgent(agent_config)

# Single rendered view to classify.
image_path = "class_render_outputs/tree/000-109/99a6191de63b4a568926f4009c91e646/000.png"

# Run one prediction on that image; HelperAgent.parse_response prints the reply.
response = agent.generate_prediction(image_path)

response: ```json
{
"is_appropriate": false,
"reason": "The image contains a piece of paper in addition to the object, which appears to be wood or bark. We are looking for images that depict only a single object."
}
```
