In [402]:

import os
import warnings

from llama_index.core.agent import AgentOutput
from llama_index.core.async_utils import asyncio_run

warnings.filterwarnings("ignore")
from typing import List
from ultralytics.engine.results import Results
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings, SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from ultralytics import YOLO


async def main():
    """Generate a PPE report with a tool-calling agent backed by a local Ollama LLM.

    Builds one Ollama client, loads the YOLO weights from ``predict/best.pt``,
    exposes the image analysis as a ``FunctionTool`` and asks a
    ``FunctionAgent`` to "Provide ppe report". The agent's final answer is
    printed to stdout.
    """
    import json  # local import: only this block needs it

    # Initialize Ollama LLM once and share it between the global LlamaIndex
    # Settings and the agent, so both use the same timeout and context window.
    llm = Ollama(
        model="llama3.1:latest",
        base_url="http://localhost:11434/",
        request_timeout=360.0,
        # Manually set the context window to limit memory usage
        context_window=8000,
    )
    # Set as global LLM in LlamaIndex Settings
    Settings.llm = llm
    yolo_model = YOLO('predict/best.pt')

    def analyse_image_ppe_violation() -> str:
        """Detect PPE-related objects in every image of the ./test directory.

        Returns a JSON object that maps each image file name to the list of
        class labels detected in that image (one entry per detected object,
        so repeated labels indicate multiple instances). An image with no
        detections maps to an empty list.
        """
        image_directory_path = "./test"
        image_documents = SimpleDirectoryReader(image_directory_path).load_data()
        detections = {}
        for doc in image_documents:
            # Documents the reader did not load as images carry no image path;
            # skip them instead of failing on the attribute access.
            image_path = getattr(doc, "image_path", None)
            if not image_path:
                continue
            # `classes` restricts the prediction to the listed class ids.
            yolo_responses: List[Results] = yolo_model.predict(image_path, classes=[0, 1, 2, 3, 4, 6])
            labels = []
            if yolo_responses and yolo_responses[0].boxes is not None:
                labels = [yolo_model.names[int(c)] for c in yolo_responses[0].boxes.cls]
            # Record every processed image, including those without detections,
            # so the report never falls back to unrelated reader metadata.
            detections[os.path.basename(image_path)] = labels
        return json.dumps(detections)

    # Keep the function and the tool under separate names; the tool name seen
    # by the LLM is still derived from the function's own name.
    ppe_tool = FunctionTool.from_defaults(fn=analyse_image_ppe_violation)
    agent = FunctionAgent(
        tools=[ppe_tool],
        llm=llm,
        description="A computer vision model detects PPE compliance in real-time, with results logged to a JSON file.Generate report based on the same",
        # Ground the answer in the tool output so the report does not mention
        # images or equipment that were never detected.
        system_prompt=(
            "You are a PPE compliance reporting assistant. Call the "
            "analyse_image_ppe_violation tool to obtain the detections, then "
            "report only on the images and labels present in its output."
        ),
    )
    response: AgentOutput = await agent.run(user_msg="Provide ppe report")
    print(response.response.content)


# Script entry point: drive the async workflow to completion when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    report_job = main()
    asyncio_run(report_job)

2025-11-05 18:37:21,679 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"



image 1/1 /Users/venugopalgotagi/PycharmProjects/llamaindex_poc/test/1.png: 640x512 1 helmet, 2 Persons, 41.4ms
Speed: 3.2ms preprocess, 41.4ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 512)

image 1/1 /Users/venugopalgotagi/PycharmProjects/llamaindex_poc/test/2.png: 256x640 7 helmets, 7 vests, 8 Persons, 16.0ms
Speed: 0.8ms preprocess, 16.0ms inference, 0.7ms postprocess per image at shape (1, 3, 256, 640)

image 1/1 /Users/venugopalgotagi/PycharmProjects/llamaindex_poc/test/3.png: 640x640 1 helmet, 1 vest, 1 Person, 36.8ms
Speed: 1.2ms preprocess, 36.8ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/venugopalgotagi/PycharmProjects/llamaindex_poc/test/4.png: 544x640 3 helmets, 3 vests, 3 Persons, 25.5ms
Speed: 1.3ms preprocess, 25.5ms inference, 0.5ms postprocess per image at shape (1, 3, 544, 640)

image 1/1 /Users/venugopalgotagi/PycharmProjects/llamaindex_poc/test/5.png: 416x640 1 Person, 20.8ms
Speed: 0.8ms preprocess, 20.8ms

2025-11-05 18:37:23,120 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Based on the PPE (Personal Protective Equipment) report, it appears that:

* In image 1, there is 1 person wearing a helmet.
* In image 2, there are multiple people wearing helmets and vests. 
* In image 3, there is 1 person wearing a vest and a helmet.
* In image 4, most of the people are wearing helmets.
* In image 5, there is no one wearing any PPE.
* In image 6, there is 1 person wearing a helmet.

It's difficult to determine if all workers are following proper safety protocols based on this report alone.
