## JSON Input

In [1]:
from pptlayout.extractors.run_extractors import run_extractors
from json import dumps
from pptlayout.llm.llm import call_llm
import csv
import logging

# Send benchmark log records (INFO and above) to a file that is
# overwritten each time logging is configured by this call.
logging.basicConfig(
    filename='llm_benchmark.log',
    filemode='w',
    level=logging.INFO,
    format='%(levelname)s:%(message)s',
)

# Presentation to benchmark against -- point this at your own PPTX file.
pptx_path = "/data/tianyuhu/PPTLayout/data/pptx/ZK7FNUZ33GBBCG7CFVYS56TQCTD72CJR.pptx"

# Extract the presentation's structure.
# NOTE(review): "emu" is presumably the measurement unit for shape
# geometry -- confirm against run_extractors.
info = run_extractors(pptx_path, "emu")

# Serialize the slide at index 4 (0-based) as pretty-printed JSON; this
# string is what gets embedded in every prompt. Echo it for inspection.
slide_data = dumps(info["slides"][4], indent=4)
print(slide_data)

# Ground-truth benchmark set: every entry pairs a question about the slide
# with the answer the model is expected to give. Written as
# (question, answer) tuples and expanded into the
# {'question': ..., 'answer': ...} dicts used by the rest of the script.
qa_list = [
    {'question': question, 'answer': answer}
    for question, answer in (
        (
            "What is the shape id of the title of the slide? (Answer with a number)",
            "38",
        ),
        (
            "How many images are in the slide? (Answer with a number)",
            "1",
        ),
        (
            "Which element is in the exact center of the slide? (Answer with its shape id)",
            "42",
        ),
        (
            "Are there any overlapping elements? (Answer 'Yes' or 'No')",
            "Yes",
        ),
        (
            "What is the spatial relationship of the title to the image? (Answer 'Above' or 'Below')",
            "Above",
        ),
        (
            "What is the largest element by area in the slide? (Answer with its shape id)",
            "42",
        ),
        (
            "Are there any elements that extend beyond the slide boundaries? (Answer 'Yes' or 'No')",
            "No",
        ),
        (
            "Which element is closest to the left edge? (Answer with its shape id)",
            "44",
        ),
        (
            "Are all text elements aligned to the left? (Answer 'Yes' or 'No')",
            "No",
        ),
        (
            "How many elements in the slide have text? (Answer with a number)",
            "3",
        ),
        # Disabled entry, kept for the MLLM variant of the benchmark:
        # (
        #     "What is the primary color of the title text? (Answer in one word)",
        #     "Black",
        # ),
        (
            "Is the spacing between elements consistent? (Answer 'Yes' or 'No')",
            "Yes",
        ),
        (   # To Change
            "Does the slide follow a horizontal or vertical layout? (Answer 'Horizontal' or 'Vertical')",
            "Horizontal",
        ),
        (   # To Change
            "Are there any elements grouped closely together? (Answer 'Yes' or 'No')",
            "No",
        ),
        (
            "Which element has the most text content? (Answer with its shape id)",
            "44",
        ),
    )
]

# One result row per question -- including questions whose LLM call failed,
# so the saved results always cover the full question set.
results = []

# Ask the model each question about the slide and score its reply.
for qa in qa_list:
    # The prompt embeds the slide JSON plus the slide dimensions stated
    # below, followed by the question itself.
    prompt = (
        f"You are given a slide in JSON format:\n{slide_data}\n"
        "The width of the slide is 9144000 and the height of the slide is 6858000.\n"
        f"Please answer the following question in **one word only**:\n{qa['question']}\n"
    )
    try:
        response = call_llm(
            model_name="llama3.1:latest",
            prompt=prompt,
        )
        response_text = response.strip()
    except Exception as e:
        # Best-effort benchmark: a failed call must not abort the run.
        # The failure is recorded as the literal response "Error" (scored
        # as incorrect below) instead of being dropped -- previously a
        # `continue` here discarded the row, leaving this assignment dead
        # and silently shrinking the result set.
        response_text = "Error"
        print(f"An error occurred: {e}")
        logging.error("Error for question '%s': %s", qa['question'], e)

    # Score by case-insensitive exact match against the expected answer.
    is_correct = response_text.lower() == qa['answer'].lower()

    # Keys match the CSV column names used when the results are saved.
    results.append({
        'question': qa['question'],
        'expected_answer': qa['answer'],
        'llm_response': response_text,
        'correct': is_correct
    })

    # Mirror the outcome to the log file (lazy %-formatting) ...
    logging.info("Question: %s", qa['question'])
    logging.info("Expected Answer: %s", qa['answer'])
    logging.info("LLM Response: %s", response_text)
    logging.info("Correct: %s\n", is_correct)

    # ... and to the notebook output.
    print(f"Question: {qa['question']}")
    print(f"Expected Answer: {qa['answer']}")
    print(f"LLM Response: {response_text}")
    print(f"Correct: {is_correct}\n")

# Save results to a CSV file.
# An explicit UTF-8 encoding keeps the export independent of the platform's
# default locale encoding: model responses may contain non-ASCII text, which
# could otherwise fail to encode or be written inconsistently across machines.
# newline='' lets the csv module control line endings itself.
with open('llm_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['question', 'expected_answer', 'llm_response', 'correct']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Header row first, then one row per collected result.
    writer.writeheader()
    writer.writerows(results)


{
    "slide_id": 260,
    "slide_name": "",
    "shapes": [
        {
            "name": "PlaceHolder 1",
            "shape_id": 38,
            "shape_type": "PLACEHOLDER",
            "measurement_unit": "emu",
            "height": 533520,
            "width": 6477120,
            "left": 1523880,
            "top": 152280,
            "text": "NuMI baffle as prototype of target",
            "placeholder_type": "TITLE"
        },
        {
            "name": "Rectangle 38",
            "shape_id": 39,
            "shape_type": "AUTO_SHAPE",
            "measurement_unit": "emu",
            "height": 762120,
            "width": 2286000,
            "left": 609480,
            "top": 5486400,
            "text": ""
        },
        {
            "name": "Rectangle 39",
            "shape_id": 40,
            "shape_type": "AUTO_SHAPE",
            "measurement_unit": "emu",
            "height": 228600,
            "width": 609480,
            "left": 7772400,
            "to