In [1]:
import base64
import time
import pandas as pd
import os
import google.generativeai as genai
from IPython.display import display
from IPython.display import Markdown

# Gemini API key: taken from the GEMINI_API_KEY environment variable when it is
# already set. To hard-code a key instead, replace the empty-string fallback
# below. setdefault() is used (rather than plain assignment) so that a key
# exported in the environment is not overwritten by the placeholder.
os.environ.setdefault("GEMINI_API_KEY", "")
if not os.environ["GEMINI_API_KEY"]:
    # Surface the misconfiguration once, up front, instead of only as
    # per-question API errors later in the run.
    print("Warning: GEMINI_API_KEY is empty; Gemini API calls are expected to fail until a key is provided.")

# Configure the Gemini API
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the bytes are
    base64-encoded and the encoded bytes are decoded as UTF-8 to give a
    ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Read CSV: every row becomes one dict keyed by the CSV column names.
questions_df = pd.read_csv("C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/VLAT Questions.csv")
questions = questions_df.to_dict('records')

# One [response, elapsed_seconds, correct_flag] row is appended per question.
responses_data = []

# Folder holding one PNG per visualisation; the file name is the 'vis' column.
image_dir = "C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/"

# Marker the prompt asks the model to end with. Matched without the trailing
# space so that variants such as "**Correct Answer:** X" are still recognised.
answer_marker = "Correct Answer:"

# Retry settings for failed API calls.
max_retries = 3
retry_delay = 20  # seconds

# Minimum spacing (seconds) enforced between successful requests.
min_request_interval = 15

# Instruction preamble shared by every question; the question text and its
# options are appended per question inside the loop.
prompt_instructions = (
    'I am about to show you a graph and ask you a multiple-choice question about that graph. \n\n'
    'Task 1: Data Extraction and Table Creation: First, explicitly list ALL numerical values you can identify on both axes, then create a structured table using markdown syntax that includes ALL data points you identified above with appropriate column headers with units. \n \n'
    'Task 2: Sort the data: Sort the data in descending order by the numerical values. \n \n'
    'Task 3: Data Verification and Error Handling: Double-check if your table matches ALL elements in the graph by comparing each value in your table with the graph and updating your table with correct values, verify the sorting is correct, and before proceeding, confirm all corrections have been made and use ONLY the corrected data for analysis. \n \n'
    'Task 4: Question Analysis: Using ONLY the verified data in your table, compare EACH value individually with the reference value, for "less than" comparisons mark ALL values that are even slightly below the reference, for "greater than" comparisons mark ALL values that are even slightly above the reference, and show each comparison on a new line. \n\n'
    'Provide your reasoning with specific references to table values. \n\n'
    'End with: "Correct Answer: ". Just write the value, nothing else. Do not write anything after this. \n\n'
    'Let\'s solve this step by step.'
)

# The model handle and generation settings do not depend on the question, so
# they are created once instead of on every retry of every question.
model = genai.GenerativeModel('gemini-2.0-pro-exp-02-05')
generation_config = {
    "temperature": 0.0,  # deterministic decoding
    "max_output_tokens": 5000,
}


def _extract_answer(full_response):
    """Return the text after the last answer marker, or None if it is absent.

    Surrounding whitespace and markdown emphasis characters (``*`` and
    backticks) are removed so that "Correct Answer: **X**" yields "X".
    """
    _, marker, tail = full_response.rpartition(answer_marker)
    if not marker:
        return None
    return tail.strip().strip("*`").strip()


for i, question in enumerate(questions, start=1):
    time.sleep(5)
    try:
        print(f"\nProcessing question {i}:")
        print(question)

        # Path to the image for this question
        image_path = image_dir + str(question.get('vis', '')) + ".png"

        # Question text, options and expected answer. The column names
        # (including the trailing ': ' / ':') must match the CSV header exactly.
        question_text = question.get('question: ', '')
        question_options = question.get('option:', '')
        correct_ans = str(question.get('correct', '')).strip()

        print(f"Processing image: {image_path}")
        print(f"Question: {question_text}")
        print(f"Options: {question_options}")
        print(f"Correct answer: {correct_ans}")

        # Read and base64-encode the image once per question (not per retry).
        with open(image_path, "rb") as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode('utf-8')

        prompt = prompt_instructions + '\n\n' + question_text + " " + question_options

        # Request payload: the text prompt followed by the inline PNG.
        parts = [
            {"text": prompt},
            {"inline_data": {
                "mime_type": "image/png",
                "data": image_b64
            }}
        ]

        # Call the API, retrying on any error up to max_retries attempts.
        for retry in range(max_retries):
            try:
                time_start = time.perf_counter()

                # Generate the response
                response = model.generate_content(
                    parts,
                    generation_config=generation_config
                )

                time_end = time.perf_counter()

                # If we get here, response was successful
                break

            except Exception as e:
                print(f"Error during API call (attempt {retry + 1}/{max_retries}):", str(e))
                if retry < max_retries - 1:
                    print(f"\nAPI error, waiting {retry_delay} seconds before retry {retry + 1}/{max_retries}")
                    time.sleep(retry_delay)
                    continue
                raise  # Re-raise the last exception if we've exhausted all retries

        elapsed = time_end - time_start

        try:
            # Extract the full response text
            full_response = response.text
            print("\nAPI Response:", full_response[:200] + "...")  # Debug print - first 200 chars

            model_answer = _extract_answer(full_response)
            if model_answer is None:
                model_answer = "Error: No answer in correct format"
                is_correct = "N/A"
            else:
                # Case-insensitive comparison after stripping whitespace
                is_correct = model_answer.upper() == correct_ans.strip().upper()

        except Exception as e:
            # Reading response.text or parsing it failed; record the error as
            # the answer for this question instead of aborting the run.
            print(f"Error processing response: {str(e)}")
            model_answer = f"Error: {str(e)}"
            is_correct = "N/A"

        responses_data.append([model_answer, elapsed, is_correct])
        print(f"\nAnswer: {model_answer}")
        print(f"Time taken: {elapsed:.2f} seconds")
        print(f"Correct? {is_correct}")

        # Pad the iteration so successful requests are at least
        # min_request_interval seconds apart.
        time.sleep(max(min_request_interval - elapsed, 0))

    except Exception as e:
        # Any failure for this question (missing image, exhausted retries, ...)
        # is recorded as an error row so the output stays aligned with the CSV.
        print(f"Error processing question {i}:", str(e))
        responses_data.append([f"Error: {str(e)}", 0, "N/A"])

# Make sure the output folder is present before writing into it
results_dir = "C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Results/"
os.makedirs(results_dir, exist_ok=True)

# Tabulate one row per processed question, numbering the rows from 1, and
# write them to a CSV whose name carries the current Unix timestamp.
results_df = pd.DataFrame(responses_data, columns=['response', 'time', 'correct_bool'])
results_df.index = pd.RangeIndex(start=1, stop=len(results_df) + 1)
timestamp = int(time.time())
output_path = f"{results_dir}Gemini_VLAT_{timestamp}.csv"
results_df.to_csv(output_path, index_label="id")
print("\n*** Finished ***")


Processing question 1:
{'id': 1, 'dropped': 'no', 'vis': 'VLAT_a', 'item': 'a_1', 'question: ': 'What was the price of a barrel of oil in February 2015? ', 'option:': '$57.36; $47.82; $50.24; $39.72', 'correct': '$50.24 '}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_a.png
Question: What was the price of a barrel of oil in February 2015? 
Options: $57.36; $47.82; $50.24; $39.72
Correct answer: $50.24

API Response: **Task 1: Data Extraction and Table Creation**

Here are the numerical values identified on the axes:

*   **Y-axis (Oil Price in $ per barrel):** 35, 40, 45, 50, 55, 60, 65
*   **X-axis (Month):** Ja...

Answer: $50.24
Time taken: 8.54 seconds
Correct? True

Processing question 2:
{'id': 2, 'dropped': 'no', 'vis': 'VLAT_a', 'item': 'a_2', 'question: ': 'In which month was the price of a barrel of oil the lowest in 2015?', 'option:': 'March; May; July; December', 'correct': 'December'}
Processing image: C:/Users/amitc/OneDrive/Desktop/Ne


Processing question 12:
{'id': 12, 'dropped': 'no', 'vis': 'VLAT_c', 'item': 'c_3', 'question: ': 'In which city is the cost of soda the highest?', 'option:': 'New York City; Las Vegas; Atalanta; Washington D.C.', 'correct': 'Washington D.C.'}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_c.png
Question: In which city is the cost of soda the highest?
Options: New York City; Las Vegas; Atalanta; Washington D.C.
Correct answer: Washington D.C.

API Response: Okay, let's analyze the graph step-by-step.

**Task 1: Data Extraction and Table Creation**

First, I'll list all numerical values on both axes:

*   **Cost ($) Axis:** 0, 5, 10, 15, 20, 25, 30, 35, 4...

Answer: New York City
Time taken: 7.28 seconds
Correct? False

Processing question 13:
{'id': 13, 'dropped': 'yes', 'vis': 'VLAT_c', 'item': 'c_4', 'question: ': 'What is the cost range of a sandwich in the cities?', 'option:': '$0 - $24.2; $0 - $55.9; $13 - $24.2; $17 - $35.2', 'correct': '$13 -


API Response: **Task 1: Data Extraction and Table Creation**

Here are the numerical values identified on both axes:

*   **X-axis (Rating):** 3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0
*   **Y-axis (Fre...

Answer: 240
Time taken: 10.52 seconds
Correct? False

Processing question 23:
{'id': 23, 'dropped': 'no', 'vis': 'VLAT_f', 'item': 'f_2', 'question: ': 'What is the rating that the people have rated the taxi the most?', 'option:': '4.2-4.4; 4.4-4.6; 4.6-4.8; 4.8-5.0', 'correct': '4.4-4.6'}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_f.png
Question: What is the rating that the people have rated the taxi the most?
Options: 4.2-4.4; 4.4-4.6; 4.6-4.8; 4.8-5.0
Correct answer: 4.4-4.6

API Response: **Task 1: Data Extraction and Table Creation**

Here are the numerical values identified on the axes:

*   **X-axis (Rating):** 3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0
*   **Y-axis (Freq...

Answer: 4.4-4.6
Time taken: 9.57 secon


Processing question 34:
{'id': 34, 'dropped': 'no', 'vis': 'VLAT_g', 'item': 'g_8', 'question: ': 'The weights for males with the height of 188 cm are all the same.', 'option:': 'True; False', 'correct': 'FALSE'}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_g.png
Question: The weights for males with the height of 188 cm are all the same.
Options: True; False
Correct answer: FALSE

API Response: Okay, let's analyze the graph step-by-step.

**Task 1: Data Extraction and Table Creation**

First, I'll list all identifiable numerical values on both axes:

*   **Height (cm):** 160, 165, 170, 175, ...

Answer: False
Time taken: 14.32 seconds
Correct? True

Processing question 35:
{'id': 35, 'dropped': 'no', 'vis': 'VLAT_i', 'item': 'i_1', 'question: ': 'What was the average price of a pound of coffee beans in September 2013?', 'option:': '$4.9; $5.0; $5.1; $5.2', 'correct': '$5.10 '}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Im


API Response: Task 1: Data Extraction and Table Creation

X-axis (Year): 2009, 2010, 2011, 2012, 2013, 2014
Y-axis (Number of Girls): 0, 2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000

| Year | Number of Girls ...

Answer: rising
Time taken: 5.78 seconds
Correct? True

Processing question 45:
{'id': 45, 'dropped': 'no', 'vis': 'VLAT_j', 'item': 'j_6', 'question: ': 'In the UK, the number of girls named ‘Amelia’ in 2014 was more than it was in 2013.', 'option:': 'True; False', 'correct': 'FALSE'}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_j.png
Question: In the UK, the number of girls named ‘Amelia’ in 2014 was more than it was in 2013.
Options: True; False
Correct answer: FALSE

API Response: Okay, let's analyze the graph step-by-step.

**Task 1: Data Extraction and Table Creation**

First, I'll identify all numerical values on both axes:

*   **Y-axis (Number of Girls):** 0, 2,000, 4,000,...

Answer: True
Time taken: 6.80 seconds
Correct? Fa


API Response: **Task 1: Data Extraction and Table Creation**

The following numerical values are identified on the axes:

*   X-axis (States): Indiana (IN) - implicit, based on the question.
*   Y-axis (Unemploymen...

Answer: 4.6% - 5.7%
Time taken: 7.20 seconds
Correct? False

Processing question 56:
{'id': 56, 'dropped': 'no', 'vis': 'VLAT_k', 'item': 'k_2', 'question: ': 'In which state was the unemployment rate the highest in 2015?', 'option:': 'Alaska (AK); New Mexico (NM); Florida (FL); New York (NY)', 'correct': 'New Mexico (NM)'}
Processing image: C:/Users/amitc/OneDrive/Desktop/New folder (7)/VLAT/Images/VLAT_k.png
Question: In which state was the unemployment rate the highest in 2015?
Options: Alaska (AK); New Mexico (NM); Florida (FL); New York (NY)
Correct answer: New Mexico (NM)

API Response: **Task 1: Data Extraction and Table Creation**

First, I identify all numerical values on the axes:

*   **X-axis (States):** AK, NM, FL, NY
*   **Y-axis (Unemployment Rate):** 0.0