

---

# Loading Libraries

---



In [None]:
import pandas as pd, os
from openai import OpenAI
import pandas as pd, csv
from tenacity import retry, wait_fixed
from PIL import Image
import base64
import random
import shutil
import re
import os
import csv
from tqdm.notebook import tqdm  # Import tqdm for Colab/Jupyter notebooks
import time                     # Added for potential simulation/debugging

In [None]:
# Shared OpenAI client used by every request in this notebook.
# The key is read from the OPENAI_API_KEY environment variable so it does not
# have to be pasted into (and saved with) the notebook; when the variable is
# unset this falls back to the original empty-string placeholder.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))

In [None]:
def generate_response(model_name, prompt, image_path, max_attempts=3):
    """Ask a chat model to answer ``prompt`` about the image at ``image_path``.

    The image file is read, base64-encoded and sent as a ``data:image/png``
    URL in a second user message following the text prompt.

    The original ``@retry`` decorator could never fire because every exception
    was caught inside the function. The API call is now retried explicitly, up
    to ``max_attempts`` times with a 2-second pause between attempts. Failures
    while reading the image are not retried.

    Args:
        model_name: Model identifier passed to the chat completions endpoint.
        prompt: Text prompt sent as the first user message.
        image_path: Path of the image file to attach.
        max_attempts: Maximum number of API calls before giving up.

    Returns:
        The content of the first choice's message, or the string ``"ERROR"``
        if the image could not be read or all attempts failed.
    """
    # Function-scope import: the file only imports `retry` and `wait_fixed`.
    from tenacity import Retrying, stop_after_attempt

    try:
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
        image_url = f"data:image/png;base64,{base64_image}"

        messages = [
            {"role": "user", "content": prompt},  # Same prompt for all images
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url},
                    }
                ],
            },
        ]

        # Only the network call is retried; reraise=True surfaces the last
        # underlying exception (instead of RetryError) to the handler below.
        retryer = Retrying(
            wait=wait_fixed(2),
            stop=stop_after_attempt(max_attempts),
            reraise=True,
        )
        for attempt in retryer:
            with attempt:
                response = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    max_tokens=1024,
                )
                reply = response.choices[0].message.content
        return reply

    except Exception as e:
        # Best-effort contract kept from the original: report and return a
        # sentinel so one bad image does not abort a whole run.
        print(f"❌ Error processing {image_path}: {e}")
        return "ERROR"




---

# Truncated Axis

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Truncated Axis task: the model must answer in the form
# "Option {1-7}: <one-line explanation>", rating the difference between the
# first and second bar on a 7-point scale defined as a percentage of the
# larger value. The same text is sent with every image of this task.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: How would you rate the difference in values between the first bar and the second bar?


Answer Options:
1. No difference: The values of the two bars are exactly equal.
2. Extremely small difference: The difference is less than 5% of the larger value.
3. Very small difference: The difference is 5% to less than 10% of the larger value.
4. Small difference: The difference is 10% to less than 25% of the larger value.
5. Large difference: The difference is 25% to less than 50% of the larger value.
6. Very large difference: The difference is 50% to less than 100% of the larger value.
7. Extremely large difference: The difference is 100% or more (i.e., one bar is at least double the other).
"""


In [None]:
# Truncated Axis run: for each chart ID, query the model once for the
# "Aggressive" image and once for the "Control" image, and write both answers
# as one CSV row.

# Parameters
model_name = "gpt-4o" #change
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/TruncatedAxis"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the first three "_"-separated parts of a filename. Files with fewer
# parts are ignored (the original indexing raised IndexError on them), and the
# IDs are sorted so repeated runs select the same subset instead of depending
# on set iteration order.
all_ids = sorted({
    "_".join(parts[:3])
    for parts in (file.split("_") for file in os.listdir(folder_path))
    if len(parts) >= 3
})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Trunated_Axis_Responses.csv", "w+", newline="", encoding="utf-8") as f:  #change
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        image_paths = [os.path.join(folder_path, f"{chart_id}_Aggressive.png"),
                       os.path.join(folder_path, f"{chart_id}_Control.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Inverted Axis

---



Getting Response from GPT 4o


---



In [None]:

# Prompt for the Inverted Axis task: the model must answer in the form
# "Option {1-7}: <one-line explanation>", rating agreement that the trend
# shows an overall increase on a 7-point scale from Strongly Disagree to
# Strongly Agree. The same text is sent with every image of this task.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: How strongly do you agree that the trend shows an overall increase?


Answer Options:
1. Strongly Disagree : The y-values decrease for 100% of the x-axis range, with a strong and consistent downward trend.
2. Disagree : The y-values decrease for 50% to less than 100% of the x-axis range.
3. Somewhat Disagree : The y-values decrease for 1% to less than 50% of the x-axis range.
4. Neutral : The y-values neither show a clear increase nor a decrease across the x-axis.
5. Somewhat Agree : The y-values increase for 1% to less than 50% of the x-axis range.
6. Agree : The y-values increase for 50% to less than 100% of the x-axis range.
7. Strongly Agree : The y-values increase for 100% of the x-axis range, with a strong and consistent upward trend.
"""




In [None]:
# Inverted Axis run: for each chart ID, query the model once for the
# "Aggressive" image and once for the "Control" image, and write both answers
# as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/InvertedAxis"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the filename part before the first "_". IDs are sorted so repeated
# runs select the same subset instead of depending on set iteration order.
all_ids = sorted({file.split("_")[0] for file in os.listdir(folder_path)})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Inverted_Axis_Responses_informed.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        image_paths = [os.path.join(folder_path, f"{chart_id}_Aggressive.png"),
                       os.path.join(folder_path, f"{chart_id}_Control.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Aspect Ratio

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Aspect Ratio task: the model must answer in the form
# "Option {1-7}: <one-line explanation>", rating how strong the depicted
# trend is on a 7-point scale from Neutral to Very Strong. The same text is
# sent with every image of this task.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: How strong is the trend depicted in the chart?


Answer Options:
1. Neutral : The trend is neither clearly increasing nor decreasing, with no consistent direction.
2. Very Weak : The trend is nearly flat, with barely any noticeable change, and occupies less than 10% of the x-axis.
3. Weak : The trend is shallow and difficult to distinguish, with small changes in y-values, and occupies around 10% to 30% of the x-axis.
4. Slightly Weak : The trend is present but not very strong, showing fluctuations, and occupies around 30% to 50% of the x-axis.
5. Slightly Strong : The trend is noticeable and somewhat steady, with a clear direction, and occupies around 50% to 70% of the x-axis.
6. Strong : The trend is distinct and sustained, with a well-defined direction, and occupies around 70% to 90% of the x-axis.
7. Very Strong : The trend is steep and consistent, dominating the entire chart, and occupies more than 90% of the x-axis.
"""


In [None]:
# Aspect Ratio run: for each chart ID, query the model once for the
# "aggressive" image and once for the "regular" image, and write both answers
# as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/AspectRatio"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the first three "_"-separated parts of a filename. Files with fewer
# parts are ignored (the original indexing raised IndexError on them), and the
# IDs are sorted so repeated runs select the same subset instead of depending
# on set iteration order.
all_ids = sorted({
    "_".join(parts[:3])
    for parts in (file.split("_") for file in os.listdir(folder_path))
    if len(parts) >= 3
})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Aspect_Ratio_Responses.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: it must match the "Aggressive", "Control" columns.
        image_paths = [os.path.join(folder_path, f"{chart_id}_aggressive.png"),
                       os.path.join(folder_path, f"{chart_id}_regular.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Dual Axis

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Dual Axis task: the model must answer in the form
# "Option {1-7}: <one-line explanation>" about how the trends of the two
# lines compare. The same text is sent with every image of this task.
# NOTE(review): the question asks to rate the *difference* between the trends
# while all seven options are phrased as degrees of *similarity* — confirm
# this is intended before reusing the prompt; the text is left unchanged here
# because altering it would change the experiment.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: How would you rate the difference between the trends of the two lines in the chart?


Answer Options:
1. No Similarity: The trend is similar for 0% of the chart, showing completely different movements.
2. Extremely Small Similarity: The trend is similar for less than 5% of the chart, with only brief matching segments.
3. Very Small Similarity: The trend is similar for 5% to less than 10% of the chart, with minimal overlap.
4. Small Similarity: The trend is similar for 10% to less than 25% of the chart, but differences dominate.
5. Large Similarity: The trend is similar for 25% to less than 50% of the chart, showing a mix of similarities and differences.
6. Very Large Similarity: The trend is similar for 50% to less than 100% of the chart, with only minor differences.
7. Extremely Large Similarity: The trend is similar for 100% of the chart, following the same direction, slope, and fluctuations.
"""



In [None]:
# Dual Axis run: for each chart ID, query the model once for the "dual" image
# and once for the "regular" image, and write both answers as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/DualAxis"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the first three "_"-separated parts of a filename. Files with fewer
# parts are ignored (the original indexing raised IndexError on them), and the
# IDs are sorted so repeated runs select the same subset instead of depending
# on set iteration order.
all_ids = sorted({
    "_".join(parts[:3])
    for parts in (file.split("_") for file in os.listdir(folder_path))
    if len(parts) >= 3
})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Dual_Axis_Responses.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: the "dual" image fills the "Aggressive" column and
        # the "regular" image fills the "Control" column.
        image_paths = [os.path.join(folder_path, f"{chart_id}_dual.png"),
                       os.path.join(folder_path, f"{chart_id}_regular.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Quantity Distortion

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Quantity Distortion task: the model must answer in the form
# "Option {1-7}: <one-line explanation>", rating the disparity between the
# quantities represented by the circles as a multiple of the smaller one.
# The same text is sent with every image of this task.
# NOTE(review): adjacent ranges share their boundary values (e.g. 1.5x appears
# in both option 2 and option 3) — confirm which option a boundary value is
# meant to fall into; the text is left unchanged because altering it would
# change the experiment.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: Which statement best describes the disparity between the quantities represented by the circles?


Answer Options:
1. There is no disparity: The circles represent equal quantities.
2. The disparity is very small: The larger circle represents 1x to 1.5x the quantity of the smaller circle.
3. The disparity is small: The larger circle represents 1.5x to 2x the quantity of the smaller circle.
4. The disparity is moderate: The larger circle represents 2x to 3x the quantity of the smaller circle.
5. The disparity is large: The larger circle represents 3x to 5x the quantity of the smaller circle.
6. The disparity is very large: The larger circle represents 5x to 10x the quantity of the smaller circle.
7. The disparity is extremely large: The larger circle represents more than 10x the quantity of the smaller circle.
"""



In [None]:
# Quantity Distortion run: for each chart ID, query the model once for the
# "deceptive" image and once for the "original" image, and write both answers
# as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/QuantityDistortion"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the filename part before the first "_". IDs are sorted so repeated
# runs select the same subset instead of depending on set iteration order.
all_ids = sorted({file.split("_")[0] for file in os.listdir(folder_path)})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Quantity_Distortion_Responses.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: the "deceptive" image fills the "Aggressive" column
        # and the "original" image fills the "Control" column.
        image_paths = [os.path.join(folder_path, f"{chart_id}_deceptive.png"),
                       os.path.join(folder_path, f"{chart_id}_original.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Distorted Projection

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Distorted Projection task: the model must answer in the form
# "Option {1-7}: <one-line explanation>", rating the disparity between the
# largest and second-largest pie segments. The same text is sent with every
# image of this task.
# NOTE(review): options 2-6 read "Nx to more than Mx" (e.g. "1x to more than
# 1.25x"); presumably "less than" was intended — confirm with the experiment
# design. The text is left unchanged because altering it would change the
# experiment.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: Which statement best describes the disparity between the highest and the second highest quantity in the pie chart?


Answer Options:
1. No disparity: The two largest segments represent equal quantities.
2. Extremely small disparity: The largest segment represents 1x to more than 1.25x the quantity of the second-largest segment.
3. Very small disparity: The largest segment represents 1.25x to more than 1.5x the quantity of the second-largest segment.
4. Small disparity: The largest segment represents 1.5x to more than 2x the quantity of the second-largest segment.
5. Moderate disparity: The largest segment represents 2x to more than 3x the quantity of the second-largest segment.
6. Large disparity: The largest segment represents 3x to more than 5x the quantity of the second-largest segment.
7. Very large disparity: The largest segment represents more than 5x the quantity of the second-largest segment.
"""



In [None]:
# Distorted Projection run: for each chart ID, query the model once for the
# "3d_" image and once for the "2d_" image, and write both answers as one
# CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/DistortedProjection"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is parts 2-4 of the "_"-separated filename (the first part is the
# "3d"/"2d" prefix), so it still carries whatever follows the last kept "_",
# including the file extension. Files with fewer than four parts are ignored
# (the original indexing raised IndexError on them), and the IDs are sorted so
# repeated runs select the same subset instead of depending on set iteration
# order.
all_ids = sorted({
    "_".join(parts[1:4])
    for parts in (file.split("_") for file in os.listdir(folder_path))
    if len(parts) >= 4
})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Distorted_Projection_Responses_2.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: the "3d_" image fills the "Aggressive" column and
        # the "2d_" image fills the "Control" column.
        image_paths = [os.path.join(folder_path, f"3d_{chart_id}"),
                       os.path.join(folder_path, f"2d_{chart_id}")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Inappropriate Categorical Encoding

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Inappropriate Categorical Encoding task: the model must
# answer in the form "Option {1-7}: <one-line explanation>", describing the
# overall trend on a 7-point scale from Neutral to Extremely Strong. The same
# text is sent with every image of this task.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.




Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:




1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.




**The output should strictly follow the given format**:




Option {1-7}: {1 line explanation covering both selection and elimination reasons}




Question: How would you describe the overall trend depicted in the chart?




Answer Options:
1. Neutral: There is no significant upward or downward trend in the chart.
2. Extremely Weak: The y-values change for 1% to less than 5% of the x-axis range, with an almost negligible trend.
3. Very Weak: The y-values change for 5% to less than 10% of the x-axis range, with a slight increasing or decreasing tendency.
4. Weak: The y-values change for 10% to less than 20% of the x-axis range, showing a minor but noticeable trend.
5. Strong: The y-values change for 20% to less than 50% of the x-axis range, with a somewhat clear upward or downward pattern.
6. Very Strong: The y-values change for 50% to less than 75% of the x-axis range, following a well-defined increasing or decreasing trend.
7. Extremely Strong: The y-values change for 75% to 100% of the x-axis range, with a strong and consistent upward or downward trend.


"""




In [None]:
# Inappropriate Categorical Encoding run: for each chart ID, query the model
# once for the "misleading" image and once for the "regular" image, and write
# both answers as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/InappropriateCategoricalEncoding"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the filename part before the first "_". IDs are sorted so repeated
# runs select the same subset instead of depending on set iteration order.
all_ids = sorted({file.split("_")[0] for file in os.listdir(folder_path)})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Inappropriate_Categorical_Encoding_Responses.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: the "misleading" image fills the "Aggressive" column
        # and the "regular" image fills the "Control" column.
        image_paths = [os.path.join(folder_path, f"{chart_id}_misleading.png"),
                       os.path.join(folder_path, f"{chart_id}_regular.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")




---

# Inappropriate Continuous Encoding

---



Getting Response from GPT 4o


---



In [None]:
# Prompt for the Inappropriate Continuous Encoding task: the model must
# answer in the form "Option {1-7}: <one-line explanation>", describing the
# overall trend on a 7-point scale whose first option is "No Trend"
# (categorical data). The same text is sent with every image of this task.
prompt="""
Analyze the given chart and determine the correct answer to the multiple-choice question. Select the option number that best aligns with the data presented.


Additionally, provide a concise yet comprehensive 1 line explanation justifying the selected option. The explanation should include:


1. The rationale for choosing the correct answer.
2. The reasons why the other options are incorrect.


**The output should strictly follow the given format**:


Option {1-7}: {1 line explanation covering both selection and elimination reasons}


Question: How would you describe the overall trend depicted in the chart?


Answer Options:
1. No Trend: The data primarily represents categorical values.
2. Extremely Weak: The y-values change for 1% to less than 5% of the x-axis range, with an almost negligible trend.
3. Very Weak: The y-values change for 5% to less than 10% of the x-axis range, with a slight increasing or decreasing tendency.
4. Weak: The y-values change for 10% to less than 20% of the x-axis range, showing a minor but noticeable trend.
5. Strong: The y-values change for 20% to less than 50% of the x-axis range, with a somewhat clear upward or downward pattern.
6. Very Strong: The y-values change for 50% to less than 75% of the x-axis range, following a well-defined increasing or decreasing trend.
7. Extremely Strong: The y-values change for 75% to 100% of the x-axis range, with a strong and consistent upward or downward trend.
"""











In [None]:
# Inappropriate Continuous Encoding run: for each chart ID, query the model
# once for the "misleading" image and once for the "regular" image, and write
# both answers as one CSV row.

# Parameters
model_name = "gpt-4o"
folder_path = "/content/drive/MyDrive/Visual_Deception/Data/FinalDataset/InappropriateContinuousEncoding"  # Change this
max_ids = 100  # number of chart pairs to evaluate before stopping

# An ID is the filename part before the first "_". IDs are sorted so repeated
# runs select the same subset instead of depending on set iteration order.
all_ids = sorted({file.split("_")[0] for file in os.listdir(folder_path)})
print(f"Total IDs Found: {len(all_ids)}")

# Open CSV to Write (explicit UTF-8 so non-ASCII model output is always writable)
with open("/content/drive/MyDrive/Visual_Deception/Responses/GPT4o_Responses/Inappropriate_Continuous_Encoding_Responses_2.csv", "w+", newline="", encoding="utf-8") as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["id", "prompt", "Aggressive", "Control"])

    processed = 0
    skipped = 0

    # `chart_id` rather than `id`, so the builtin id() is not shadowed.
    for chart_id in all_ids:
        # Order matters: the "misleading" image fills the "Aggressive" column
        # and the "regular" image fills the "Control" column.
        image_paths = [os.path.join(folder_path, f"{chart_id}_misleading.png"),
                       os.path.join(folder_path, f"{chart_id}_regular.png")]

        # Both images of the pair must exist, otherwise the ID is skipped.
        if not all(os.path.exists(image) for image in image_paths):
            print(f"⚠️ Skipping {chart_id} (Missing Images)")
            skipped += 1
            continue

        print(f"✅ Processing {chart_id}")

        responses = []
        for image in image_paths:
            try:
                response = generate_response(model_name, prompt, image)
                responses.append(response)
                print(f" - {image} Processed")
            except Exception as e:
                # Keep the row aligned with the header even if a call fails.
                responses.append("ERROR")
                print(f"❌ Failed {image}: {e}")
        print(f"Completed {processed + 1} out of {max_ids}")
        csv_writer.writerow([chart_id, prompt] + responses)
        processed += 1

        # Stop after max_ids processed IDs. The original printed the message
        # but had no `break`, so it kept processing every remaining ID.
        if processed == max_ids:
            print(f"✅ Experiment Completed for {max_ids} IDs")
            break

    print(f"Total Processed: {processed}, Total Skipped: {skipped}")
