In [1]:
import openai
import json
import os
import time
import numpy as np
import pandas as pd



In [3]:
# Credentials: prefer the OPENAI_API_KEY environment variable so a real key never
# has to be saved inside the notebook. The placeholder is kept only as a fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your openai api key here")

# Experiment grid: every study file is sent to each model at each temperature.
temperatures = [0]
studies = ["kim2018"]
models = ["gpt-4-0125-preview"]#, "gpt-4-1106-preview", "gpt-4", "gpt-3.5-turbo"]
# System message sent at the start of every conversation.
system_prompt = "Your goal is to rank visualization designs based on how effective they are in helping people read charts quickly and accurately. You rely upon visualization design best practices and graphical perception research to rank visualizations."

def get_task_type(path, study, name):
    """Classify an example file as a "summary" or a "value" task.

    The file at ``{path}/{study}/{name}`` is read in full and searched for the
    substrings "summary" and "value". Exactly one of them must occur.

    Args:
        path: Root directory that contains the study folders.
        study: Name of the study sub-directory.
        name: File name inside the study directory.

    Returns:
        "summary" or "value", whichever substring occurs in the file.

    Raises:
        ValueError: If both substrings occur, or if neither does.
    """
    target = '{}/{}/{}'.format(path, study, name)

    with open(target, 'r') as handle:
        text = handle.read()

    # Collect the task keywords that appear anywhere in the file text.
    found = [keyword for keyword in ("summary", "value") if keyword in text]

    if len(found) == 2:
        raise ValueError("Both 'summary' and 'value' are present, which is not allowed.")
    if not found:
        raise ValueError("Neither 'summary' nor 'value' is present, which is required.")

    # Exactly one keyword matched at this point.
    return found[0]


    
def generate_responses(directory, study, filename, max_retries=3, retry_delay=60):
    """Query each configured model/temperature for one example file and save the transcript.

    For every combination of the module-level ``models`` and ``temperatures``, the
    prompt is assembled as the ``<task>_begin.txt`` template, the example file and
    the ``<task>_end.txt`` template (``<task>`` is the result of ``get_task_type``).
    It is sent together with the module-level ``system_prompt``, and the whole
    conversation is written to ``./gpt_responses/{study}_responses/{model}_{temp}/``.
    Combinations whose response file already exists are skipped, so the function
    can be called again to resume an interrupted batch.

    Args:
        directory: Root directory containing one sub-directory per study.
        study: Name of the study sub-directory.
        filename: Name of the example file inside ``directory/study``.
        max_retries: Maximum number of attempts per model/temperature combination.
        retry_delay: Seconds to wait between two failed attempts.

    Returns:
        ``"failed"`` as soon as one combination exhausts its attempts (the
        remaining combinations are not tried); ``None`` otherwise.

    Raises:
        ValueError: Propagated from ``get_task_type`` when the example file does
            not identify exactly one task type.
        OSError: If a prompt template or the example file cannot be opened.
    """
    stem = os.path.splitext(filename)[0]

    for model in models:
        for temp in temperatures:
            response_dir = f'./gpt_responses/{study}_responses/{model}_{temp}/'
            response_file = f'{response_dir}{stem}_{model}_{temp}.txt'

            # exist_ok avoids the check-then-create race of a separate exists() test.
            os.makedirs(response_dir, exist_ok=True)

            if os.path.exists(response_file):
                print(response_file + " exists!!!\n")
                continue

            print("*" * 100)
            print(model + " " + str(temp) + " " + study + " " + filename)

            task_type = get_task_type(directory, study, filename)

            # Prompt = begin template + example content + end template.
            with open('./prompt_templates/{}'.format(task_type) + '_begin.txt', 'r') as file:
                prompt = file.read()

            with open(os.path.join(directory + '/{}/'.format(study), filename), 'r') as file:
                prompt += "\n\n" + file.read().strip() + "\n\n"

            with open('./prompt_templates/{}'.format(task_type) + '_end.txt', 'r') as file:
                prompt += file.read().strip()

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ]

            succeeded = False
            for attempt in range(1, max_retries + 1):
                try:
                    completion = openai.ChatCompletion.create(
                        model=model,
                        temperature=temp,
                        seed=42,
                        messages=messages
                    )
                    reply = completion.choices[0].message.content
                    print(reply)

                    # Build the transcript from a copy: `messages` must stay untouched so
                    # that a retry (e.g. after a failed write) resends the original
                    # two-message conversation instead of one polluted with stale replies.
                    transcript = messages + [{"role": "assistant", "content": reply}]

                    with open(response_file, 'w') as file:
                        for message in transcript:
                            for key, value in message.items():
                                file.write(f"{key}: {value}\n")
                            file.write('-' * 40)
                            file.write('\n')
                    print('write complete')
                    succeeded = True
                    break

                except Exception as e:
                    # Best-effort: any failure in the request or the write triggers a retry.
                    print(f"Error encountered: {e}. Retrying...")
                    # No point in waiting once there is no attempt left.
                    if attempt < max_retries:
                        time.sleep(retry_delay)

            if not succeeded:
                print(f"Failed to process {response_file} after {max_retries} attempts.")
                return "failed"



# Batch driver: run generate_responses on every .txt example of every study.
dir_path = './data/example_pairs_to_rank'
i = 0  # running count of processed example files, printed as a progress marker
for study in studies:
    for filename in os.listdir(dir_path + '/{}'.format(study)):
        # Only .txt files are treated as examples; other directory entries are ignored.
        if not filename.endswith(".txt"):
            continue

        response = generate_responses(dir_path, study, filename)
        if response == "failed":
            # One extra pass after a cool-down. Combinations whose response file was
            # already written are skipped inside generate_responses.
            time.sleep(60)
            response = generate_responses(dir_path, study, filename)

        print(i)
        i += 1
            
        
            
    


./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/419_pos_first_gpt-4-0125-preview_0.txt exists!!!

0
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/549_pos_first_gpt-4-0125-preview_0.txt exists!!!

1
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/926_pos_first_gpt-4-0125-preview_0.txt exists!!!

2
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/876_pos_first_gpt-4-0125-preview_0.txt exists!!!

3
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/1081_pos_first_gpt-4-0125-preview_0.txt exists!!!

4
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/260_pos_first_gpt-4-0125-preview_0.txt exists!!!

5
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/330_pos_first_gpt-4-0125-preview_0.txt exists!!!

6
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/715_pos_first_gpt-4-0125-preview_0.txt exists!!!

7
./gpt_responses/kim2018_responses/gpt-4-0125-preview_0/27_pos_first_gpt-4-0125-preview_0.txt exists!!!

8
./gpt_responses/kim2018_responses/gpt

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for the categorical variable and size for one of the quantitative variables, which aligns better with human perceptual abilities for summary tasks involving comparison and aggregation.",
  "confidence": 4
}
```
write complete
1514
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 319_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for q1 and size for q2, which aligns better with how people perceive quantitative differences, making it easier to compare and summarize.",
  "confidence": 4
}
```
write complete
1515
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 781_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which reduces clutter and makes it easier to compar

```json
{
  "chart": "chart 2",
  "reason": "Using color to encode q1 allows for quicker comparison across categories in n than varying point sizes, as color perception is generally more immediate and less cognitively taxing than accurately interpreting size differences.",
  "confidence": 4
}
```
write complete
1534
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 312_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size and position on a common scale (y-axis for categories and x-axis for one of the quantitative variables), which are more effective for comparing aggregate properties than color encoding used in Chart 2.",
  "confidence": 4
}
```
write complete
1535
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 737_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses both x and y axes

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis to directly represent the categorical variable and size encoding for one of the quantitative variables, making comparisons and aggregations more intuitive and less cluttered than the faceted approach in Chart 1.",
  "confidence": 4
}
```
write complete
1554
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 37_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color encoding for q2, which has low entropy, making it easier to compare and summarize data points based on q1 values along the y-axis.",
  "confidence": 4
}
```
write complete
1555
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 655_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n' and directly plots 'q1' and 'q2' on th

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis to directly represent the categorical variable and size for one of the quantitative variables, making comparisons and aggregations more intuitive than overlaying all variables in a scatter plot as in Chart 1.",
  "confidence": 4
}
```
write complete
1575
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 712_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the y-axis for q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "confidence": 4
}
```
write complete
1576
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 642_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to encode the categorical variable and positions the two quantitative variables along

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for accurate and quick value tasks compared to using color as a quantitative scale in Chart 2.",
  "confidence": 5
}
```
write complete
1595
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 371_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Using color to encode q1 allows for quicker comparison and identification of aggregate properties across categories in n than size encoding, as color is more directly interpretable in this context.",
  "confidence": 4
}
```
write complete
1596
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 458_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color encoding for q1, which is low entropy and inte

```json
{
  "chart": "chart 2",
  "reason": "Encoding quantitative data (q1) on the y-axis and using size for the second quantitative variable (q2) allows for easier and more accurate value retrieval compared to using color for q1 as in chart 1.",
  "confidence": 4
}
```
write complete
1615
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1058_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color encoding for q1, which has low entropy, making it easier to visually compare and summarize the data related to q1 across categories in n.",
  "confidence": 4
}
```
write complete
1616
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1108_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis to represent the categorical variable and size for one of the quantitative variables, which s

```json
{
  "chart": "chart 1",
  "reason": "Using size to encode q1 allows for easier and more accurate value comparison and retrieval than using color, especially for quantitative data.",
  "confidence": 4
}
```
write complete
1635
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 103_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which simplifies comparison and aggregation tasks across categories by providing separate, parallel coordinate plots for each category.",
  "confidence": 4
}
```
write complete
1636
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 476_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to represent q2 and positions categories of n along the y-axis, which facilitates easier comparison of aggregate properties 

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses faceting by the categorical variable 'n' and directly plots 'q1' and 'q2' on the x and y axes, making it easier to focus on and compare individual values for 'q1' and 'q2'.",
  "confidence": 4
}
```
write complete
1656
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1122_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to distinguish between categories and places both quantitative variables on axes, making it easier to read and compare values.",
  "confidence": 4
}
```
write complete
1657
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1072_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q2 which is more intuitive for value tasks compared to color encoding in Chart 1, making it easier to read and c

```json
{
  "chart": "chart 1",
  "reason": "Using size to encode q1 allows for easier and more accurate comparison and retrieval of values than using color, especially for quantitative data.",
  "confidence": 4
}
```
write complete
1676
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 298_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which reduces clutter and makes it easier to focus on and compare the values of 'q1' and 'q2' for specific categories.",
  "confidence": 4
}
```
write complete
1677
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 194_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "confidence"

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for one quantitative variable and a linear y-axis for the other, which are both more effective for accurate value reading compared to color encoding used in Chart 1.",
  "confidence": 4
}
```
write complete
1696
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 50_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q2 and x-axis encoding for q1, which are more intuitive for value tasks involving direct comparison or retrieval of quantitative values.",
  "confidence": 4
}
```
write complete
1697
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 632_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for the categorical variable and color for one of the quantitative variables, which simplifie

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for q1 and color for q2, which aligns better with how people typically interpret quantitative data and color encoding, making it easier for summary tasks involving q1.",
  "confidence": 4
}
```
write complete
1716
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 594_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which reduces clutter and makes it easier to focus on and compare the values of 'q1' and 'q2' for each category.",
  "confidence": 4
}
```
write complete
1717
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 625_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q1 and positions categories of n along the x-axis, making it easier to compare aggrega

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n' which allows for easier comparison and summary of 'q1' across categories.",
  "confidence": 4
}
```
write complete
1736
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 68_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for q1 and the x-axis for the categorical variable n, which aligns with conventional practices for easier reading and comparison of quantitative values.",
  "confidence": 5
}
```
write complete
1737
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 884_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and positions on both axes to encode quantitative variables, which is more effective for value tasks than using size an

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for q1 and size for q2, which aligns better with how people perceive quantitative differences, making it easier to compare averages and maximum values.",
  "confidence": 4
}
```
write complete
1756
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 189_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode q2 and position along the x-axis to encode q1, which are more effective for precise value tasks compared to the size encoding used for q1 in Chart 2.",
  "confidence": 4
}
```
write complete
1757
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 299_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color encoding for q2 and places q1 on the y-axis, making it easier to read and compare values for q1, which 

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the categorical variable and the y-axis for one of the quantitative variables, which aligns with conventional reading patterns and supports easier comparison of aggregate properties.",
  "confidence": 4
}
```
write complete
1776
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 800_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and positions on both axes for the quantitative variables, making it easier to read and compare values directly.",
  "confidence": 4
}
```
write complete
1777
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 216_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, whic

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for the categorical variable and the y-axis for a quantitative variable, which aligns with conventional reading patterns and aids in quicker and more accurate value retrieval.",
  "confidence": 4
}
```
write complete
1796
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 589_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x and y axes for the quantitative variables, which is more intuitive for comparing numerical values, while using color to distinguish categories, making it easier to perform value tasks.",
  "confidence": 4
}
```
write complete
1797
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 685_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q2 which is more effective for summary tas

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the quantitative variable q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "confidence": 4
}
```
write complete
1817
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1130_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for accurate and quick value tasks.",
  "confidence": 5
}
```
write complete
1818
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1060_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to differentiate categories in 'n' and spatial position (x and y axes) for the quantitative variables 'q1' and 'q2', which aligns with 

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses both x and y axes for quantitative variables, making it easier to compare values directly and accurately.",
  "confidence": 4
}
```
write complete
1838
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 681_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q2 and positions q1 along the x-axis, which facilitates easier comparison of aggregate properties across categories in n.",
  "confidence": 4
}
```
write complete
1839
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 530_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x and y axes for the two quantitative variables, making it easier to compare their values directly.",
  "confidence": 4
}
```
write complete
1840
******************************************

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for value tasks according to graphical perception research.",
  "confidence": 5
}
```
write complete
1859
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 637_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color encoding for the categorical variable, allowing for easier comparison and summary across categories without the need to navigate multiple facets.",
  "confidence": 4
}
```
write complete
1860
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 55_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for accurate an

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the categorical variable and color for one of the quantitative variables, which aligns better with human perceptual abilities for summarizing and comparing aggregate properties.",
  "confidence": 4
}
```
write complete
1880
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1008_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n' and linear scales for both quantitative variables, which simplifies comparison and identification tasks across categories.",
  "confidence": 4
}
```
write complete
1881
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 158_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color encoding for q1 and positions q2 on the y-axis, which aligns better with h

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on both x and y axes for quantitative variables, which is more effective for value tasks than using color for one of the quantitative variables as in Chart 2.",
  "confidence": 5
}
```
write complete
1900
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 855_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to represent the categorical variable and positions both quantitative variables along the x and y axes, making it easier to read and compare values directly.",
  "confidence": 4
}
```
write complete
1901
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 905_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to differentiate categories in 'n', allowing for easier comparison and summary across the tw

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses a Cartesian coordinate system with linear scales and points to directly encode q1 and q2, making it easier to read and compare specific values.",
  "confidence": 5
}
```
write complete
1920
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1090_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for q1, which is directly involved in the value task, making it easier for participants to read and compare values quickly and accurately.",
  "confidence": 4
}
```
write complete
1921
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 654_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for accurate and quick value tasks co

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses faceting by the categorical variable 'n' and linear scales for both 'q1' and 'q2', making it easier to compare aggregate properties across categories.",
  "confidence": 4
}
```
write complete
1940
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 397_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the y-axis to represent the categorical variable and size for one of the quantitative variables, which simplifies comparison and identification tasks.",
  "confidence": 4
}
```
write complete
1941
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 483_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which simplifies comparison and aggregation tasks across categories by providing separate, par

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 maps the variable of interest (q1) to the x-axis, which is more intuitive for reading specific values compared to using color as in Chart 2.",
  "confidence": 4
}
```
write complete
1960
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 739_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size to represent q1 and positions q2 along the y-axis, which are more effective for comparing aggregate properties than using color to represent a quantitative variable as in Chart 2.",
  "confidence": 4
}
```
write complete
1961
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 669_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and positions on both axes for the quantitative variables, making it easier 

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to represent one quantitative variable and the y-axis for the categorical variable, making it easier to compare categories and see trends or outliers in the data.",
  "confidence": 4
}
```
write complete
1981
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 985_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the y-axis for q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "confidence": 4
}
```
write complete
1982
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 39_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and positions on both axes for quantitative variables, which is more effective for comparing aggregate

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on both x and y axes for quantitative variables, which is more effective for value tasks than using color for one of the quantitative variables as in Chart 2.",
  "confidence": 5
}
```
write complete
2001
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 933_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses a Cartesian coordinate system with points directly encoding q1 and q2 values on the x and y axes, making it easier to read and compare these values.",
  "confidence": 5
}
```
write complete
2002
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 863_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to differentiate categories and positions on both axes to represent quantitative values, which is more effe

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x and y axes for the quantitative variables, which is more intuitive for reading and comparing numerical values.",
  "confidence": 5
}
```
write complete
2022
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 856_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n' and direct encoding of 'q1' and 'q2' on the x and y axes, respectively, which simplifies comparison and value retrieval tasks.",
  "confidence": 4
}
```
write complete
2023
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 906_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which simplifies comparison and summary tasks across categories.",
  "confidence": 4
}
```
write complete
2024


```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for the categorical variable and the y-axis for a quantitative variable, which aligns with conventional reading patterns and aids in quicker and more accurate value retrieval.",
  "confidence": 4
}
```
write complete
2042
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 307_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the x-axis for the categorical variable and the y-axis for one of the quantitative variables, which aligns with conventional reading patterns and supports easier comparison of categories.",
  "confidence": 4
}
```
write complete
2043
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 257_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n', which simplifies comp

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 maps q1 to the y-axis and uses color for q2, making it easier to read exact values for q1, which is involved in the value task.",
  "confidence": 4
}
```
write complete
2063
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 335_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the categorical variable and color for one of the quantitative variables, which aligns better with best practices for encoding and comparing categories and their associated numerical values.",
  "confidence": 4
}
```
write complete
2064
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 265_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variable 'n' and linear scales for both quantitative variables, which simplifies 

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to represent the categorical variable and positions both quantitative variables along the x and y axes, making it easier to read and compare values directly.",
  "confidence": 4
}
```
write complete
2083
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 162_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size and position (x and y axes) to represent the quantitative variables and an ordinal scale for the categorical variable, which aligns better with human perceptual abilities for summary tasks.",
  "confidence": 4
}
```
write complete
2084
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 878_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the y-axis for q1, which is directly involved in the value task, making it easi

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size to represent q2 and positions q1 along the x-axis, which are more intuitive and direct ways to compare and read values than using color as in Chart 2.",
  "confidence": 4
}
```
write complete
2104
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1004_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color encoding for the categorical variable, allowing for easier comparison and summary across categories without the need to switch between different facets or plots.",
  "confidence": 4
}
```
write complete
2105
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 421_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size to represent q2 and position (y-axis) for q1, which aligns better with human perceptual abilities for accura

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses both x and y axes for quantitative variables, making it easier to compare values directly and accurately.",
  "confidence": 4
}
```
write complete
2124
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 166_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses the y-axis to represent the categorical variable and size for one of the quantitative variables, which simplifies comparison of categories and understanding of aggregate properties.",
  "confidence": 4
}
```
write complete
2125
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 387_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color to represent one quantitative variable and positions along the y-axis for the categorical variable, which generally allows for easier comparison

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode q2 and position (y-axis) for q1, which are more perceptually effective for value tasks than the size encoding used for q1 in Chart 2.",
  "confidence": 4
}
```
write complete
2144
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 927_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size encoding for q2 and y-axis encoding for q1, which are more effective for accurate and quick value tasks compared to color encoding used for q1 in Chart 2.",
  "confidence": 4
}
```
write complete
2145
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 877_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for a quantitative variable (q1) and color for another quantitative variable (q2), which aligns better with how peopl

```json
{
  "chart": "chart 1",
  "reason": "Using size to encode q1 allows for easier comparison of aggregate properties than color encoding, as size is a more effective visual variable for quantitative comparison.",
  "confidence": 4
}
```
write complete
2164
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 286_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size to encode q2 and position (y-axis) for q1, which is more effective for precise value tasks compared to using color for q1 as in Chart 2.",
  "confidence": 4
}
```
write complete
2165
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 137_pos_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Color encoding in chart 2 is generally more effective for value tasks involving comparison or retrieval than size encoding used in chart 1, as it allow

```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for q1 and positions q2 along the y-axis and categories (n) along the x-axis, making it easier to compare aggregate properties across categories.",
  "confidence": 4
}
```
write complete
2184
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 745_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses size encoding for one quantitative variable and positions both the categorical and the other quantitative variable along axes, which typically allows for easier comparison and summary of aggregate properties.",
  "confidence": 4
}
```
write complete
2185
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 615_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses color encoding for the categorical variable, which facilitates quicke

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the categorical variable and color for one of the quantitative variables, making it easier to perform summary tasks involving comparisons across categories.",
  "confidence": 4
}
```
write complete
2204
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 202_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis to encode q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "confidence": 4
}
```
write complete
2205
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 352_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for q1, which is directly involved in the value task, making it easier for participants to read and compare values.",
  "c

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to differentiate categories in 'n' and positions on both axes to represent 'q1' and 'q2', which is more effective for comparing aggregate properties than using size and potentially confusing ordinal positioning for quantitative data.",
  "confidence": 4
}
```
write complete
2224
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 470_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and positions on both axes for the quantitative variables, making it easier to compare and identify aggregate properties.",
  "confidence": 4
}
```
write complete
2225
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 520_neg_first.txt
```json
{
  "chart": "chart 2",
  "reason": "Chart 2 uses faceting by the categorical variabl

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode q2 and positions q1 along the x-axis, which is more effective for precise value tasks compared to using size as in Chart 2.",
  "confidence": 4
}
```
write complete
2244
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 124_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size encoding for q1 and positions q2 along the y-axis, making it easier to compare aggregate properties of q1 and q2 across categories in n.",
  "confidence": 4
}
```
write complete
2245
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 1124_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the y-axis for q1 and color for q2, which aligns better with how people interpret quantitative values and differences, making it easier to perform value ta

```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses the x-axis for the categorical variable and color for one of the quantitative variables, which is more effective for comparing aggregates across categories.",
  "confidence": 4
}
```
write complete
2265
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 373_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses color to encode the categorical variable and spatial position for the two quantitative variables, which aligns with best practices for encoding and comparing quantitative data.",
  "confidence": 4
}
```
write complete
2266
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 888_neg_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses faceting by the categorical variable 'n' and directly plots 'q1' and 'q2' on the Cartesian coordinates, 

```json
{
  "chart": "chart 1",
  "reason": "Faceting by the categorical variable 'n' in Chart 1 allows for easier comparison and summary of 'q1' across categories.",
  "confidence": 4
}
```
write complete
2285
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 764_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses size encoding for q1 and positions q2 along the y-axis, making it easier to compare aggregate properties of q1 across categories in n than Chart 2, which uses color encoding for categories.",
  "confidence": 4
}
```
write complete
2286
****************************************************************************************************
gpt-4-0125-preview 0 kim2018 341_pos_first.txt
```json
{
  "chart": "chart 1",
  "reason": "Chart 1 uses spatial position on common scales (x and y axes) for both quantitative variables, which is more effective for accurate and quick value tasks.

In [2]:
import json

def extract_json_from_file(file_path):
    """Pull the JSON object out of a saved response file.

    The candidate JSON text runs from the first '{' that follows the last
    occurrence of 'content:' up to and including the last '}' in the file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded JSON value, or None (after printing a message) when the
        'content:' marker, the opening brace, or the closing brace is missing,
        or when the extracted text is not valid JSON.
    """
    with open(file_path, 'r') as handle:
        raw = handle.read()

    # Only the text after the final 'content:' marker is of interest.
    marker_at = raw.rfind('content:')
    if marker_at < 0:
        print("No 'content:' found in the file.")
        return None

    # Opening brace: first '{' at or after that marker.
    opening = raw.find('{', marker_at)
    if opening < 0:
        print("No '{' found after the last 'content:'.")
        return None

    # Closing brace: the very last '}' anywhere in the file.
    closing = raw.rfind('}')
    if closing < 0:
        print("No closing '}' found in the file.")
        return None

    candidate = raw[opening:closing + 1]
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return None
    return parsed

    
    
def read_json_files_from_directory(directory_path, study, model, temp):
    """Parse every saved response file for one study/model/temperature run.

    Files are read from ``{directory_path}/{study}_responses/{model}_{temp}``.
    Only names ending in ".txt" that contain ``{model}_{temp}`` are considered.

    Args:
        directory_path: Root directory holding the per-study response folders.
        study: Study name used in the folder name and appended to each key.
        model: Model name used in the folder and file names.
        temp: Temperature used in the folder and file names.

    Returns:
        dict mapping ``filename + '_' + study`` to the value returned by
        ``extract_json_from_file`` for that file. Files that could not be
        parsed are stored as None and their name is printed.
    """
    # Build the run directory and the file-name tag once instead of per file.
    responses_dir = directory_path + f'/{study}_responses/{model}_{temp}'
    run_tag = f'{model}_{temp}'

    rankings = {}

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(responses_dir)):
        if not (filename.endswith(".txt") and run_tag in filename):
            continue

        # extract_json_from_file opens the file itself and handles JSON decode
        # errors internally (returning None), so no extra open()/except is
        # needed here.
        parsed = extract_json_from_file(os.path.join(responses_dir, filename))
        if parsed is None:
            # Name the offending file; the helper's own message does not.
            print(f"Error reading {filename}")
        rankings[filename + f'_{study}'] = parsed

    return rankings

In [3]:
def filter_results(res):
    """Collapse paired responses into one chart choice per item number.

    Each key of ``res`` must start with an integer item number followed by
    '_'. Keys sharing a number form a group; within a group the key containing
    'pos' identifies the response whose answer is reported.

    For every group the lower-cased 'chart' values of the first two usable
    responses are compared:

    * equal   -> the slot is set to "conflicting" and the shared choice is
                 appended to the module-level list ``conflicting_choices``;
    * differ  -> the slot is set to the 'chart' value of the 'pos' response.

    Groups with fewer than two usable responses (missing partner, None entry,
    or no string 'chart' field) or without a 'pos' key are reported and left
    as None instead of raising.

    Args:
        res: dict mapping response keys to parsed response dicts (or None).

    Returns:
        list where index ``number - 1`` holds the result for item ``number``.
        Its length is ``len(res) // 2``, extended if needed so the largest
        item number fits.

    Side effects:
        Appends to the global ``conflicting_choices``, which must already
        exist when a conflicting group is encountered.
    """
    # Group keys by the leading item number.
    grouped_items = {}
    for key in res:
        number = int(key.split('_')[0])
        grouped_items.setdefault(number, []).append(key)

    # One slot per item; None marks "no result". For complete, contiguous
    # input this equals len(res) // 2, but make sure the largest number fits.
    output = [None] * max(len(res) // 2, max(grouped_items, default=0))

    for number, keys in grouped_items.items():
        # Keep only entries that actually carry a textual 'chart' answer.
        usable = [
            key for key in keys
            if isinstance(res[key], dict) and isinstance(res[key].get('chart'), str)
        ]
        if len(usable) < 2:
            print(f"Skipping item {number}: expected two parsed responses, found {len(usable)}.")
            continue

        first_choice = res[usable[0]]['chart'].lower()
        second_choice = res[usable[1]]['chart'].lower()

        if first_choice == second_choice:
            # Same label for both responses of the pair: record as conflicting.
            output[number - 1] = "conflicting"
            conflicting_choices.append(first_choice)
        else:
            # Labels differ: report the answer from the 'pos' response.
            pos_key = next((key for key in usable if 'pos' in key), None)
            if pos_key is None:
                print(f"Skipping item {number}: no 'pos' response found.")
                continue
            output[number - 1] = res[pos_key]['chart'].lower()

    return output



In [5]:
import pickle
def dump_data(model, temp, res_kim):
    """Pickle the filtered ranking for one model/temperature run.

    Writes ``filter_results(res_kim)`` to
    ``./gpt_responses/kim2018_{model}_{temp}_ranking.pkl``.

    Args:
        model: Model name used in the output file name.
        temp: Temperature used in the output file name.
        res_kim: Parsed responses passed straight to ``filter_results``.

    Note:
        ``filter_results`` is invoked here, so any side effects it has on
        module-level state occur again on every call to this function.
    """
    # Compute the ranking before opening the file: opening in 'wb' mode
    # truncates it, so a failure in filter_results would otherwise leave an
    # empty pickle behind (and destroy any previous one).
    ranking = filter_results(res_kim)
    with open(f'./gpt_responses/kim2018_{model}_{temp}_ranking.pkl', 'wb') as file:
        pickle.dump(ranking, file)

In [12]:
# gpt-4-0125-preview, temperature 0: parse the saved responses, report how often
# the conflicting pairs chose "chart 1", then pickle the per-item ranking.
dir_path = './gpt_responses'
res_kim = read_json_files_from_directory(dir_path, 'kim2018', 'gpt-4-0125-preview', 0)
# filter_results appends to this module-level list, so reset it before the call.
conflicting_choices = []
output = filter_results(res_kim)
print(len(output))

cnt = conflicting_choices.count("chart 1")
n_conflicts = len(conflicting_choices)
# Guard the ratio: with no conflicting pairs the division would raise
# ZeroDivisionError and the dump below would never run.
print(cnt / n_conflicts if n_conflicts else float("nan"))
print(cnt, n_conflicts)


# dump_data calls filter_results again, which appends to conflicting_choices a
# second time; the statistics above were taken before that happens.
dump_data('gpt-4-0125-preview', 0, res_kim)

1152
0.37593984962406013
100 266


In [7]:
# gpt-4-0613, temperature 0: parse the saved responses, report how often the
# conflicting pairs chose "chart 1", then pickle the per-item ranking.
dir_path = './gpt_responses'
res_kim = read_json_files_from_directory(dir_path, 'kim2018', 'gpt-4-0613', 0)
# filter_results appends to this module-level list, so reset it before the call.
conflicting_choices = []
output = filter_results(res_kim)


cnt = conflicting_choices.count("chart 1")
n_conflicts = len(conflicting_choices)
# Guard the ratio: with no conflicting pairs the division would raise
# ZeroDivisionError and the dump below would never run.
print(cnt / n_conflicts if n_conflicts else float("nan"))
print(cnt, n_conflicts)


# dump_data calls filter_results again, which appends to conflicting_choices a
# second time; the statistics above were taken before that happens.
dump_data('gpt-4-0613', 0, res_kim)

0.9270516717325228
305 329


In [8]:
# gpt-3.5-turbo-0125, temperature 0: parse the saved responses, report how often
# the conflicting pairs chose "chart 1", then pickle the per-item ranking.
dir_path = './gpt_responses'
res_kim = read_json_files_from_directory(dir_path, 'kim2018', 'gpt-3.5-turbo-0125', 0)
# filter_results appends to this module-level list, so reset it before the call.
conflicting_choices = []
output = filter_results(res_kim)

cnt = conflicting_choices.count("chart 1")
n_conflicts = len(conflicting_choices)
# Guard the ratio: with no conflicting pairs the division would raise
# ZeroDivisionError and the dump below would never run.
print(cnt / n_conflicts if n_conflicts else float("nan"))
print(cnt, n_conflicts)


# dump_data calls filter_results again, which appends to conflicting_choices a
# second time; the statistics above were taken before that happens.
dump_data('gpt-3.5-turbo-0125', 0, res_kim)

0.9652694610778443
806 835
