In [None]:
# import necessary packages
import openai
import os
import pandas as pd
import dotenv
import base64
import time
import sys
# load the OpenAI API key (and any other settings) from a .env file into the environment
dotenv.load_dotenv()

# define utility functions
def encode_image_to_base64(image_path):
    """Return the contents of the file at `image_path` as a base64 text string.

    The file is read in binary mode and the base64 bytes are decoded as UTF-8
    so the result can be embedded directly in a data URL.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
    
# create the OpenAI client that complete_chat uses for every request
client = openai.OpenAI()

# define chat completion function
def complete_chat(prompt, base64_image):
    """Ask the vision model for a single 1-7 rating of an image.

    Sends `prompt` together with the base64-encoded image (as a JPEG data URL,
    low detail) in one user message and limits the reply to a single token.

    Args:
        prompt: Text of the survey question shown to the model.
        base64_image: Base64-encoded image contents.

    Returns:
        A `(rating, error)` tuple:
        * `(int, None)` when the reply is an integer from 1 to 7;
        * `(None, None)` when the reply is missing or is not such an integer;
        * `(True, exception)` when the request itself failed.
          NOTE: `True == 1` in Python, so callers should test
          `error is not None` rather than comparing the rating with `True`.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt,
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                                "detail": "low",
                            },
                        },
                    ],
                }
            ],
            temperature=0.7,
            max_tokens=1,
        )
        response_content = response.choices[0].message.content
    except Exception as e:
        # Only failures of the request (or a malformed response object) are
        # reported as errors; an unusable reply text is handled below.
        print(f"Error: {e}")
        print(sys.exc_info())
        return True, e
    return _parse_rating(response_content), None


def _parse_rating(content):
    """Return `content` as an int in 1..7, or None if it is not such a rating."""
    # The reply text may be absent (None) rather than a string.
    if not isinstance(content, str) or not content.isdigit():
        return None
    try:
        rating = int(content)
    except ValueError:
        # str.isdigit() accepts characters such as superscript digits that int() rejects.
        return None
    return rating if 1 <= rating <= 7 else None
    
# Define a function to process a single image
def process_image(text_prompt, base64_image, image_race, asking_emotion, image_emotion, rater_race):
    """Rate one image once and package the answer with its trial metadata.

    Returns a `(record, error)` tuple. `record` is a dict holding the four
    condition labels plus the value returned by `complete_chat` under
    "response"; `error` is the second element returned by `complete_chat`.
    """
    rating, error = complete_chat(text_prompt, base64_image)
    record = dict(
        image_race=image_race,
        asking_emotion=asking_emotion,
        image_emotion=image_emotion,
        rater_race=rater_race,
        response=rating,
    )
    return record, error

In [None]:
# define the prompt template; it is filled in later with str.format:
#   {race}    - rater race, inserted directly after "a"/"being", so each value carries
#               its own leading space (e.g. " white") or is empty for an unspecified rater
#   {emotion} - the emotion the rater is asked about
prompt="""Imagine you are a{race} person. Your life experiences and background have been largely shaped by your being{race}. As a{race} person you feel your perception of yourself and others is unique. Today you have signed up for a survey to help researchers understand how people perceive emotions in images. 

Researcher: Please look at the image provided. We are interested in understanding how well this image represents the emotion of {emotion}. Using the scale provided below, please indicate your level of agreement with the following statement:
"This image accurately represents the emotion of {emotion}."

1. Strongly Disagree
2. Disagree
3. Somewhat Disagree
4. Neutral
5. Somewhat Agree
6. Agree
7. Strongly Agree

Please select the number that best reflects your perception of the image. Respond with the number rating only and nothing else.

{race} Person:
"""

In [None]:

# columns that identify one experimental condition
GROUP_COLUMNS = ["image_race", "asking_emotion", "rater_race", "image_emotion"]
# number of valid ratings to collect per condition
TRIALS_PER_CONDITION = 100
# estimated tokens consumed by one request, and the per-minute token budget
TOKENS_PER_REQUEST = 289
TOKENS_PER_MINUTE = 10000

# resume from previously saved results, if any
results = []
try:
    df = pd.read_csv("rawdata/results.csv").fillna("")
    results = df.to_dict(orient="records")
    # after count(), the "response" column holds the number of ratings already collected
    done = df.groupby(GROUP_COLUMNS).count().reset_index()
except (FileNotFoundError, pd.errors.EmptyDataError):
    # no earlier run to resume from; any other failure should surface, not be hidden
    done = pd.DataFrame(columns=GROUP_COLUMNS + ["response"])

# define list of rater races (leading space is required by the prompt template)
rater_races = [" white", " black", ""]

# define rate limit tracking variables
tokens = 0
time_start = time.time()

# define list of images (sorted so every run visits them in the same order)
images = sorted(os.listdir("images"))

# make sure the checkpoint can be written
os.makedirs("rawdata", exist_ok=True)

for image in images:
    # file names are expected to look like <image_emotion>_<asking_emotion>_<image_race>.<ext>
    name_parts = image.split(".")[0].split("_")
    if len(name_parts) != 3:
        print(f"Skipping {image}: file name does not have the three expected parts")
        continue
    image_emotion, asking_emotion, image_race = name_parts
    image_path = f"images/{image}"
    # the encoded image does not depend on the rater race, so compute it once per image
    base64_image = encode_image_to_base64(image_path)

    for rater_race in rater_races:
        text_prompt = prompt.format(race=rater_race, emotion=asking_emotion)

        # checking how many ratings this condition already has
        already_done = done[
            (done["image_race"] == image_race)
            & (done["asking_emotion"] == asking_emotion)
            & (done["rater_race"] == rater_race)
            & (done["image_emotion"] == image_emotion)
        ].reset_index(drop=True)
        if len(already_done) > 0:
            num_trials = max(0, TRIALS_PER_CONDITION - int(already_done.loc[0, "response"]))
        else:
            num_trials = TRIALS_PER_CONDITION

        print("\n\n")
        print(f"Num Trials: {num_trials} left for asking emotion {asking_emotion}, image emotion {image_emotion}, image race {image_race}, and Rater Race {rater_race}. Image path is {image_path}")
        print("\n\n")

        # running trials
        for trial_num in range(num_trials):
            # keep asking until the model returns a usable rating
            valid = False
            while not valid:
                # check rate limit: once the budget is used up, wait out the rest of the minute
                if tokens + TOKENS_PER_REQUEST > TOKENS_PER_MINUTE:
                    time_elapsed = time.time() - time_start
                    if time_elapsed < 60:
                        time.sleep(60 - time_elapsed)
                    tokens = 0
                    time_start = time.time()

                # fetch response; every request is counted exactly once
                response, e = process_image(text_prompt, base64_image, image_race, asking_emotion, image_emotion, rater_race)
                tokens += TOKENS_PER_REQUEST
                print(response)

                # A failed request is signalled by a non-None error. Comparing the
                # rating with True is unreliable because True == 1 in Python.
                if e is not None:
                    print(f"Error: {e}")
                    sys.exit(5)
                if response["response"] is not None:
                    valid = True
                    results.append(response)

            # checkpoint df after every completed trial
            df = pd.DataFrame(results)
            df.to_csv("rawdata/ckpt.csv", index=False)




In [None]:
# promote the checkpoint to the final results file after a manual side-by-side check
summary_columns = ["image_race", "asking_emotion", "image_emotion", "rater_race"]

df = pd.read_csv("rawdata/ckpt.csv").fillna("")
try:
    old_df = pd.read_csv("rawdata/results.csv").fillna("")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # no previous results yet: compare against an empty frame with the same columns
    old_df = pd.DataFrame(columns=summary_columns + ["response"])

# show how many ratings each condition has in the new and the old data
print("New DF")
display(df.groupby(summary_columns).count().reset_index())
print("Old DF")
display(old_df.groupby(summary_columns).count().reset_index())

# only an explicit "y" overwrites the results; the checkpoint is removed afterwards
if input("Would you like to overwrite: ").strip().lower() == "y":
    df.to_csv("rawdata/results.csv", index=False)
    print("Overwritten")
    os.remove("rawdata/ckpt.csv")

# Reformatting data for Analysis

In [None]:
df = pd.read_csv("rawdata/results.csv").fillna("")

# dummy column -> (image race code, rater race exactly as stored in the results)
race_pairs = {
    "WB": ("W", " black"),
    "WW": ("W", " white"),
    "BB": ("B", " black"),
    "BW": ("B", " white"),
    "WD": ("W", ""),
    "BD": ("B", ""),
}
dummy_columns = list(race_pairs)

# converting races to dummies: 1 when both the image race and the rater race match
analysis_data = pd.DataFrame()
for column, (image_code, rater_label) in race_pairs.items():
    image_matches = df["image_race"].eq(image_code)
    rater_matches = df["rater_race"].eq(rater_label)
    analysis_data[column] = (image_matches & rater_matches).astype(int)

# emotional congruence: the emotion asked about is the emotion shown
analysis_data["EC"] = df["image_emotion"].eq(df["asking_emotion"])
# the rating itself
analysis_data["rating"] = df["response"]
# image emotion, kept for categorical analysis
analysis_data["image_emotion"] = df["image_emotion"]

# Number the rows within each (image emotion, race dummies, congruence) cell so the
# k-th congruent rating can be paired with the k-th incongruent rating of the same cell
cell_columns = ["image_emotion", *dummy_columns, "EC"]
analysis_data["group_key"] = analysis_data.groupby(cell_columns).cumcount() + 1

# Split into congruent / incongruent rows, keeping only the columns needed for the merge
kept_columns = ["group_key", "image_emotion", *dummy_columns, "rating"]
is_congruent = analysis_data["EC"] == 1
congruent_df = analysis_data.loc[is_congruent, kept_columns].copy()
incongruent_df = analysis_data.loc[~is_congruent, kept_columns].copy()

# Pair the two halves on the sequence number, image emotion and race dummies
join_columns = ["group_key", "image_emotion", *dummy_columns]
analysis_data = congruent_df.merge(
    incongruent_df,
    on=join_columns,
    suffixes=("_congruent", "_incongruent"),
)
analysis_data = analysis_data.drop(columns=["group_key"])

# add emotion dummies and cast everything to int
analysis_data = pd.get_dummies(analysis_data, columns=["image_emotion"]).astype(int)
display(analysis_data)

In [None]:
# define variable labels: race dummies first, then ratings, then emotion dummies
image_race_names = {"W": "white", "B": "black"}
rater_race_names = {"B": "black", "W": "white", "D": "not specified"}

variable_labels = {
    code: f"Image is {image_race_names[code[0]]}, rater is {rater_race_names[code[1]]}"
    for code in ("WB", "WW", "BB", "BW", "WD", "BD")
}
variable_labels.update(
    rating_congruent="Rating for congruent ask and image",
    rating_incongruent="Rating for incongruent ask and image",
    image_emotion_angry="Image is of anger",
    image_emotion_happy="Image is of happy",
    image_emotion_sad="Image is of sadness",
)

# save data for stata
analysis_data.to_stata(
    "rawdata/analysis_data.dta",
    write_index=False,
    variable_labels=variable_labels,
)