In [None]:
import base64
import glob
import json
import os
from io import BytesIO

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import openai
import pandas as pd
from PIL import Image

In [None]:
# Initialize the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (and accidentally shared with) the notebook.
# The 'YOUR KEY' placeholder is only a fallback for when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", 'YOUR KEY')

# Sample sentences for the text-labeling demo (three items, valid indices 0-2).
# NOTE: `text_data` is rebound to a longer, five-sentence list in a later cell.
text_data = [
    "The big dog is playing in the park.",
    "My chef is making instant pork ramen for me in the kitchen.",
    "The frat brothers are in the animal house.",
]

In [None]:
prompt = """
Please identify if there are animals in the sentence. 
Respond with 'True' or 'False', with no extra information.
"""

# Function to Label Text Using ChatGPT (Zero-Shot)
def label_text(text, system_prompt=prompt, model="gpt-4"):
    """Label one sentence with a zero-shot chat completion.

    Parameters
    ----------
    text : str
        Sentence to label.
    system_prompt : str, optional
        System instructions sent with the request. The default is captured when
        this function is defined, so rebinding the global `prompt` later in the
        notebook (the image-labeling cell does this) cannot silently change
        what the text labeler sends.
    model : str, optional
        Chat model name passed to the API.

    Returns
    -------
    str
        The model's reply with leading/trailing whitespace removed.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Please label the following text: '{text}'"}
        ]
    )
    label = response['choices'][0]['message']['content']
    return label.strip()

In [None]:
# Try the labeler on the last sample sentence. A negative index is used because the
# three-item `text_data` defined above has no index 4 (that raised IndexError).
label_text(text_data[-1])

### Labeling images

In [None]:
# Read a file from disk and base64-encode its bytes (no resizing happens here)
def encode_image_base64(img):
    """Return the base64 text of the file located at path `img`.

    The file is opened in binary mode, its full contents are base64-encoded,
    and the encoded bytes are decoded to a UTF-8 `str`.
    """
    with open(img, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [None]:
# Function to read, resize, and encode an image in base64
def process_image(image_path, new_size=(128, 128)):
    """Open an image, resize it, re-encode it as JPEG, and return base64 text.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image file to open.
    new_size : tuple of int, optional
        Target size passed to `Image.resize`.

    Returns
    -------
    str
        Base64 text (UTF-8 `str`) of the resized image saved in JPEG format.
    """
    with Image.open(image_path) as source_img:
        # JPEG cannot store alpha channels or palettes, so saving e.g. an RGBA or
        # P-mode PNG would raise. Modes the JPEG writer accepts are left untouched;
        # everything else is normalised to RGB first.
        if source_img.mode in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            working_img = source_img
        else:
            working_img = source_img.convert("RGB")
        resized = working_img.resize(new_size)
        buffered = BytesIO()
        resized.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

In [None]:
# Path of the single sample image displayed in the next cell.
img = "data/Keggy_the_Keg.jpeg"

# Resize and base64-encode every file matching data/*.jpg.
# NOTE(review): the pattern matches ".jpg" only, so the ".jpeg" file named above is
# not included in `encoded` — confirm that is intended.
encoded = [process_image(f) for f in glob.glob("data/*.jpg")]

In [None]:
# Load the sample image from disk and display it inline.
img_jpg = mpimg.imread(img)
plt.imshow(img_jpg)
plt.show()  # render the figure without echoing the AxesImage repr as cell output

In [None]:
# Show the paths matched by the same "data/*.jpg" pattern used to build `encoded`.
# Only names ending in ".jpg" match; files ending in ".jpeg" are not listed.
glob.glob("data/*.jpg")

In [None]:
## Instructions sent along with the images
prompt = """
        This is an image that I want to upload. Describe what is in it.
            Please also provide the probability that the image contains a keg. 
            Please also provide the probability that the image contains a human. 
            What is the dominant color?
            Please separate the response for each image with '$'.
"""

# A single user message: the instructions first, then one entry per encoded image
messages = [
    {
        "role": "user",
        "content": [prompt] + [
            {"image": b64_image, "resize": 768} for b64_image in encoded
        ],
    },
]

# Keyword arguments for the API call
params = dict(
    model="gpt-4-turbo",
    messages=messages,
    max_tokens=1500,
)

# Send the request and keep the text of the first choice
response = openai.ChatCompletion.create(**params)
label = response.choices[0].message.content

## JSON Processing for labeling

In [None]:
# System prompt for the political-stance labeler. It asks the model for one JSON
# object holding ten 0/1 indicators: pro_/against_ flags for the Democratic and
# Republican parties and for Biden, Trump, and Harris.
# NOTE: the schema shown to the model uses `//` comments and unquoted keys, which is
# not strict JSON; json.loads will reject a reply that copies that style verbatim.
label_system_prompt = '''
As a political researcher analyzing U.S. elections, your goal is to evaluate the political standpoint expressed in a provided post. 
Each indicator should represent whether the post supports or opposes various political figures or parties.
Based on the post's content, you will need to output a JSON object containing various binary indicators (0 or 1) reflecting specific conditions:

{
    pro_democrat: int, // Set to 1 if the post supports the Democratic party, otherwise 0,
    against_democrat: int, // Set to 1 if the post opposes the Democratic party, otherwise 0,
    pro_republican: int, // Set to 1 if the post supports the Republican party, otherwise 0,
    against_republican: int, // Set to 1 if the post opposes the Republican party, otherwise 0,
    pro_biden: int, // Set to 1 if the post supports Joe Biden, otherwise 0,
    against_biden: int, // Set to 1 if the post opposes Joe Biden, otherwise 0,
    pro_trump: int, // Set to 1 if the post supports Donald Trump, otherwise 0,
    against_trump: int, // Set to 1 if the post opposes Donald Trump, otherwise 0,
    pro_kamala: int, // Set to 1 if the post supports Kamala Harris, otherwise 0,
    against_kamala: int // Set to 1 if the post opposes Kamala Harris, otherwise 0
}
'''

In [None]:
# Function to Label Text Using ChatGPT (Zero-Shot), returning the JSON part of the reply
def label_text_json(text, model="gpt-4"):
    """Label one post with `label_system_prompt` and return the JSON text of the reply.

    Parameters
    ----------
    text : str
        Post to label.
    model : str, optional
        Chat model name passed to the API.

    Returns
    -------
    str
        The substring of the reply from the first '{' to the last '}', intended
        to be passed to `json.loads`. If the reply contains no such span, the
        text after the last "\\n\\nJSON object:\\n" marker (or the whole reply
        when the marker is absent) is returned instead.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": label_system_prompt},
            {"role": "user", "content": f"Please label the following text: '{text}'"}
        ]
    )
    label = response['choices'][0]['message']['content']

    # Keep only the outermost {...} span so that preambles, trailing remarks, and
    # Markdown code fences around the object do not break json.loads downstream.
    start, end = label.find("{"), label.rfind("}")
    if start != -1 and end > start:
        return label[start:end + 1]

    # No JSON object found: fall back to stripping the known preamble marker.
    return label.split("\n\nJSON object:\n")[-1]

In [None]:
# Extended sample set for the JSON labeler: the three earlier sentences plus two
# posts that mention political figures. This rebinds `text_data`, replacing the
# three-item list defined near the top of the notebook.
text_data = [
    "The big dog is playing in the park.",
    "My chef is making instant pork ramen for me in the kitchen.",
    "The frat brothers are in the animal house.",
    "Kamala Harris is great and in a two-way horse race.",
    "Trump is lovely and said immigrants eat pets."
]

In [None]:
# Label the third sample sentence (index 2); `label` holds the string returned by
# label_text_json, not yet parsed.
label = label_text_json(text_data[2])

In [None]:
# Parse the returned string as JSON and display the result (expected to be a dict if
# the model followed the schema). json.loads raises json.JSONDecodeError when the
# string is not valid JSON.
json.loads(label)

## Your turn

Download 10 images of your choice into a folder in the same directory as your notebook. The goal is to combine structured JSON output with image labeling.
* After downloading the images, use glob.glob to load your images into a list or dataframe.
* Decide on three to five things you want to label, then structure an image prompt using JSON format
* Test the prompt on one image. Your call should return a JSON string, which you should convert into a dictionary (e.g., with `json.loads`).
* Iterate through all images, then save your results into a dataframe. Your columns should be `image_path` followed by one column for each label you defined.

Check how much this run cost in the usage/billing section of your OpenAI account.

# Extend to your final project

Once you are comfortable with the toy example above, try out your labeling process on your final project data. I suggest trying it on 1,000 rows first, both as practice and to gauge the damage to your wallet.