In [3]:
import openai

In [4]:
from openai import OpenAI

In [5]:
# Build the shared OpenAI client used by the request cells.
# The key is deliberately NOT written in the notebook: with no api_key argument
# the client reads it from the OPENAI_API_KEY environment variable, so the
# secret is never saved, committed, or shared together with this file.
# Set it before starting the kernel, e.g. `export OPENAI_API_KEY=sk-...`.
client = OpenAI()

# Image-to-text pre-trained model

In [13]:
# Image-to-text request: send one user message to GPT-4 Turbo through the
# OpenAI client. The message has two parts, a text question and an image URL.
# The first returned choice is printed, and the text of its message is stored
# in `description`.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    max_tokens=300,
    messages=[
        dict(
            role="user",
            content=[
                dict(type="text", text="What’s in this image?"),
                dict(
                    type="image_url",
                    image_url=dict(
                        url="https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                    ),
                ),
            ],
        )
    ],
)

# Index the first choice once and reuse it for both the printout and the text.
first_choice = response.choices[0]
print(first_choice)
description = first_choice.message.content

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='This image features a serene landscape with a wooden boardwalk extending through a lush meadow. The meadow is filled with tall, green grass and dotted with occasional shrubs and trees. The sky is clear with fluffy clouds and provides a pleasant, vibrant backdrop to the verdant setting. This type of walkway is often used in natural reserves or parks to protect the natural habitat while allowing people to enjoy close contact with nature without damaging it. The scene captures the essence of tranquility and greenery, typical of a well-preserved natural environment.', role='assistant', function_call=None, tool_calls=None))


# Creating a predefined set of attributes to extract from images

In [28]:
import json

# Attribute-extraction request: send GPT-4 Turbo an instruction prompt plus the
# text held in `description` (set by an earlier cell) and ask for seven
# predefined attributes. The reply text is printed, parsed with json into
# `attributes_dict`, and the resulting dict is printed as well.
response = client.chat.completions.create(
  model="gpt-4-turbo",
  # JSON mode: constrains the reply to a single valid JSON object, so the
  # json.loads call below does not fail on markdown code fences or on the
  # single-quoted dictionary style shown in the prompt's example format.
  # (A reply cut off by max_tokens can still be incomplete JSON.)
  response_format={"type": "json_object"},
  messages=[
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          # prompt to extract 7 key pre-defined attributes: color palette, subject matter, mood/emotion, style/aesthetic, setting/location, time period, cultural/symbolic representation
          "text": "You are a machine learning model trained to extract specific attributes from a detailed description of an image. Your task is to identify the following seven attributes from the given text and represent them as key-value pairs in a Python dictionary: 1. Color Palette: Identify the dominant colors or color schemes present in the image. 2. Subject Matter: Identify the main subject, object, or theme depicted in the image. 3. Mood/Emotion: Identify the overall mood, emotion, or feeling conveyed by the image. 4. Style/Aesthetic: Identify the artistic style, aesthetic, or visual characteristics of the image. 5. Setting/Location: Identify the physical setting, location, or environment depicted in the image. 6. Time Period: Identify the time period, era, or historical context represented in the image. 7. Cultural/Symbolic Representation: Identify any cultural, symbolic, or metaphorical representations present in the image. Please provide your output in the following format: {'Color Palette': 'one or two word adjectives', 'Subject Matter': 'one or two word adjectives', 'Mood/Emotion': 'one or two word adjectives', 'Style/Aesthetic': 'one or two word adjectives', 'Setting/Location': 'one or two word adjectives', 'Time Period': 'one or two word adjectives', 'Cultural/Symbolic Representation': 'one or two word adjectives'}, format the output in double quotations so I can convert it into an object with json."
        },
        {
          "type": "text",
          "text": description
        },
      ],
    }
  ],
  max_tokens=500,
)

# string response from prompt
attributes_content = response.choices[0].message.content
print(attributes_content)

# object to work with data
attributes_dict = json.loads(attributes_content)
print(attributes_dict)

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n    "Color Palette": "vibrant greens, clear blue",\n    "Subject Matter": "serene landscape",\n    "Mood/Emotion": "tranquility",\n    "Style/Aesthetic": "naturalistic",\n    "Setting/Location": "meadow, boardwalk",\n    "Time Period": "contemporary",\n    "Cultural/Symbolic Representation": "environmental conservation"\n}', role='assistant', function_call=None, tool_calls=None))
{
    "Color Palette": "vibrant greens, clear blue",
    "Subject Matter": "serene landscape",
    "Mood/Emotion": "tranquility",
    "Style/Aesthetic": "naturalistic",
    "Setting/Location": "meadow, boardwalk",
    "Time Period": "contemporary",
    "Cultural/Symbolic Representation": "environmental conservation"
}
{'Color Palette': 'vibrant greens, clear blue', 'Subject Matter': 'serene landscape', 'Mood/Emotion': 'tranquility', 'Style/Aesthetic': 'naturalistic', 'Setting/Location': 'meadow, boardwalk', 'Time Per