In [30]:
import openai

In [31]:
from openai import OpenAI

In [32]:
import os

# Read the API key from the environment rather than embedding it in the
# notebook, so the secret is never saved into the file or its history.
# Set it before starting Jupyter, e.g. `export OPENAI_API_KEY=...`.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )

# Shared client used by every request cell below.
client = OpenAI(api_key=api_key)

# Image-to-text pre-trained model

In [62]:
# Imported here because this cell calls json.dumps; without it the cell fails
# with NameError on a fresh kernel (json was only imported in a later cell).
import json

'''
For every URL in image_urls, this cell sends one chat completion request to the
GPT-4 Turbo model through the OpenAI client. Each request contains a single user
message made of a text question about the image and the image URL itself.
The text of the first choice in each response is stored in the descriptions
dictionary, keyed by the image URL, and the whole dictionary is printed as
indented JSON once all requests have finished.
'''
image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
    "https://cdn.pixabay.com/photo/2017/08/25/18/48/watercolor-2681039_1280.jpg",
    "https://cdn.pixabay.com/photo/2017/03/12/13/41/colorful-2137080_1280.jpg",
    "https://cdn.pixabay.com/photo/2018/03/30/15/11/deer-3275594_1280.jpg"
    # Add more image URLs as needed
]

# Maps image URL -> description text returned by the model.
descriptions = {}

for url in image_urls:
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What’s in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": url,
                        },
                    },
                ],
            }
        ],
        # Upper bound on the length of each generated description.
        max_tokens=300,
    )

    # Keep only the message text of the first choice.
    descriptions[url] = response.choices[0].message.content

print('descriptions:', json.dumps(descriptions, indent=4))

descriptions: {
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg": "This image features a scenic wooden boardwalk extending through a lush green meadow under a vibrant blue sky with sparse clouds. The meadow is dotted with tall grasses and shrubs, and in the distance, there is a line of trees marking the horizon. The setting appears calm and serene, ideal for a leisurely walk or a peaceful escape into nature. The lighting suggests it might be late afternoon, providing a warm glow that enhances the natural beauty of the landscape.",
    "https://cdn.pixabay.com/photo/2017/08/25/18/48/watercolor-2681039_1280.jpg": "This image features vibrant and colorful abstract art, created by blending various liquid colors that appear to be alcohol ink. The hues range dramatically from deep reds and pinks to brighter shades of turquoise and yellow, merging into each other with a fluid, o

# Creating a predefined set of attributes to extract from images

In [60]:
import ast
import json

'''
For every (url, description) pair in descriptions, this cell sends one chat
completion request to the GPT-4 Turbo model through the OpenAI client. The
request asks the model to extract 7 pre-defined attributes from the description
text and to answer with a dictionary of key-value pairs. The reply text is then
parsed into a Python dict and stored in the attributes dictionary, keyed by the
image URL. The collected attributes are printed as indented JSON at the end.
'''

# Prompt to extract 7 key pre-defined attributes: color palette, subject matter,
# mood/emotion, style/aesthetic, setting/location, time period,
# cultural/symbolic representation.
ATTRIBUTE_PROMPT = "You are a machine learning model trained to extract specific attributes from a detailed description of an image. Your task is to identify the following seven attributes from the given text and represent them as key-value pairs in a Python dictionary: 1. Color Palette: Identify the dominant colors or color schemes present in the image. 2. Subject Matter: Identify the main subject, object, or theme depicted in the image. 3. Mood/Emotion: Identify the overall mood, emotion, or feeling conveyed by the image. 4. Style/Aesthetic: Identify the artistic style, aesthetic, or visual characteristics of the image. 5. Setting/Location: Identify the physical setting, location, or environment depicted in the image. 6. Time Period: Identify the time period, era, or historical context represented in the image. 7. Cultural/Symbolic Representation: Identify any cultural, symbolic, or metaphorical representations present in the image. Please provide your output in the following format: {'Color Palette': 'one or two word adjectives', 'Subject Matter': 'one or two word adjectives', 'Mood/Emotion': 'one or two word adjectives', 'Style/Aesthetic': 'one or two word adjectives', 'Setting/Location': 'one or two word adjectives', 'Time Period': 'one or two word adjectives', 'Cultural/Symbolic Representation': 'one or two word adjectives'}; if an attribute is not obvious, infer from the context of the description an attribute, don't use 'none' or 'not specified' as an attribute. Format the output in double quotations so I can convert it into an object with json."


def _parse_attributes(raw, url):
    """Turn the model's reply text into a dict of attributes.

    The reply is free-form text, so it is not guaranteed to be bare JSON: it
    may be wrapped in a Markdown code fence or written as a single-quoted
    Python dict (the format the prompt itself illustrates). This helper
    isolates the outermost ``{...}`` span, tries ``json.loads`` first and
    falls back to ``ast.literal_eval`` for Python-literal syntax.

    Args:
        raw: The message content returned by the model (str or None).
        url: The image URL the reply belongs to; used only in error messages.

    Returns:
        The parsed attributes as a dict.

    Raises:
        ValueError: If the reply is empty, contains no ``{...}`` span, cannot
            be parsed, or parses to something other than a dict.
    """
    if not raw:
        raise ValueError(f"Empty model reply for {url}")

    # Drop anything around the dictionary itself (code fences, stray prose).
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No dictionary found in model reply for {url}: {raw!r}")
    candidate = raw[start:end + 1]

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        # Single-quoted dicts are valid Python literals but not valid JSON.
        try:
            parsed = ast.literal_eval(candidate)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(
                f"Could not parse model reply for {url}: {raw!r}"
            ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(f"Model reply for {url} is not a dictionary: {raw!r}")
    return parsed


# Maps image URL -> dict of extracted attributes.
attributes = {}

for url, description in descriptions.items():
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ATTRIBUTE_PROMPT},
                    {
                        "type": "text",
                        # The description is sent JSON-encoded (quoted and escaped).
                        "text": json.dumps(description),
                    },
                ],
            }
        ],
        max_tokens=500,
    )

    # String response from the prompt.
    attributes_content = response.choices[0].message.content

    # Parsed object to work with, stored under the image URL.
    attributes[url] = _parse_attributes(attributes_content, url)

print('Image - Attributes Dictionary:\n', json.dumps(attributes, indent=4))

Image - Attributes Dictionary:
 {
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg": {
        "Color Palette": "vibrant green, vivid blue",
        "Subject Matter": "natural landscape",
        "Mood/Emotion": "peace, tranquility",
        "Style/Aesthetic": "picturesque",
        "Setting/Location": "nature reserve",
        "Time Period": "contemporary",
        "Cultural/Symbolic Representation": "environmental appreciation"
    },
    "https://cdn.pixabay.com/photo/2017/08/25/18/48/watercolor-2681039_1280.jpg": {
        "Color Palette": "vibrant, colorful",
        "Subject Matter": "abstract art",
        "Mood/Emotion": "dynamic, fluid",
        "Style/Aesthetic": "alcohol ink",
        "Setting/Location": "imaginary",
        "Time Period": "contemporary",
        "Cultural/Symbolic Representation": "organic"
    },
    "https://cdn.pixabay.com/photo/2017/03/12/1