# GPT-4 Vision

GPT-4 Turbo with Vision is a large multimodal model (LMM) developed by OpenAI that can analyze images and provide textual responses to questions about them. It incorporates both natural language processing and visual understanding.

In [None]:
# Show data/ingredients.png image below
from PIL import Image
import matplotlib.pyplot as plt

# Image.open is lazy and keeps the file open until the image is closed, so
# use a context manager to release the handle once the figure is rendered.
with Image.open('data/ingredients.png') as img:
    plt.imshow(img)
    plt.show()

In [None]:
import os
import requests
import base64
from dotenv import load_dotenv
load_dotenv("../credentials.env")

# Fail fast on missing settings: os.getenv returns None for an unset variable,
# which would otherwise be formatted into the URL as the literal text "None".
missing_settings = [
    name
    for name in (
        'GPT_4_VISION_API_KEY',
        'GPT_4_VISION_ENDPOINT',
        'GPT_4_VISION_DEPLOYMENT_NAME',
    )
    if not os.getenv(name)
]
if missing_settings:
    raise RuntimeError(
        f"Missing required environment variables: {', '.join(missing_settings)}"
    )

# Configuration
GPT4V_KEY = os.getenv('GPT_4_VISION_API_KEY')
IMAGE_PATH = "./data/ingredients.png"

# Read the image inside a context manager so the file handle is closed
# promptly, then base64-encode the bytes for embedding in a data URL.
with open(IMAGE_PATH, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('ascii')

headers = {
    "Content-Type": "application/json",
    "api-key": GPT4V_KEY,
}

# Request payload: a system prompt plus one user message that carries the
# image (as a base64 data URL) followed by the text question about it.
payload = {
  "messages": [
    {
      "role": "system",
      "content": [
        {
          "type": "text",
          "text": "You are an AI assistant that helps people find information."
        }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:image/png;base64,{encoded_image}"
          }
        },
        {
          "type": "text",
          "text": "이 사진에 대해서 설명해줘."
        }
      ]
    }
  ],
  "temperature": 0.7,
  "top_p": 0.95,
  "max_tokens": 800
}

# Normalise the trailing slash so the URL is well-formed whether or not the
# configured endpoint ends with "/".
GPT4V_ENDPOINT = (
    f"{os.getenv('GPT_4_VISION_ENDPOINT').rstrip('/')}/openai/deployments/"
    f"{os.getenv('GPT_4_VISION_DEPLOYMENT_NAME')}/chat/completions"
    "?api-version=2024-02-15-preview"
)



In [None]:
# Send request
# requests has no default timeout; without one the cell can block forever if
# the service never answers. Timeouts raise a RequestException subclass, so
# they are reported by the handler below.
REQUEST_TIMEOUT_SECONDS = 120
try:
    response = requests.post(
        GPT4V_ENDPOINT,
        headers=headers,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()  # Raises requests.HTTPError when the response has a 4xx/5xx status code.
except requests.RequestException as e:
    # Chain the original exception so the underlying cause stays attached.
    raise SystemExit(f"Failed to make the request. Error: {e}") from e

# Handle the response as needed (e.g., print or process)
print(response.json()['choices'][0]['message']['content'])