In [28]:
import base64
import os
from pathlib import Path
from langchain.chat_models import ChatOpenAI

from langchain_core.messages import HumanMessage
# Directory that is later scanned for images: the kernel's current working directory.
input_dir = Path.cwd() 

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded to a
    UTF-8 ``str`` so the result can be embedded directly in text payloads.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


def _sniff_image_mime(img_base64, default="image/jpeg"):
    """Guess an image MIME type from the leading bytes of a base64 payload.

    Returns ``default`` when the header cannot be decoded or matches none of
    the signatures checked below (PNG, JPEG, GIF, WebP).
    """
    try:
        # 16 base64 characters decode to 12 raw bytes, which is enough for the
        # longest signature tested here (RIFF....WEBP).
        header = base64.b64decode(img_base64[:16])
    except (TypeError, ValueError):
        # binascii.Error is a ValueError subclass; malformed or non-string
        # input simply falls back to the default type.
        return default

    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    return default


def image_summarize(img_base64, prompt, model="gpt-4-vision-preview", max_tokens=1024):
    """Ask a vision-capable chat model to describe one image.

    img_base64: Base64-encoded image bytes (as produced by ``encode_image``).
    prompt: Text instruction sent alongside the image.
    model: Chat model name passed to ``ChatOpenAI``.
    max_tokens: Upper bound on the length of the generated reply.

    Returns the ``content`` of the model's reply message.
    """
    chat = ChatOpenAI(model=model, max_tokens=max_tokens)

    # Declare the media type that matches the actual payload instead of always
    # claiming JPEG; unknown formats keep the previous "image/jpeg" label.
    mime_type = _sniff_image_mime(img_base64)
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{img_base64}"},
    }
    text_part = {"type": "text", "text": prompt}

    msg = chat.invoke([HumanMessage(content=[text_part, image_part])])
    return msg.content


def generate_img_summaries(path, prompt=None, extensions=(".png",)):
    """
    Generate summaries and base64 encoded strings for images

    path: Directory whose image files are encoded and summarized
    prompt: Instruction sent with every image; when None, the built-in
        time-series analysis prompt below is used
    extensions: File-name suffix (or tuple of suffixes) that marks a file as
        an image; matched case-insensitively

    Returns (img_base64_list, image_summaries): two lists aligned by index,
    ordered by sorted file name.
    """

    if prompt is None:
        # Default prompt
        prompt = """You are an assistant tasked with analyzing time series data. \
    Give insights on the correlation and interactions between data, then try to deduce the reasons for the the relationship between federal interest rates and usd/sgd using supplementary sources from the internet. \
    In addition, give a concise summary of the image that is well optimized for retrieval."""

    # Accept a single suffix string as well as a tuple/list of suffixes.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Store base64 encoded images
    img_base64_list = []

    # Store image summaries
    image_summaries = []

    # Apply to images
    for img_file in sorted(os.listdir(path)):
        # Case-insensitive match so "chart.PNG" is not silently skipped.
        if not img_file.lower().endswith(suffixes):
            continue
        img_path = os.path.join(path, img_file)
        # A directory that merely looks like an image would fail in open().
        if not os.path.isfile(img_path):
            continue
        base64_image = encode_image(img_path)
        img_base64_list.append(base64_image)
        image_summaries.append(image_summarize(base64_image, prompt))

    return img_base64_list, image_summaries


# Image summaries: encode and summarize each matching image file found in input_dir.
# Every matching file results in one chat-model request via image_summarize.
img_base64_list, image_summaries = generate_img_summaries(input_dir)

In [29]:
# Display the generated summaries (one string per processed image).
image_summaries

['For analyzing the time series data shown in the image, we can make a few observations regarding the correlation and interactions between the federal interest rates (represented by the blue line) and the USD/SGD exchange rate (represented by the green line). The red vertical dashed lines appear to represent the dates of FOMC (Federal Open Market Committee) meetings.\n\nRegarding the correlation between the two data sets, we can observe that:\n\n1. There appears to be an inverse relationship between the federal interest rates and the USD/SGD exchange rate. Typically, when interest rates rise, we would expect the currency value to increase as well, because higher rates provide a better return on investments denominated in that currency. However, this chart seems to show periods where the USD/SGD exchange rate decreases as the interest rate increases.\n\n2. The most significant observation is the sharp increase in the federal interest rate in early 2020, which coincides with a sharp incr