In [1]:
import os
from autogen import filter_config

from dotenv import load_dotenv

# Load environment variables via python-dotenv before OPENAI_API_KEY is read below.
load_dotenv()

# Candidate model configurations. Each entry is a dictionary of model properties;
# the "tags" list is what filter_config() matches on further down.
config_list = [
    # Model that outputs text
    {
        "model": "gpt-4o",
        "api_key": os.environ["OPENAI_API_KEY"],
        "response_format": {"type": "text"},
        "tags": ["gpt-4o", "text"],
    },
    # Model that outputs JSON only: https://platform.openai.com/docs/guides/json-mode
    {
        "model": "gpt-4o",
        "api_key": os.environ["OPENAI_API_KEY"],
        "response_format": {"type": "json_object"},
        "tags": ["gpt-4o", "json"],
    },
]


def _build_llm_config(tag):
    """Build an LLM configuration limited to the models in ``config_list`` tagged ``tag``.

    Args:
        tag: Tag to filter on (this notebook uses "text" and "json").

    Returns:
        A dict with the keys "timeout", "cache_seed", "config_list" and
        "temperature". Everything except the filtered "config_list" is the
        same for every tag.
    """
    return {
        "timeout": 600,
        # Caching is disabled to make sure we get fresh results. Be careful with
        # this in production. Set this to 41 for disk caching.
        "cache_seed": None,
        # Only use models that carry the requested tag.
        "config_list": filter_config(config_list, {"tags": [tag]}),
        "temperature": 0.7,
    }


# Configuration whose model list is restricted to the "text"-tagged entry.
llm_config_text = _build_llm_config("text")

# Configuration whose model list is restricted to the "json"-tagged entry.
llm_config_json = _build_llm_config("json")

# Optional smoke test for the text configuration (uncomment to run):
# from autogen.agentchat import AssistantAgent

# assistant = AssistantAgent("assistant", llm_config=llm_config_text)

# reply = assistant.generate_reply(
#     messages=[{
#         "content": "Who founded OpenAI?",
#         "role": "user"
#     }]
# )

# print(reply)

In [2]:
import os
import sys

# Make the parent of the current working directory importable so that the
# `infini8` package import below resolves (presumably the package lives one
# level above the notebook's directory -- confirm against the repo layout).
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if _parent_dir not in sys.path:
    # Guarded so that re-running this cell does not keep appending duplicates.
    sys.path.append(_parent_dir)

from infini8.core.summarizer import Summarizer

In [3]:
# Create the summarizer and run it on the remote cars CSV, using the
# configuration restricted to the "json"-tagged model.
summarizer = Summarizer()
summary = summarizer.summarize(
    llm_config=llm_config_json,
    csv_path="https://raw.githubusercontent.com/uwdata/draco/master/data/cars.csv",
)

# print() is kept deliberately so the result is shown exactly as before.
print(summary)

Summarizer initialized
{
  "file_path": "https://raw.githubusercontent.com/uwdata/draco/master/data/cars.csv",
  "dataset_name": "Car Specifications Dataset",
  "dataset_description": "This dataset contains various specifications of different car models including their names, types, and various performance and physical attributes.",
  "column_metadata": [
    {
      "column": "Name",
      "properties": {
        "dtype": "string",
        "samples": ["Nissan Altima S 4dr", "Mercury Marauder 4dr", "Toyota Prius 4dr (gas/electric)"],
        "num_unique_values": 385,
        "semantic_type": "car_model",
        "description": "The name of the car model."
      }
    },
    {
      "column": "Type",
      "properties": {
        "dtype": "category",
        "samples": ["SUV", "Minivan", "Sports Car"],
        "num_unique_values": 5,
        "semantic_type": "car_type",
        "description": "The type or category of the car."
      }
    },
    {
      "column": "AWD",
      "propertie