In [1]:
import os
from autogen import filter_config

from dotenv import load_dotenv
load_dotenv()

# Candidate model configurations for the LLM. Every entry targets gpt-4o with the API key
# taken from the OPENAI_API_KEY environment variable; the entries differ only in the
# response format they request and in the tag used to select them via `filter_config`.
#   - "text" -> plain-text output
#   - "json" -> JSON-only output: https://platform.openai.com/docs/guides/json-mode
config_list = [
    {
        "model": "gpt-4o",
        "api_key": os.environ["OPENAI_API_KEY"],
        "response_format": {"type": response_type},
        "tags": ["gpt-4o", tag],
    }
    for response_type, tag in (("text", "text"), ("json_object", "json"))
]

def _make_llm_config(tag):
    """Build an LLM config limited to the `config_list` entries carrying `tag`.

    Both configurations in this notebook share the same timeout, caching and
    temperature settings; only the tag used to filter the model list differs.

    Args:
        tag: Tag passed to `filter_config` to select models (e.g. "text" or "json").

    Returns:
        A new dict with the keys "timeout", "cache_seed", "config_list" and "temperature".
    """
    return {
        "timeout": 600,
        # Disable caching to make sure we get fresh results. Be careful with this in
        # production. Set this to 41 for disk caching.
        "cache_seed": None,
        # Only use models that have the requested tag.
        "config_list": filter_config(config_list, {"tags": [tag]}),
        "temperature": 0.7,
    }


# Configuration for free-form text replies (models tagged "text").
llm_config_text = _make_llm_config("text")

# Configuration for JSON-only replies (models tagged "json").
llm_config_json = _make_llm_config("json")

# from autogen.agentchat import AssistantAgent

# assistant = AssistantAgent("assistant", llm_config=llm_config_text)

# reply = assistant.generate_reply(
#     messages=[{
#         "content": "Who founded OpenAI?",
#         "role": "user"
#     }]
# )

# print(reply)

In [2]:
import sys
import os

# Make the parent of the current working directory importable so the `infini8`
# package imports below resolve when the notebook is run from its own folder.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so re-running this cell does not keep adding duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from infini8.core.summarizer import Summarizer
from infini8.core.insights import InsightsGenerator


In [3]:
# Summarize the raw CSV with the JSON-mode configuration. The printed result is a dict
# (dataset name, description, key variables, per-column metadata, ...) that the
# following cells consume.
dataset_path = "./data/KAG_conversion_data.csv"

summarizer = Summarizer()
summary = summarizer.summarize(dataset_path, llm_config=llm_config_json)

print(summary)





  pd.to_datetime(column, errors="raise")
  pd.to_datetime(column, errors="raise")


{'dataset_name': 'Ad_Campaign_Performance', 'dataset_description': 'This dataset contains information about various ad campaigns, including demographic data, ad impressions, clicks, and conversion metrics.', 'key_variables': "The key variables in this dataset include 'ad_id', 'xyz_campaign_id', 'fb_campaign_id', 'Impressions', 'Clicks', 'Spent', 'Total_Conversion', and 'Approved_Conversion'. These variables are crucial for analyzing the performance of ad campaigns.", 'column_metadata': [{'column': 'ad_id', 'properties': {'dtype': 'number', 'std': 193992, 'min': 708746, 'max': 1314415, 'samples': [747332, 1314343, 777794], 'num_unique_values': 1143, 'llm_type': 'id', 'description': 'A unique identifier for each ad.'}}, {'column': 'xyz_campaign_id', 'properties': {'dtype': 'number', 'std': 121, 'min': 916, 'max': 1178, 'samples': [916, 936, 1178], 'num_unique_values': 3, 'llm_type': 'campaign_id', 'description': 'Identifies the campaign associated with the ad.'}}, {'column': 'fb_campaign

In [4]:
# Ask the insights generator for candidate insights (question, expected insight and a
# suggested visualization) based on the dataset summary, again in JSON mode.
insights_gen = InsightsGenerator()

insight = insights_gen.generate_insights(
    dataset_summary=summary,
    llm_config=llm_config_json,
)

print(insight)

{
    "insights": [
        {
            "index": 0,
            "expected_insight": {
                "question": "Which age group has the highest click-through rate (CTR) for each campaign?",
                "insight": "The bar chart will reveal the age group with the highest CTR across different campaigns, helping to identify the most responsive demographic."
            },
            "visualization": {
                "type": "Bar Chart",
                "axes_and_labels": {
                    "x_axis": "Age Group",
                    "y_axis": "Click-Through Rate (CTR)",
                    "additional_labels": [
                        "Campaign ID"
                    ]
                },
                "additional_recommendations": {
                    "styling": "Use different colors for each campaign to distinguish between them clearly.",
                    "legend": "Include a legend to identify each campaign.",
                    "gridlines": "Enable gridlines for b

In [5]:
from infini8.core.visualizer import Visualizer

# Show which dataset file the summary refers to before generating the notebook.
print(summary["file_path"])

# Turn the summary and insights into a notebook; this step uses the text-mode
# configuration rather than the JSON-mode one.
visualizer = Visualizer()

notebook = visualizer.visualize(
    dataset_summary=summary,
    insights=insight,
    llm_config=llm_config_text,
)

/Users/umermansoor/Documents/GitHub/infini8/notebooks/data/KAG_conversion_data.csv
[33mCodeReviewer[0m (to Programmer):


                ## Objective
                
        You are a data visualization and Python expert who can create meaningful and insightful visualizations based on a dataset. You are given a list of insights that need to be visualized for a dataset. You will also be provided a dataset summary, link to the dataset and visualization library to use.

        You will output a well formatted Jupyter notebook containing the visualizations for each insight. The notebook should include the following sections:
        - Introduction (Markdown): Briefly introduce the dataset and the insights that are visualized.
        - Setup (Code): Import the necessary libraries and load the dataset. If using `pip install`, use the `-qqq` flag to suppress output.
        - Insight #1 (Markdown): Describe the insight and the visualization.
        - Insight #1 (Code): Generate the vis

In [6]:
# Install the notebook tooling used further down: the execution cell imports `nbformat`
# and `nbconvert`. `-qqq` keeps pip's output quiet.
# NOTE(review): nothing visible in this notebook imports `pandoc`, and the versions are
# unpinned — confirm whether `pandoc` is required and consider pinning versions.
%pip install -qqq nbformat nbconvert pandoc

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Dump the generated notebook (the recorded output is notebook JSON) so it can be
# inspected before it is written to disk.
print(notebook)


{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ad Campaign Performance Visualization\n",
    "\n",
    "## Introduction\n",
    "\n",
    "This dataset contains information about various ad campaigns, including demographic data, ad impressions, clicks, and conversion metrics. The key variables in this dataset include 'ad_id', 'xyz_campaign_id', 'fb_campaign_id', 'Impressions', 'Clicks', 'Spent', 'Total_Conversion', and 'Approved_Conversion'. These variables are crucial for analyzing the performance of ad campaigns.\n",
    "\n",
    "In this notebook, we will visualize several insights based on this dataset to better understand the performance and effectiveness of different ad campaigns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Load the da

In [9]:
# Save the notebook to a file in output folder, create it if it doesn't exist
output_folder = "output"
os.makedirs(output_folder, exist_ok=True)
notebook_file_path = os.path.join(output_folder, "notebook.ipynb")
# Write explicitly as UTF-8 instead of relying on the platform's default encoding, so
# non-ASCII characters in the generated notebook are stored the same way everywhere.
with open(notebook_file_path, "w", encoding="utf-8") as f:
    f.write(notebook)

print(f"Notebook saved to {notebook_file_path}")




Notebook saved to output/notebook.ipynb
Notebook executed and saved to output/notebook.ipynb


In [20]:
# Hand the saved notebook to the project's Executor.
from infini8.core.executor import Executor

# print absolute path of notebook file
print(os.path.abspath(notebook_file_path))

executor = Executor()

# NOTE(review): the recorded output of this cell is
# "AttributeError: 'Executor' object has no attribute 'execute_with_llm_1'", i.e. the
# imported Executor has no method of this name. Confirm the intended method in
# infini8.core.executor and update this call.
executor.execute_with_llm_1(notebook_file_path)


/Users/umermansoor/Documents/GitHub/infini8/notebooks/output/notebook.ipynb


AttributeError: 'Executor' object has no attribute 'execute_with_llm_1'

In [10]:
## run the notebook
import nbformat
from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor

# Load the saved notebook; read it as UTF-8 rather than the platform default encoding.
with open(notebook_file_path, encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
try:
    # Execute with the output folder as the working directory (same folder the notebook
    # was saved to, instead of a second hard-coded 'output/' literal).
    ep.preprocess(nb, {'metadata': {'path': output_folder}})
except CellExecutionError:
    # A generated cell failed: report where the traceback ends up, then re-raise so the
    # failure is still visible in this notebook.
    print(f"Error executing the notebook; see {notebook_file_path} for the traceback.")
    raise
finally:
    # Always write the notebook back, so the outputs produced before a failure (and the
    # error itself) are kept instead of being discarded.
    with open(notebook_file_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

print(f"Notebook executed and saved to {notebook_file_path}")


CellExecutionError: An error occurred while executing the following cell:
------------------
# Corrected Insight #1: Distribution of Impressions across Interest Scores
plt.figure(figsize=(10, 6))
sns.histplot(data=data, x:='interest', weights='Impressions', bins=30, kde=True)
plt.title('Distribution of Impressions across Interest Scores')
plt.xlabel('Interest Score')
plt.ylabel('Number of Impressions')
plt.show()
------------------


[0;36m  Cell [0;32mIn[2], line 3[0;36m[0m
[0;31m    sns.histplot(data=data, x:='interest', weights='Impressions', bins=30, kde=True)[0m
[0m                                                                                   ^[0m
[0;31mSyntaxError[0m[0;31m:[0m positional argument follows keyword argument

