In [1]:
import os
from autogen import filter_config

from dotenv import load_dotenv
load_dotenv()

# Candidate model configurations. Each entry is a dictionary of model
# properties; the "tags" list is what `filter_config` matches against below.
config_list = [
    # Model that outputs text
    {
        "model": "gpt-4o",
        "api_key": os.environ["OPENAI_API_KEY"],  # KeyError if the variable is unset
        "response_format": {"type": "text"},
        "tags": ["gpt-4o", "text"]
    },
    # Model that outputs JSON only: https://platform.openai.com/docs/guides/json-mode
    {
        "model": "gpt-4o",
        "api_key": os.environ["OPENAI_API_KEY"],  # KeyError if the variable is unset
        "response_format": {"type": "json_object"},
        "tags": ["gpt-4o", "json"]
    }
]


def _llm_config_for_tag(tag):
    """Build the LLM settings dict restricted to models carrying ``tag``.

    Args:
        tag: Tag to select from ``config_list`` (e.g. "text" or "json").

    Returns:
        A dict with the shared timeout, cache and temperature settings plus a
        "config_list" entry holding the result of filtering ``config_list``
        by ``tag``.
    """
    return {
        "timeout": 600,
        # Disable caching to make sure we get fresh results. Be careful with
        # this in production. Set this to 41 for disk caching.
        "cache_seed": None,
        # Only use models that have the requested tag.
        "config_list": filter_config(config_list, {"tags": [tag]}),
        "temperature": 0.7,
    }


# Settings for agents that should reply in plain text.
llm_config_text = _llm_config_for_tag("text")

# Settings for agents that must reply with a JSON object.
llm_config_json = _llm_config_for_tag("json")

# from autogen.agentchat import AssistantAgent

# assistant = AssistantAgent("assistant", llm_config=llm_config_text)

# reply = assistant.generate_reply(
#     messages=[{
#         "content": "Who founded OpenAI?",
#         "role": "user"
#     }]
# )

# print(reply)

In [2]:
import sys
import os

# Parent of the notebook's working directory; presumably the repository root
# that contains the `infini8` package imported below (confirm for your layout).
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Only append when missing so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from infini8.core.summarizer import Summarizer
from infini8.core.insights import InsightsGenerator


In [3]:
# Dataset to summarize, relative to the notebook's working directory.
# NOTE(review): the saved output of this notebook describes a "Web Search
# Results" dataset (SEO_data.csv), not this file -- re-run to refresh, or
# confirm which dataset is intended.
dataset_path = "./data/KAG_conversion_data.csv"

# Ask the summarizer for a dataset summary using the JSON-only LLM settings.
summarizer = Summarizer()
summary = summarizer.summarize(
    dataset_path,
    llm_config=llm_config_json,
)

print(summary)





  pd.to_datetime(column, errors="raise")
  pd.to_datetime(column, errors="raise")
  pd.to_datetime(column, errors="raise")
  pd.to_datetime(column, errors="raise")
  pd.to_datetime(column, errors="raise")


{'dataset_name': 'Web Search Results', 'dataset_description': 'This dataset contains information from web search results including titles, snippets, links, and search metrics.', 'key_variables': "The key variables include 'rank', 'total_result', and 'links'. 'Rank' is important for understanding the order of relevance, 'total_result' indicates the volume of search results, and 'links' provide the URLs of the search results.", 'column_metadata': [{'column': 'words', 'properties': {'dtype': 'category', 'samples': ['Supervised learning', 'Chatbots', 'Artificial intelligence'], 'num_unique_values': 15, 'llm_type': 'category', 'description': 'Keywords or phrases used in the search queries.'}}, {'column': 'rank', 'properties': {'dtype': 'number', 'std': 7, 'min': 1, 'max': 25, 'samples': [9, 17, 1], 'num_unique_values': 25, 'llm_type': 'ranking', 'description': 'The rank of the search result.'}}, {'column': 'title', 'properties': {'dtype': 'string', 'samples': ['Deep Learning', 'Learn to Bec

In [4]:
# Generate insights from the dataset summary produced in the previous cell,
# again using the JSON-only LLM settings.
insights_gen = InsightsGenerator()
insight = insights_gen.generate_insights(
    dataset_summary=summary,
    llm_config=llm_config_json,
)

print(insight)

{
    "insights": [
        {
            "index": 0,
            "expected_insight": {
                "question": "Which keywords have the highest average rank across all web search results?",
                "insight": "This bar chart will reveal the keywords that consistently appear higher in search rankings, helping to identify which topics are most relevant or popular."
            },
            "visualization": {
                "type": "Bar Chart",
                "axes_and_labels": {
                    "x_axis": "Keywords",
                    "y_axis": "Average Rank",
                    "additional_labels": []
                },
                "additional_recommendations": {
                    "styling": "Use a color gradient to highlight higher ranks in darker shades.",
                    "legend": "No legend is needed as each bar represents a keyword.",
                    "gridlines": "Enable gridlines for better readability."
                }
            },
       

In [5]:
from infini8.core.visualizer import Visualizer

# Show which dataset file the summary refers to before visualizing it.
print(summary['file_path'])

# Hand the summary and the generated insights to the visualizer, using the
# plain-text LLM settings. The result is written to an .ipynb file later on.
visualizer = Visualizer()
notebook = visualizer.visualize(
    dataset_summary=summary,
    insights=insight,
    llm_config=llm_config_text,
)

/Users/umermansoor/Documents/GitHub/infini8/notebooks/data/SEO_data.csv
[33mCodeReviewer[0m (to Programmer):


                ## Objective
                
        You are a data visualization and Python expert who can create meaningful and insightful visualizations based on a dataset. You are given a list of insights that need to be visualized for a dataset. You will also be provided a dataset summary, link to the dataset and visualization library to use.

        You will output a well formatted Jupyter notebook containing the visualizations for each insight. The notebook should include the following sections:
        - Introduction (Markdown): Briefly introduce the dataset and the insights that are visualized.
        - Setup (Code): Import the necessary libraries and load the dataset. If using `pip install`, use the `-qqq` flag to suppress output.
        - Insight #1 (Markdown): Describe the insight and the visualization.
        - Insight #1 (Code): Generate the visualization 

[33mProgrammer[0m (to CodeReviewer):

```python
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Web Search Results Dataset Visualization\n",
    "\n",
    "## Introduction\n",
    "\n",
    "This dataset, titled **Web Search Results**, contains information from web search results including titles, snippets, links, and search metrics. The key variables in this dataset include:\n",
    "- **Rank:** The rank of the search result.\n",
    "- **Total Result:** The volume of search results.\n",
    "- **Links:** The URLs of the search results.\n",
    "\n",
    "In this notebook, we will visualize various insights derived from this dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Load the dataset\n",
    "file_path = '/Users/umermansoor/Documen

In [6]:
# Quietly install notebook tooling into the kernel's environment (%pip, not !pip).
# NOTE(review): versions are unpinned, and the `pandoc` package on PyPI is
# presumably only a Python wrapper -- confirm whether the pandoc executable
# itself is also required and installed separately.
%pip install -qqq nbformat nbconvert pandoc

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Save the notebook to a file in the output folder, creating it if it doesn't exist.
output_folder = "output"
os.makedirs(output_folder, exist_ok=True)
notebook_file_path = os.path.join(output_folder, "notebook.ipynb")

# Write with an explicit UTF-8 encoding: without it, open() falls back to the
# platform's default encoding, which can fail or mis-encode non-ASCII text.
with open(notebook_file_path, "w", encoding="utf-8") as f:
    f.write(notebook)

print(f"Notebook saved to {notebook_file_path}")




Notebook saved to output/notebook.ipynb
