In [None]:
!pip install autogen

In [None]:
!pip install pyautogen

In [None]:
!pip install dask[dataframe]

In [None]:
pip install seaborn

In [None]:
pip install ag2[openai]

In [None]:
 pip install pandas matplotlib

In [None]:
import openai
import uuid
import json
import autogen

In [None]:
from autogen import AssistantAgent, UserProxyAgent, GroupChat, GroupChatManager

In [None]:
from openai import OpenAI

In [None]:
def _redact_secrets(value):
    """Return a copy of a parsed-JSON value with credential-like fields masked.

    Dict entries whose key contains "key", "token", "secret" or "password"
    (case-insensitive) are replaced by a placeholder; dicts and lists are
    walked recursively; every other value is returned unchanged.
    """
    secret_markers = ("key", "token", "secret", "password")
    if isinstance(value, dict):
        redacted = {}
        for key, item in value.items():
            looks_secret = any(marker in str(key).lower() for marker in secret_markers)
            redacted[key] = "***REDACTED***" if looks_secret else _redact_secrets(item)
        return redacted
    if isinstance(value, list):
        return [_redact_secrets(item) for item in value]
    return value


file_path = "apikey.json"

# Load the JSON file
with open(file_path, 'r', encoding="utf-8") as file:
    config = json.load(file)

# Never echo the raw config: it holds the API key, and notebook outputs are saved with the
# file and easily shared or committed. Show only a redacted view as a load confirmation.
print("Loaded JSON content:", _redact_secrets(config))


In [None]:
# Build the AutoGen config list from the JSON credentials file.
config_list = autogen.config_list_from_json("apikey.json")

# Fail with an explicit message instead of an opaque IndexError on `config_list[0]` below.
if not config_list:
    raise ValueError("No LLM configurations were loaded from apikey.json")

# Model name and API key of the first configuration entry (None when the field is absent).
model_name = config_list[0].get("model")
api_key = config_list[0].get("api_key")

In [None]:
import os

# Dataset and chart locations. The defaults are the original machine-specific paths; set the
# DATASET_PATH / CHART_DIR environment variables to run the notebook elsewhere without editing it.
file_path = os.environ.get(
    "DATASET_PATH",
    r"C:\Users\DANICA\Documents\Projects\dataset\Mall_Customers.csv",
)
# `location` is the same dataset path under the name the agent prompts use; deriving it from
# `file_path` keeps the two from drifting apart.
location = file_path
chart_location = os.environ.get("CHART_DIR", r"C:\Users\DANICA\Desktop\charts")
# A random UUID prefix is generated each time this cell runs.
chart_filename = f"{uuid.uuid4()}_chart.png"
code_language = 'python'


In [None]:
import pandas as pd
def load_dataset(file_path):
    """Load a tabular dataset into a DataFrame, picking the reader from the file extension.

    Args:
        file_path: Path to the dataset, as a string or an ``os.PathLike`` object.
            The extension is matched case-insensitively.

    Returns:
        The DataFrame produced by ``pd.read_csv`` (``.csv``), ``pd.read_excel``
        (``.xls`` / ``.xlsx``) or ``pd.read_json`` (``.json``).

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    import os  # local import keeps this cell independent of the notebook's import cells

    # Normalise only for the extension check, so "DATA.CSV" or a pathlib.Path is accepted;
    # the caller's original path object is what gets handed to pandas.
    normalized_name = os.fspath(file_path).lower()

    if normalized_name.endswith('.csv'):
        return pd.read_csv(file_path)
    if normalized_name.endswith(('.xls', '.xlsx')):
        return pd.read_excel(file_path)
    if normalized_name.endswith('.json'):
        return pd.read_json(file_path)
    raise ValueError("Unsupported file format. Please provide CSV, Excel, or JSON.")

# Best-effort load: any failure is reported and `df` falls back to an empty frame,
# so later cells still find a `df` variable.
try:
    df = load_dataset(file_path)
    print("Dataset loaded successfully:")
    preview = df.head()
    display(preview)
except Exception as load_error:
    print(f"Error loading dataset: {load_error}")
    df = pd.DataFrame()

In [None]:
# System prompt for the "Data_Retriever" agent. The text is sent to the model verbatim,
# so edits here change agent behaviour.
# NOTE(review): "specific variables" in the second task reads like an unfilled template
# placeholder — confirm which columns were intended.
data_retrival_prompt = '''
**Role**:You are a data retrival agent who retrives data from the file that has been uploaded.You have following tasks:
- Retrieve the most recent data from the file. 
- Focus on the following data points: specific variables. 
- Ensure that the data is cleaned by handling missing values, duplicates, or outliers, and is formatted in a desired format (e.g., CSV, JSON, or other suitable formats for analysis).
- Provide a summary of the data retrieved, including a brief description of the dataset, the number of rows/columns, and any noteworthy characteristics (e.g., missing data, duplicates, etc.).
- Ensure the data is structured and ready for analysis, including appropriate column names, and consistent formatting.
- Analyze and document the distribution of numerical variables to detect any unusual or extreme values that might need attention.
- Identify and provide a breakdown of missing data percentages and handle missing values (e.g., imputation, removal, or flagging).
- Ensure that categorical variables are properly encoded (e.g., one-hot encoding or label encoding if required).
- Provide an overview of the data, highlighting any trends or patterns observed in the initial analysis (e.g., time trends, correlations, etc.).
'''

In [None]:
# System prompt for the "Query_Processor" agent. The text is sent to the model verbatim,
# so edits here change agent behaviour.
# NOTE(review): several tasks mention SQL, indexing and real-time data, while this notebook
# loads a single file with pandas — confirm the prompt matches the intended data source.
query_processing_prompt = '''
**Role**: You are a query processing agent who interprets and processes user queries. You have the following tasks:
- Parse the user's query and identify key data points or requirements.
- Translate the query into a structured format (e.g., SQL, filters) to extract relevant data.
- Ensure the query applies the correct filters (e.g., date range, category) and targets the right data.
- Provide the user with the results in a clear and structured format.
- Break down complex queries into smaller parts, if necessary, to ensure each part is correctly executed and aggregated.
- If the query involves aggregations, ensure the proper functions (e.g., SUM, COUNT, AVG) are used and explain the results clearly.
- Suggest improvements or optimizations to the query if necessary, such as indexing or performance improvements.
- If the query involves time-sensitive or real-time data, ensure that the appropriate filters (e.g., timestamps) are used to return accurate results.
- After processing, confirm the completion of the task and ask if the user needs further assistance or additional queries.
'''


In [None]:
# First agent in the pipeline: interprets the user's request using `query_processing_prompt`.
query_processing_agent = AssistantAgent(
    name="Query_Processor",
    llm_config={"config_list": config_list},
    system_message=query_processing_prompt,
)


In [None]:
# Planning agent: outlines the charting steps and delegates code generation to the code writer.
# The chart output directory is interpolated into the prompt when this cell runs.
planner = AssistantAgent(
    name="planner",
    system_message=f"""You are a Planner Agent to create all type of charts plots that a user will request from the data file.
    Load the data from the provided file location.
    Determine the type of analysis (basic, advanced, forecasting, or AI/ML).
    Clean the data by handling missing values, duplicates, or formatting issues.
    Decide on the appropriate chart type based on the analysis.
    Create the chart with necessary customizations (e.g., axis labels, title).
    Add insights or annotations to highlight trends if needed.
    Save the plot at the specified location {chart_location} and print the filename.
    Make refinements based on feedback or specific requirements.
    Suggest alternative chart types or analysis methods if necessary.
    Ensure data security and protect sensitive information throughout the process.
     **DO NOT GENERATE CODE YOURSELF.** Instruct the CodeWriter to generate the necessary code for each step.
            """,
    llm_config={"config_list": config_list},
)

In [None]:
# Data retrieval agent: driven by `data_retrival_prompt`.
data_retrival_agent = AssistantAgent(
    name="Data_Retriever",
    llm_config={"config_list": config_list},
    system_message=data_retrival_prompt,
)

In [None]:
# Code-writing agent: turns the plan into runnable plotting code.
# The prompt names `plt.savefig` because matplotlib.pyplot has no `save` function; telling
# the model to call `plt.save` steers it towards code that raises AttributeError.
code_writer = AssistantAgent(
    name="code_writer",
    llm_config={"config_list": config_list},
    system_message=f"""Code will be written in the specified language, {code_language}.
    Based on the outlined plan, write the complete code for each step.
    Ensure to save the plot using plt.savefig and store it at the specified {chart_location}. Print "Chart saved as {chart_filename}" after saving the plot.
    Indicate clearly which agent is responsible for each step.
    Consider the analysis type (basic, analytics, forecasting, AI/ML) and adjust the approach accordingly.
    Ensure that the code handles all relevant edge cases or special conditions for the analysis.
    After the plot is saved, ensure the process is logged, with details on the chart creation steps followed.
            """
)


In [None]:
# Executor agent: runs the code produced by the code writer in the "plotter_code" working
# directory with Docker disabled, and reports the outcome back to the group.
# The prompt names `plt.savefig` because matplotlib.pyplot has no `save` function.
code_executor = UserProxyAgent(
    name="code_executor",
    human_input_mode="NEVER",
    code_execution_config={
        "work_dir": "plotter_code",
        "use_docker": False,
    },
    llm_config={"config_list": config_list},
    system_message=f'''You are the CodeExecutor Agent, responsible for executing the code provided by the CodeWriter and reporting back the results or any errors encountered.
            Always check for code correctness before execution and ensure it aligns with the outlined plan.
            Resuts should be displayed in the most effective format.
            Use a tabular format when appropriate.
            In cases where a graphical output is produced, ensure the plot is saved using plt.savefig and stored at the specified {chart_location}, printing "Chart saved as {chart_filename}" after saving.
            Provide a clear and detailed error message with the relevant traceback to help the user debug, in case of errors.
            verify the accuracy of the results and confirm that they match the expected output.
            provide a summary or analysis of the data to help the user understand the outcomes better.
            The file path is -->{location} 
            '''
)


In [None]:
# Debugging agent: takes over when execution fails and produces corrected code.
# The prompt names `plt.savefig` because matplotlib.pyplot has no `save` function.
debugger = AssistantAgent(
    name="debugger",
    llm_config={"config_list": config_list},
    system_message=f"""
    Code will be written in the specified language, {code_language}.
    Write the complete end-to-end code based on the outlined steps.
    Ensure that the plot is saved using plt.savefig and stored at the specified {chart_location}, printing "Chart saved as {chart_filename}" once the plot is saved.
    Your responsibility is to identify these errors and resolve them promptly generated from the previous code.
    test the code after execution to ensure it works as expected and correct any issues that arise.
    If the code fails or produces unexpected results, debug it step-by-step, fix the problem.
    Re-run the code until it's fully functional.Confirm that the plot is correctly generated and saved, and the output is accurate when done resolving errors.
    Ensure that all dependencies and libraries required for execution are correctly installed and imported.
"""
)

In [None]:
# Final agent in the pipeline: summarises what was done and confirms completion.
# The prompt has no placeholders, so it is a plain string literal.
process_completion = AssistantAgent(
    name="process_completion",
    system_message="""You are a Process completion agent which takes care that all processes are being completed by all other agents.
    Respond back with information in a tabular format or sequential steps, depending on the context.
    Always provide tabular responses in Markdown format.
    display the data head in Markdown format.
    Ensure all tabular data is processed and presented using Markdown for clarity.
    Give complete details at each step, ensuring clarity in every action for sequential data.
    Transform the data into a more usable format(If required) (e.g., scaling or encoding categorical variables).
    Provide an overview of any transformations applied, explaining the changes made to the data.
    If the process is incomplete, ask the user if they would like to continue or need further assistance.
    Once everything is finished, confirm completion with the user.
    Recommend two new questions or tasks to keep the conversation engaging and move forward.
""",
    llm_config={"config_list": config_list},
)

In [None]:
from autogen.agentchat.agent import Agent

In [None]:
def state_transition(last_speaker: Agent, groupchat: GroupChat):
    """Select the next speaker for the fixed analysis pipeline.

    Hand-off order: query processor -> planner -> data retriever -> code writer
    -> code executor. After the executor, the debugger speaks if the last
    message reports a non-zero exit code, otherwise the process-completion
    agent does. The debugger hands back to the code writer.

    Args:
        last_speaker: The agent that produced the most recent message.
        groupchat: The group chat whose ``messages`` history is inspected.

    Returns:
        The agent that should speak next, or ``None`` when ``last_speaker`` has
        no configured successor (for example after the process-completion agent).
    """
    import re  # local import so this cell does not rely on a separate import cell

    messages = groupchat.messages

    # Only the opening request exists so far: the query processor always speaks first.
    if len(messages) <= 1:
        return query_processing_agent

    if last_speaker is code_executor:
        # The content may be missing, None or non-text; treat those as "no exit code reported".
        last_content = messages[-1].get("content")
        if not isinstance(last_content, str):
            last_content = ""
        # Treat every non-zero exit code as a failure, not only the literal "exitcode: 1".
        exit_code = re.search(r"exitcode:\s*(-?\d+)", last_content)
        if exit_code and int(exit_code.group(1)) != 0:
            return debugger  # execution failed: hand over to the debugger
        return process_completion  # no failure reported: wrap up

    if last_speaker is debugger:
        return groupchat.agent_by_name("code_writer")  # regenerate code after debugging

    # Linear hand-offs that do not depend on the message content.
    linear_handoffs = (
        (query_processing_agent, planner),
        (planner, data_retrival_agent),
        (data_retrival_agent, code_writer),
        (code_writer, code_executor),
    )
    for speaker, successor in linear_handoffs:
        if last_speaker is speaker:
            return successor

    return None  # no successor configured for this speaker



In [None]:
# Group chat wiring: all seven pipeline agents, with `state_transition` deciding who speaks next.
# NOTE(review): max_round=900 is a very high ceiling for a loop that can cycle between the
# code writer, executor and debugger — confirm this is intentional.
cs_groupchat = GroupChat(
    agents=[
        query_processing_agent,
        planner,
        data_retrival_agent,
        code_writer,
        code_executor,
        debugger,
        process_completion,
    ],
    messages=[],
    max_round=900,
    speaker_selection_method=state_transition,
)


In [None]:
# Manager that drives `cs_groupchat`. It uses the same {"config_list": ...} llm_config shape
# as every other agent in this notebook instead of a single raw config entry.
cs_manager = GroupChatManager(
    groupchat=cs_groupchat,
    llm_config={"config_list": config_list},
)

In [None]:
# The user's request; the dataset path is interpolated so the agents know where to read from.
question= f"""
can you plot all variation of age vs gender plots from  mall_customer.csv file preset at--> {location}
"""

# A participant agent (the user proxy) opens the conversation with the manager. The original
# had the manager initiating a chat with itself, which makes it both sender and recipient of
# the opening message.
response = code_executor.initiate_chat(cs_manager, message=question)