Code for prodiving a chat interface

In [4]:
# -*- coding: utf-8 -*-
"""Chatbot_DataAnalysis.ipynb

Automatically generated by Colaboratory.

Original file is located at
    [Your Colab Link Here - if applicable]
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import base64
from io import BytesIO
import os
import io
import uuid #for unique file names

from flask import Flask, render_template, request, jsonify

app = Flask(__name__)

class DataAnalyzerAgent:
    def __init__(self, config):
        self.config = config
        self.filepath = config.get('filepath')
        self.html_output = ""
        self.df = None

    def load_data(self):
        if self.filepath is None:
            self.html_output += "<p>Error: Filepath not provided in configuration.</p>"
            return False

        try:
            self.df = pd.read_csv(self.filepath)
            self.html_output += "<p>Data loaded successfully.</p>"
            return True
        except FileNotFoundError:
            self.html_output += f"<p>Error: File not found at {self.filepath}</p>"
            return False
        except Exception as e:
            self.html_output += f"<p>An error occurred while loading the data: {e}</p>"
            return False

    def describe_data(self):
        if self.df is None:
            return

        self.html_output += "<h2>Data Info</h2>"
        buffer = io.StringIO()
        self.df.info(buf=buffer)
        info_str = buffer.getvalue()
        self.html_output += f"<pre>{info_str}</pre>"

        self.html_output += "<h2>Descriptive Statistics for Numerical Columns</h2>"
        self.html_output += self.df.describe().to_html()

        self.html_output += "<h2>Value Counts for Categorical Columns</h2>"
        for col in self.df.select_dtypes(include=['object', 'category']).columns:
            self.html_output += f"<h3>Value counts for {col}</h3>"
            self.html_output += self.df[col].value_counts().to_frame().to_html()

    def visualize_data(self):
        if self.df is None:
            return

        numerical_cols = self.df.select_dtypes(include=['number']).columns
        categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns

        def plot_to_base64(plot_func):
            buffer = BytesIO()
            plot_func()
            plt.savefig(buffer, format='png')
            buffer.seek(0)
            image_base64 = base64.b64encode(buffer.getvalue()).decode()
            plt.close()
            return f'<img src="data:image/png;base64,{image_base64}" />'

        if len(numerical_cols) > 0:
            self.html_output += "<h2>Histograms of Numerical Columns</h2>"
            self.html_output += plot_to_base64(lambda: self.df[numerical_cols].hist(figsize=(15, 10)))

        if len(numerical_cols) > 0:
            self.html_output += "<h2>Box Plots of Numerical Columns</h2>"
            for col in numerical_cols:
                self.html_output += f"<h3>Box Plot of {col}</h3>"
                self.html_output += plot_to_base64(lambda: sns.boxplot(x=self.df[col]))

        if len(categorical_cols) > 0:
            self.html_output += "<h2>Count Plots of Categorical Columns</h2>"
            for col in categorical_cols:
                self.html_output += f"<h3>Count Plot of {col}</h3>"
                self.html_output += plot_to_base64(lambda: sns.countplot(x=self.df[col]))

        if len(numerical_cols) > 1:
            self.html_output += "<h2>Pairplots of numerical columns</h2>"
            self.html_output += plot_to_base64(lambda: sns.pairplot(self.df[numerical_cols]))

    def generate_html(self):
        html_content = f"""
        <html>
        <head><title>Data Analysis Report</title></head>
        <body>
        <h1>Data Analysis Report</h1>
        {self.html_output}
        </body>
        </html>
        """
        return html_content

    def run_analysis(self):
        if self.load_data():
            self.describe_data()
            self.visualize_data()
            html_content = self.generate_html()
            filename = f"data_analysis_report_{uuid.uuid4()}.html" #unique file name
            with open(filename, "w") as f:
                f.write(html_content)
            return filename
        else:
          return None

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/upload", methods=["POST"])
def upload():
    if "file" not in request.files:
        return jsonify({"response": "No file part"}), 400
    file = request.files["file"]
    if file.filename == "":
        return jsonify({"response": "No selected file"}), 400
    if file:
        try:
            temp_file_path = f"temp_{uuid.uuid4()}.csv"
            file.save(temp_file_path) #save to temp file.
            config = {"filepath": temp_file_path}
            analyzer = DataAnalyzerAgent(config)
            html_link = analyzer.run_analysis()
            os.remove(temp_file_path) #remove temp file.
            if html_link:
                return jsonify({"response": f"Analysis complete. View report: <a href='/{html_link}'>here</a>"})
            else:
              return jsonify({"response": "Analysis failed. Please check your data."})
        except Exception as e:
            return jsonify({"response": f"An error occurred: {str(e)}"}), 500

@app.route("/<filename>")
def serve_file(filename):
    return app.send_static_file(filename)

if __name__ == "__main__":
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
 * Restarting with stat
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/miniconda3/envs/eaisi-enexis-visualization/lib/python3.13/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
    ~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/opt/miniconda3/envs/eaisi-enexis-visualization/lib/python3.13/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
    ~~~~~~~~~~~~~~^^^^^^
  File "/opt/miniconda3/envs/eaisi-enexis-visualization/lib/python3.13/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
  File "/opt/miniconda3/envs/eaisi-enexis-visualization/lib/python3.13/site-packages/ipykernel/kernelapp.py", line 692, in initialize
    self.init_sockets()
    ~~~~~~~~~~~~~~~~~^^
  File "/opt/minicond

SystemExit: 1