In [None]:
import pandas as pd
import openai
import os
from dotenv import load_dotenv
from typing import List, Dict, Any
import mysql.connector
from mysql.connector import Error

# Load ../keys.env into the process environment, then read the credentials
# used by the rest of the notebook (each is None when the variable is unset).
dotenv_path = os.path.join('..', 'keys.env')
load_dotenv(dotenv_path)
api_key, db_user, db_password = (
    os.getenv(var_name)
    for var_name in ("OPENAI_API_KEY", "DATABASE_USER", "DATABASE_PASSWORD")
)

def connect_to_mysql():
    """Open and return a new MySQL connection to the ``detoraki_fetusdata`` database.

    Host, database name and port are fixed here; the user name and password
    come from the module-level ``db_user`` / ``db_password`` values. Anything
    raised by ``mysql.connector.connect`` propagates to the caller, and the
    caller is responsible for closing the returned connection.
    """
    settings = {
        'host': 'box5882.bluehost.com',
        'database': 'detoraki_fetusdata',
        'user': db_user,
        'password': db_password,
        'port': 3306,  # default MySQL port
    }
    return mysql.connector.connect(**settings)

def get_schema() -> Dict[str, List[str]]:
    """Return a mapping of every table name to the list of its column names.

    Runs ``SHOW TABLES`` followed by one ``DESCRIBE`` per table on a fresh
    connection obtained from ``connect_to_mysql``.

    Returns:
        ``{table_name: [column_name, ...]}``. If a statement fails with a
        ``mysql.connector.Error`` the error is printed and ``None`` is
        returned instead, so callers must be prepared for ``None``.

    Raises:
        Whatever ``connect_to_mysql`` raises; connection failures are not
        caught here.
    """
    conn = connect_to_mysql()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("SHOW TABLES;")
            table_names = [row[0] for row in cursor.fetchall()]

            schema = {}
            for table_name in table_names:
                # Backtick-quote the identifier (doubling embedded backticks)
                # so names that are reserved words or contain special
                # characters do not break the statement.
                quoted = f"{table_name}".replace("`", "``")
                cursor.execute(f"DESCRIBE `{quoted}`;")
                schema[table_name] = [row[0] for row in cursor.fetchall()]
            return schema
        finally:
            # Release the cursor on the error path too, not only on success.
            cursor.close()
    except Error as e:
        print(e)
        return None
    finally:
        conn.close()

# Load the table/column map once when this cell runs; this opens a database
# connection as a side effect and yields None if the schema queries fail.
schema = get_schema()

def _extract_sql(reply):
    """Return the SQL in a model reply, unwrapping a Markdown code fence if present."""
    text = reply.strip()
    fence = "```"
    start = text.find(fence)
    if start == -1:
        return text
    start += len(fence)
    end = text.find(fence, start)
    body = text[start:] if end == -1 else text[start:end]
    # The opening fence may carry a language tag on its own line (```sql); drop it.
    first_line, newline, remainder = body.partition("\n")
    if newline and first_line.strip().lower() in ("sql", "mysql"):
        body = remainder
    return body.strip()


def generate_sql_query(user_query, schema, api_key, conversation_history):
    """Ask the chat model to translate a natural-language question into MySQL.

    Args:
        user_query: The question as typed by the user.
        schema: Mapping of table name to list of column names. It is rendered
            into the system message; ``None`` or an empty mapping simply
            omits the schema section.
        api_key: OpenAI API key. It is assigned to the global
            ``openai.api_key``.
        conversation_history: Earlier chat messages (``role``/``content``
            dicts) inserted between the system message and the new question.
            ``None`` is treated as an empty history.

    Returns:
        The model's reply as a string, with any Markdown code fence removed
        so the result can be executed directly.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises.
    """
    openai.api_key = api_key

    system_content = (
        "You are a SQL expert specialized in phpMyAdmin. "
        "Answer the questions by providing an SQL script that is compatible with MySQL."
    )

    # Render the schema into the system message. Previously the schema text was
    # assembled but never included, so the model had no table or column names.
    schema_lines = [
        f"Table: {table}\nColumns: {', '.join(columns)}\n"
        for table, columns in (schema or {}).items()
    ]
    if schema_lines:
        system_content += "\nHere is the database schema:\n" + "".join(schema_lines)

    # Domain hints that were tried in this prompt before; kept disabled:
    # "The lastName AND firstName columns in s_org_ext are used interchangeably.\n"
    # "Ensure that Greek names remain in Greek, without transliteration. "
    # "Do not provide any explanations, comments, or additional context—only return the SQL query as plain text.\n"
    # "The visits can be found in the combination of tblobstetrichistory_x and tblgynhistory tables since the tblappointment isn't actively used.\n"
    # "for the number of children, you need to count the number of rows in tblpregnancyhistory.\n"
    # "Do not use any other tables than the ones provided in the schema.\n"
    # "Do not use columns that are not in the respective table.\n"
    # "s_org_extID is not in tblobstetrichistory_x so you need to join with tblobstetrichistory.\n"
    # "Ensure the SQL query is syntactically correct and uses only the columns and tables specified in the schema."

    messages = (
        [{"role": "system", "content": system_content}]
        + list(conversation_history or [])
        + [{"role": "user", "content": f"User query: {user_query}"}]
    )

    response = openai.ChatCompletion.create(
        model="gpt-4",  # must be a chat-based model
        messages=messages,
        max_tokens=250,
        temperature=0,
        top_p=1,
    )

    # The reply may be wrapped in a Markdown fence; hand back bare SQL only.
    return _extract_sql(response['choices'][0]['message']['content'])

def run_query(query_string):
    """Execute a SQL statement and return its result set as a DataFrame.

    NOTE(security): the string is executed verbatim, with no parameter
    binding or validation, so it must come from a trusted source.

    Args:
        query_string: The SQL text to run. ``None``, empty or whitespace-only
            input is rejected without opening a connection.

    Returns:
        A ``pandas.DataFrame`` whose columns are named after the cursor
        description, or ``None`` when the query is empty or fails with a
        ``mysql.connector.Error`` (the error and the query are printed).

    Raises:
        Whatever ``connect_to_mysql`` raises; connection failures are not
        caught here.
    """
    if not query_string or not query_string.strip():
        print("No SQL query to execute.")
        return None

    conn = connect_to_mysql()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query_string)
            results = cursor.fetchall()
            column_names = [desc[0] for desc in cursor.description]
            return pd.DataFrame(results, columns=column_names)
        finally:
            # Release the cursor on the error path too, not only on success.
            cursor.close()
    except Error as e:
        print(e)
        print(query_string)
        return None
    finally:
        conn.close()

def _render_data_for_prompt(data, max_rows):
    """Render query results as prompt text without pandas' silent repr elision."""
    to_string = getattr(data, "to_string", None)
    if to_string is None:
        # Not a pandas object (e.g. None after a failed query): plain formatting.
        return f"{data}"
    total = len(data)
    if total <= max_rows:
        return to_string()
    shown = data.head(max_rows).to_string()
    return f"{shown}\n[showing first {max_rows} of {total} rows]"


def synthesize_response(user_query, data, api_key, conversation_history, max_rows=50):
    """Ask the chat model to answer the user's question from retrieved data.

    Args:
        user_query: The question as typed by the user.
        data: The query result, normally a ``pandas.DataFrame``. Any other
            value (including ``None``) is formatted with ``str``.
        api_key: OpenAI API key. It is assigned to the global
            ``openai.api_key``.
        conversation_history: Earlier chat messages placed before the
            messages built here. ``None`` is treated as an empty history.
        max_rows: Upper bound on the rows of ``data`` written into the prompt.
            When the result is longer, only the first ``max_rows`` rows are
            sent, followed by an explicit note stating the total.

    Returns:
        The model's answer as a stripped string.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises.
    """
    openai.api_key = api_key

    # Formatting a DataFrame with its repr drops rows/columns once pandas'
    # display limits are exceeded; render it explicitly so that any
    # truncation is bounded and visible to the model.
    data_context = f"Retrieved data: {_render_data_for_prompt(data, max_rows)}\n"

    messages = list(conversation_history or []) + [
        {
            "role": "system",
            "content": "You are a helpful assistant that provides concise and accurate answers based on user queries and retrieved data."
        },
        {
            "role": "user",
            "content": f"User query: {user_query}"
        },
        {
            "role": "system",
            "content": data_context  # retrieved data supplied as context
        }
    ]

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=1000,
        temperature=0,
        top_p=1,
    )

    return response['choices'][0]['message']['content'].strip()

# def rag_pipeline(user_query, api_key=None, conversation_history=[]):
#     # Step 1: Get the database schema dynamically
#     schema = get_schema()
    
#     # Step 2: Generate the SQL query from the user's natural language question
#     sql_query = generate_sql_query(user_query, schema, api_key, conversation_history)
    
#     # Step 3: Execute the SQL query and retrieve data from the database
#     result = run_query(sql_query)
    
#     # Step 4: Synthesize the final response using GPT-4
#     final_response = synthesize_response(user_query, result, api_key, conversation_history)
    
#     return final_response, sql_query, result

# # Function to handle the recursive question-answer process
# def ask_questions():
#     conversation_history = []
#     while True:
#         user_query = input("Enter your question (or type 'exit' to quit): ")
#         print(f"Ερώτηση: {user_query}")
        
#         # Exit condition
#         if user_query.lower() == 'exit':
#             print("Exiting the question-answer loop.")
#             break

#         # Process the query using your pipeline
#         final_response, sql_query, result = rag_pipeline(user_query, api_key=api_key, conversation_history=conversation_history)
        
#         # Append the user query and the assistant's response to the conversation history
#         conversation_history.append({"role": "user", "content": user_query})
#         conversation_history.append({"role": "assistant", "content": final_response})
        
#         print(f"Απάντηση: {final_response}") # Print the response for the current query
#         print(f"SQL Query: \n{sql_query}")
#         print(f"Αποτελέσματα: \n{result}")
#         print("--------------------------------------")

148.253.134.213

In [2]:
# ask_questions()

In [3]:
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from abc import ABC, abstractmethod
import pandas as pd

import os
from dotenv import load_dotenv
from mysql.connector import Error

# Load ../keys.env into the process environment, then read the credentials
# used by the agents below (each is None when the variable is unset).
dotenv_path = os.path.join('..', 'keys.env')
load_dotenv(dotenv_path)
api_key, db_user, db_password = (
    os.getenv(var_name)
    for var_name in ("OPENAI_API_KEY", "DATABASE_USER", "DATABASE_PASSWORD")
)

@dataclass
class AgentResponse:
    """Result envelope returned by an agent's ``execute`` method."""
    # True when the agent's step completed without raising
    success: bool
    # Payload produced by the step; the agents in this file pass None on failure
    data: Any
    # String form of the caught exception on failure; None otherwise
    error: Optional[str] = None

class BaseAgent(ABC):
    """Abstract base for pipeline agents; holds the API key shared by subclasses."""

    def __init__(self, api_key: str):
        self.api_key = api_key

    @abstractmethod
    def execute(self, *args, **kwargs) -> AgentResponse:
        """Run the agent's step and report the outcome; subclasses must override."""

class SchemaAgent(BaseAgent):
    """Agent that loads the database schema through ``get_schema``."""

    def execute(self) -> AgentResponse:
        """Fetch the schema and wrap it in an ``AgentResponse``.

        Returns:
            ``success=True`` with the table-to-columns mapping as ``data``;
            ``success=False`` with an ``error`` message when ``get_schema``
            raises or returns ``None``.
        """
        try:
            schema = get_schema()
        except Exception as e:
            return AgentResponse(success=False, data=None, error=str(e))
        if schema is None:
            # get_schema prints MySQL errors and returns None instead of
            # raising, so a missing schema must not be reported as success.
            return AgentResponse(
                success=False,
                data=None,
                error="Could not read the database schema.",
            )
        return AgentResponse(success=True, data=schema)

class SQLGenerationAgent(BaseAgent):
    """Agent that turns a natural-language question into SQL via ``generate_sql_query``.

    NOTE: ``system_prompt`` is stored on the instance, but ``execute`` does
    not pass it on to ``generate_sql_query``.
    """

    def __init__(self, api_key: str, system_prompt: str):
        super().__init__(api_key)
        self.system_prompt = system_prompt

    def execute(self, user_query: str, schema: Dict, conversation_history: List) -> AgentResponse:
        """Generate SQL for ``user_query``.

        Returns:
            ``success=True`` with the generated SQL text as ``data``, or
            ``success=False`` with the exception message as ``error``.
        """
        try:
            generated = generate_sql_query(
                user_query, schema, self.api_key, conversation_history
            )
        except Exception as exc:
            return AgentResponse(success=False, data=None, error=str(exc))
        else:
            return AgentResponse(success=True, data=generated)

class QueryExecutionAgent(BaseAgent):
    """Agent that runs a SQL statement through ``run_query``."""

    def execute(self, sql_query: str) -> AgentResponse:
        """Execute ``sql_query`` and wrap the result in an ``AgentResponse``.

        Returns:
            ``success=True`` with the query result as ``data``;
            ``success=False`` with an ``error`` message when ``run_query``
            raises or returns ``None``.
        """
        try:
            result = run_query(sql_query)
        except Exception as e:
            return AgentResponse(success=False, data=None, error=str(e))
        if result is None:
            # run_query prints MySQL errors and returns None instead of
            # raising, so a missing result must not be reported as success.
            return AgentResponse(
                success=False,
                data=None,
                error="The SQL query could not be executed.",
            )
        return AgentResponse(success=True, data=result)

class ResponseSynthesisAgent(BaseAgent):
    """Agent that turns query results into a final answer via ``synthesize_response``."""

    def execute(self, user_query: str, query_result: pd.DataFrame, conversation_history: List) -> AgentResponse:
        """Produce the answer text for ``user_query`` from ``query_result``.

        Returns:
            ``success=True`` with the answer text as ``data``, or
            ``success=False`` with the exception message as ``error``.
        """
        try:
            answer = synthesize_response(
                user_query, query_result, self.api_key, conversation_history
            )
        except Exception as exc:
            return AgentResponse(success=False, data=None, error=str(exc))
        else:
            return AgentResponse(success=True, data=answer)

class RAGOrchestrator:
    """Chains the schema, SQL generation, query execution and synthesis agents.

    The orchestrator keeps the running conversation history and stops at the
    first stage that fails, so a failure is reported to the caller instead of
    being passed on to the next stage.
    """

    def __init__(self, api_key: str):
        self.schema_agent = SchemaAgent(api_key)
        self.sql_agent = SQLGenerationAgent(api_key, system_prompt=self._get_system_prompt())
        self.query_agent = QueryExecutionAgent(api_key)
        self.synthesis_agent = ResponseSynthesisAgent(api_key)
        self.conversation_history = []

    def _get_system_prompt(self) -> str:
        """Return the prompt template handed to the SQL generation agent.

        The ``{schema}`` placeholder is returned as-is; nothing in this class
        substitutes it.
        """
        return """
        ### Database Schema ###
        {schema}
        
        ### Specific Instructions ###
        1. **Table Usage**:
           - Visits: tblobstetrichistory_x + tblgynhistory
           - Children count: tblpregnancyhistory
        
        2. **Joins**:
           - s_org_extID requires tblobstetrichistory join
        
        3. **Special Rules**:
           - Use lastName/firstName interchangeably
           - Preserve Greek characters
        """

    def process_query(self, user_query: str) -> Dict[str, Any]:
        """Run one question through the full pipeline.

        Returns:
            On success, a dict with ``response``, ``sql_query`` and
            ``results``. On failure, a dict with ``error`` plus whichever of
            ``sql_query`` / ``results`` had been produced before the failing
            stage. The conversation history is only extended on success.
        """
        # Stage 1: schema
        schema_response = self.schema_agent.execute()
        if not schema_response.success:
            return {"error": schema_response.error}

        # Stage 2: SQL generation
        sql_response = self.sql_agent.execute(
            user_query,
            schema_response.data,
            self.conversation_history
        )
        if not sql_response.success or not sql_response.data:
            return {"error": sql_response.error or "No SQL query was generated."}

        # Stage 3: query execution. A None payload also counts as a failure,
        # since an agent may report success while returning no data.
        result_response = self.query_agent.execute(sql_response.data)
        if not result_response.success or result_response.data is None:
            return {
                "error": result_response.error or "The SQL query could not be executed.",
                "sql_query": sql_response.data,
            }

        # Stage 4: answer synthesis
        final_response = self.synthesis_agent.execute(
            user_query,
            result_response.data,
            self.conversation_history
        )
        if not final_response.success:
            return {
                "error": final_response.error or "The answer could not be generated.",
                "sql_query": sql_response.data,
                "results": result_response.data,
            }

        # Record the turn only once a real answer exists, so a None answer
        # never ends up in the history sent with later requests.
        self.conversation_history.append({"role": "user", "content": user_query})
        self.conversation_history.append({"role": "assistant", "content": final_response.data})

        return {
            "response": final_response.data,
            "sql_query": sql_response.data,
            "results": result_response.data
        }

    def ask_questions(self):
        """Interactive loop: read questions from stdin until the user types 'exit'."""
        while True:
            user_query = input("Enter your question (or type 'exit' to quit): ")
            if user_query.lower() == 'exit':
                break
            print(f"Ερώτηση: {user_query}")
            result = self.process_query(user_query)
            if "error" in result:
                # Failed stages return no 'response' key; report and carry on.
                print(f"Error: {result['error']}")
                if result.get("sql_query"):
                    print(f"SQL Query: \n{result['sql_query']}")
            else:
                print(f"Απάντηση: {result['response']}")
                print(f"SQL Query: \n{result['sql_query']}")
                print(f"Αποτελέσματα: \n{result['results']}")
            # Plain concatenation: reusing the same quote character inside an
            # f-string expression is a SyntaxError before Python 3.12.
            print("-" * 100 + "\n")

In [4]:
# Usage in Jupyter: build the orchestrator with the OpenAI key read from the
# environment and start the interactive loop, which keeps prompting via
# input() until the user types 'exit'.
orchestrator = RAGOrchestrator(api_key)
orchestrator.ask_questions()

Ερώτηση: How many patients do I have?
Απάντηση: You have 1436 patients.
SQL Query: 
SELECT COUNT(*) FROM s_org_ext WHERE IsActive = 1;
Αποτελέσματα: 
   COUNT(*)
0      1436
----------------------------------------------------------------------------------------------------

