In [5]:
import re
import pandas as pd
import psycopg2
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import pinecone
import openai
import os
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
import uuid  # For generating unique session IDs

# OpenAI API key
OPENAI_API_KEY = ''
openai.api_key = OPENAI_API_KEY

# Database connection details
DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
DATABASE_USERNAME = "postgres"
DATABASE_PASSWORD = "valign#123"
DATABASE_DB = "python_test_poc"
PORT = 5432

# Constants
PINECONE_API_KEY = "9fbe58e4-9e72-4023-90eb-ba8d022916b5"  # Replace with your Pinecone API key
INDEX_NAME = "smart-desk"  # Replace with your Pinecone index name
NAMESPACE = "projects"  # Replace with your namespace
MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Initialize Pinecone client
def initialize_pinecone():
    from pinecone import Pinecone, ServerlessSpec
    pc = Pinecone(api_key=PINECONE_API_KEY)
    
    if INDEX_NAME not in pc.list_indexes().names():
        pc.create_index(
            name=INDEX_NAME,
            dimension=768,
            metric='cosine',
            spec=ServerlessSpec(cloud='aws', region='us-west-2')
        )
    return pc.Index(INDEX_NAME)

# Load Hugging Face model for embeddings
def load_huggingface_model():
    return SentenceTransformer(MODEL_NAME)

def connect_to_db():
    try:
        conn = psycopg2.connect(
            dbname=DATABASE_DB,
            user=DATABASE_USERNAME,
            password=DATABASE_PASSWORD,
            host=DATABASE_HOST,
            port=PORT
        )
        return conn
    except psycopg2.Error as e:
        print(f"Error connecting to the database: {e}")
        raise
        
# Function to fetch schema from PostgreSQL database
def fetch_schema(conn):
    try:
        query = """
        SELECT table_name, column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = 'public'
        """
        schema_df = pd.read_sql(query, conn)
        return schema_df
    except Exception as e:
        print(f"Error fetching schema: {e}")
        raise

# Function to process schema: remove special characters and convert to lowercase
def process_schema(schema_df):
    def clean_column_name(name):
        return re.sub(r'[^a-zA-Z]', '', name).lower()

    schema_df['processed_column_name'] = schema_df['column_name'].apply(clean_column_name)
    return schema_df

# Extract relevant entities based on regex and column names
def extract_entities(user_query, schema):
    entities = {
        'project_name': None,
        'owner': None
    }
    project_pattern = re.compile(r'project\s+([a-zA-Z0-9_ ]+)', re.IGNORECASE)
    owner_pattern = re.compile(r'owner\s+of\s+project\s+([a-zA-Z0-9_ ]+)', re.IGNORECASE)
    project_match = project_pattern.search(user_query)
    if project_match:
        entities['project_name'] = project_match.group(1).strip()
    owner_match = owner_pattern.search(user_query)
    if owner_match:
        entities['owner'] = owner_match.group(1).strip()
    return entities

# Query Pinecone for relevant context and augment the input
def query_pinecone_and_augment_input(user_input, entities, namespace):
    embedding_model = load_huggingface_model()
    pinecone_index = initialize_pinecone()
    augmented_input = user_input
    pinecone_data = {}
    for entity_name, entity_value in entities.items():
        if entity_value:
            query_embedding = embedding_model.encode([entity_value])[0]
            query_embedding = np.array(query_embedding, dtype=np.float32)
            try:
                result = pinecone_index.query(
                    namespace=namespace,
                    vector=query_embedding.tolist(),
                    top_k=3,
                    include_values=True,
                    include_metadata=True
                )
                matches = result.get('matches', [])
                if matches:
                    unique_values = [match['metadata'].get('unique_value') for match in matches if 'metadata' in match]
                    if unique_values:
                        pinecone_data[entity_name] = unique_values
                        if len(unique_values) > 1:
                            print(f"Multiple matches found for '{entity_value}':")
                            for idx, unique_value in enumerate(unique_values):
                                print(f"{idx + 1}: {unique_value}")
                            while True:
                                selection = input(f"Please select the most relevant option for '{entity_value}' (1-{len(unique_values)}): ")
                                try:
                                    selected_value = unique_values[int(selection) - 1]
                                    augmented_input = augmented_input.replace(entity_value, selected_value)
                                    break
                                except (IndexError, ValueError):
                                    print("Invalid selection. Please choose a valid option.")
                        else:
                            augmented_input = augmented_input.replace(entity_value, unique_values[0])
                else:
                    print(f"No matches found for {entity_value} in Pinecone.")
            except Exception as e:
                print(f"Error querying Pinecone: {str(e)}")
                return f"Error querying Pinecone: {str(e)}", {}
    return augmented_input, pinecone_data

def generate_sql_query(user_input, processed_schema_df):
    schema_json = processed_schema_df.to_json(orient='records')
    schema_with_types = processed_schema_df[['table_name', 'column_name']].to_dict(orient='records')  # Removed 'data_type'
    
    context = f"""
    ## Database Schema Context
    Schema JSON: {schema_json}
    Detailed Schema: {schema_with_types}

    ## User Input
    Given the following user input: '{user_input}', generate an SQL query.
    Use the LIKE operator for partial matches where appropriate. Handle data type mismatches explicitly.

    ## Instructions
    Based on the user input and the provided schema, generate an accurate SQL query.
    Ensure the query maps correctly to the tables and columns in the database.
    Handle data type casting if necessary to match columns with different types.
    """
    try:
        response = openai.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt=context,
            max_tokens=500,
            temperature=0.7
        )
        generated_query = response.choices[0].text.strip()
        if generated_query.lower().startswith("the generated sql query is:"):
            generated_query = generated_query[len("The generated SQL query is:"):].strip()
        return generated_query
    except openai.OpenAIError as e:
        print(f"Error generating SQL query: {e}")
        raise


# Initialize OpenAI Chat model
openai_model = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    temperature=0.7,
    max_tokens=150
)

# Create a ChatPromptTemplate with the knowledge base included
template = """
## Knowledge Base:
{knowledge_base}

## Database Schema:
{database_schema}

## Question:
{question}

## Answer:
"""

prompt_template = ChatPromptTemplate.from_template(template)

# Statefully manage chat history
store = {}

def get_session_history(session_id: str):
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

def generate_session_id():
    """Generate a unique session ID using UUID."""
    return str(uuid.uuid4())

session_id = generate_session_id()

chain = prompt_template | openai_model

chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="query",
    history_messages_key="history"
)

def get_answer_from_chatbot(question, database_schema):
    try:
        prompt = prompt_template.format(
            knowledge_base="",
            database_schema=database_schema,
            question=question
        )
        response = openai_model.invoke(input=prompt)
        parsed_response = response.content.strip() if hasattr(response, 'content') else "No response content found."
        return parsed_response
    except Exception as e:
        return f"Error generating response from OpenAI: {str(e)}"

# Function to execute the SQL query and print the results
def execute_sql_query(conn, sql_query):
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql_query)
            results = cursor.fetchall()
            return results
    except Exception as e:
        print(f"Error executing SQL query: {e}")
        return None

# Determine if user query is related to database or general knowledge
def determine_query_type(user_query, schema_df):
    user_query_lower = user_query.lower()
    
    if any(table.lower() in user_query_lower for table in schema_df['table_name'].unique()) or \
       any(column.lower() in user_query_lower for column in schema_df['column_name'].unique()):
        return "database"
    else:
        return "general"

def main():
    # Connect to the database
    conn = connect_to_db()
    schema_df = fetch_schema(conn)
    processed_schema_df = process_schema(schema_df)

    user_query = input("Please enter your query: ")

    # Extract entities from the user query
    entities = extract_entities(user_query, processed_schema_df)

    # Determine if the query is related to the database or general knowledge
    query_type = determine_query_type(user_query, processed_schema_df)

    if query_type == "database":
        # Query Pinecone and augment input
        augmented_query, pinecone_data = query_pinecone_and_augment_input(user_query, entities, NAMESPACE)

        # Generate SQL query
        sql_query = generate_sql_query(augmented_query, processed_schema_df)
        
        print(f"Generated SQL Query: {sql_query}")

        # Execute SQL query and print results
        results = execute_sql_query(conn, sql_query)
        if results:
            for row in results:
                print(row)
    else:
        # Use OpenAI for general knowledge questions
        answer = get_answer_from_chatbot(user_query, processed_schema_df.to_json())
        print(f"Chatbot Answer: {answer}")

    # Close database connection
    conn.close()

if __name__ == "__main__":
    main()


  schema_df = pd.read_sql(query, conn)


Please enter your query:  give me  the list tasks related to project IIFL Samasta.




Multiple matches found for 'IIFL Samasta':
1: IIFl Samasta CPL CR
2: IIFL Samasta - CGRM
3: IIFL SAMASTA - RPA BOT


Please select the most relevant option for 'IIFL Samasta' (1-3):  3


Generated SQL Query: ## Expected Output
    SELECT *
    FROM tasks
    WHERE project_name LIKE '%IIFL SAMASTA - RPA BOT%';
Error executing SQL query: syntax error at or near "##"
LINE 1: ## Expected Output
        ^



In [None]:
# import os
# import json
# import re
# import psycopg2
# import pandas as pd
# import openai
# from fuzzywuzzy import fuzz

# # OpenAI API key
# OPENAI_API_KEY = 'sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S'
# openai.api_key = OPENAI_API_KEY

# # PostgreSQL database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     """Connect to the PostgreSQL database."""
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         return None

# # Fetch schema with column names and data types
# def fetch_schema_with_data_types(conn):
#     """Fetch the database schema with data types."""
#     try:
#         query = """
#         SELECT table_name, column_name, data_type
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema with data types: {e}")
#         return None

# # Format schema as a string for the prompt
# def format_schema(schema_df):
#     """Format the schema DataFrame as a string."""
#     if schema_df is None or schema_df.empty:
#         return "No schema information available."
    
#     schema_str = ""
#     grouped = schema_df.groupby('table_name')
#     for table_name, group in grouped:
#         columns = ', '.join([f"{row['column_name']} ({row['data_type']})" for _, row in group.iterrows()])
#         schema_str += f"{table_name}: {columns}\n"
#     return schema_str

# # Function to generate SQL query using OpenAI
# def generate_sql_query(schema_str, user_input, chat_history):
#     """Generate an SQL query using OpenAI based on the schema and user input."""
#     # Include chat history in the prompt
#     chat_context = "\n".join(chat_history)
    
#     prompt = f"""
#     The database contains the following schema:
#     {schema_str}
    
#     Previous interactions:
#     {chat_context}
    
#     Based on this schema and the user request:
#     "{user_input}"

#     Generate an optimized SQL query that meets the user's intent.
#     The query should be efficient and use the correct table and column names.
#     """

#     try:
#         # Call OpenAI using chat completion API
#         response = openai.chat.completions.create(
#             model="gpt-4o-mini-2024-07-18",
#             messages=[
#                 {"role": "system", "content": "You are a helpful assistant specialized in generating SQL queries, ensuring the use of appropriate operators like LIKE or expressions in sql queries like '% %' for  matches if needed. Accurately map user input to the relevant tables and columns in the database based on the provided schema, using the LIKE operator for partial matches where necessary. Handle data type mismatches explicitly by casting to the appropriate type when required, ensuring correct query execution. Additionally, Manage variations in user input, such as case sensitivity or small spelling differences, using flexible matching techniques to generate precise and reliable SQL queries."},
#                 {"role": "user", "content": prompt}
#             ],
#             max_tokens=500,
#             temperature=0.7
#         )

#         # Extract SQL query from the response
#         sql_response = response.choices[0].message.content.strip()
#         return sql_response
#     except Exception as e:
#         print(f"Error generating SQL query: {e}")
#         return None

# # Extract generated SQL Query
# def extract_sql_query(response):
#     """Extract SQL query from the OpenAI response."""
#     if response:
#         start = response.find("```sql") + len("```sql\n")
#         end = response.find("```", start)
#         sql_query = response[start:end].strip()
#         return sql_query
#     return None

# # Function to execute the SQL query and print the results
# def execute_sql_query(conn, sql_query):
#     """Execute the SQL query and return the results."""
#     try:
#         with conn.cursor() as cursor:
#             cursor.execute(sql_query)
#             results = cursor.fetchall()
#             return results
#     except Exception as e:
#         print(f"Error executing SQL query: {e}")
#         return None

# # Determine if user query is related to database or general knowledge
# def determine_query_type(user_query, schema_df, threshold=75):
#     """Determine if the user query is related to the database or general knowledge."""
#     user_query_lower = user_query.lower()
    
#     # Extract unique table and column names from the schema and convert to lowercase
#     table_names = schema_df['table_name'].str.lower().unique()
#     column_names = schema_df['column_name'].str.lower().unique()
    
#     # Function to check fuzzy match
#     def is_fuzzy_match(query, options, threshold):
#         for option in options:
#             if fuzz.partial_ratio(query, option) >= threshold:
#                 return True
#         return False
    
#     # Check if user query matches any table or column name
#     if is_fuzzy_match(user_query_lower, table_names, threshold) or \
#        is_fuzzy_match(user_query_lower, column_names, threshold):
#         return "database"
    
#     return "knowledge"

# # Main function to handle user queries
# def process_user_query(user_input, chat_history):
#     """Process the user input and determine the appropriate response."""
#     conn = connect_to_db()
#     if conn is None:
#         print("Failed to connect to the database.")
#         return

#     schema_df = fetch_schema_with_data_types(conn)
#     if schema_df is None:
#         print("Failed to fetch schema.")
#         return

#     processed_schema_str = format_schema(schema_df)
#     query_type = determine_query_type(user_input, schema_df)

#     if query_type == "database":
#         sql_query = generate_sql_query(processed_schema_str, user_input, chat_history)
#         sql_query = extract_sql_query(sql_query)
#         if sql_query:
#             print("Generated SQL Query:", sql_query)
            
#             # Execute the generated SQL query
#             results = execute_sql_query(conn, sql_query)
            
#             if results:
#                 print("Query Results:", results)
#                 chat_history.append(f"User: {user_input}\nSQL Query: {sql_query}\nResults: {results}")
#             else:
#                 print("No results returned for the SQL query.")
#         else:
#             print("SQL query generation failed.")
        
#     else:
#         print("This query is not related to the database.")

#     conn.close()

# # Example usage
# if __name__ == "__main__":
#     chat_history = []
#     while True:
#         user_input = input("Enter your query (type 'exit' to quit): ")
#         if user_input.lower() in ['exit', 'quit']:
#             break
#         process_user_query(user_input, chat_history)


In [None]:
# import openai
# import psycopg2
# import pandas as pd
# from langchain_openai import ChatOpenAI
# from langchain.prompts import ChatPromptTemplate
# import os
# import json
# from fuzzywuzzy import fuzz
# import re


# # Database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # OpenAI API key initialization
# openai.api_key = 'sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S'

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         raise

# # Fetch schema with column names and data types
# def fetch_schema_with_data_types(conn):
#     try:
#         query = """
#         SELECT table_name, column_name, data_type
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema with data types: {e}")
#         raise

# # Format schema as a string for the prompt
# def format_schema(schema_df):
#     schema_str = ""
#     grouped = schema_df.groupby('table_name')
#     for table_name, group in grouped:
#         columns = ', '.join([f"{row['column_name']} ({row['data_type']})" for _, row in group.iterrows()])
#         schema_str += f"{table_name}: {columns}\n"
#     return schema_str

# # Function to generate SQL query using GPT-4o-mini
# def generate_sql_query(schema_str, user_input):
#     prompt = f"""
#     The database contains the following schema:
#     {schema_str}

#     Based on this schema and the user request:
#     "{user_input}"

#     Generate an optimized SQL query that meets the user's intent.
#     The query should be efficient and use the correct table and column names.
#     """

#     # Call GPT-4o-mini-2024-07-18 model using chat completion API
#     response = openai.chat.completions.create(
#         model="gpt-4o-mini-2024-07-18",
#         messages=[
#             {"role": "system", "content": "You are a helpful assistant specialized in generating SQL queries, always ensuring the use of appropriate operators like LIKE or expressions in sql queries like '% %' for partial matches if needed. Accurately map user input to the relevant tables and columns in the database based on the provided schema, using the LIKE operator for partial matches where necessary. Handle data type mismatches explicitly by casting to the appropriate type when required, ensuring correct query execution. Additionally, Manage variations in user input, such as case sensitivity or small spelling differences, using flexible matching techniques to generate precise and reliable SQL queries.Note do not use ILIKE Operator"},
#             {"role": "user", "content": prompt}
#         ],
#         max_tokens=500,  # Reduced token limit for completion
#         temperature=0.7
#     )

#     # Extract SQL query from the response
#     sql_response = response.choices[0].message.content
#     # Find and clean the SQL query part
#     start = sql_response.find("```sql") + 6
#     end = sql_response.find("```", start)
#     sql_query = sql_response
#     print("Response:",sql_response)
    

#     return sql_query


# # Initialize the chat history
# chat_history = []

# def chat_with_openai(user_message):
#     # Append the user message to the chat history
#     chat_history.append({"role": "user", "content": user_message})

#     # Prepare the messages to send to the OpenAI API
#     messages = [{"role": message['role'], "content": message['content']} for message in chat_history]

#     # Send the chat history to OpenAI
#     response = openai.ChatCompletion.create(
#         model="gpt-3.5-turbo",  # Use the desired model
#         messages=messages,
#         max_tokens=150,  # Adjust the max tokens as needed
#         temperature=0.7  # Adjust the temperature for response variability
#     )

#     # Get the assistant's response
#     assistant_response = response['choices'][0]['message']['content']

#     # Append the assistant's response to the chat history
#     chat_history.append({"role": "assistant", "content": assistant_response})

#     return assistant_response

# # Example usage
# user_input = "What is the capital of France?"
# response = chat_with_openai(user_input)
# print("Assistant:", response)

# # Continue the chat
# follow_up_input = "And what about Germany?"
# response = chat_with_openai(follow_up_input)
# print("Assistant:", response)


# # Extract generated SQL Query
# def extract_sql_query(response):
#     start = response.find("```sql") + len("```sql\n")
#     end = response.find("```", start)
#     sql_query = response[start:end].strip()
#     return sql_query

# # Initialize OpenAI Chat model
# openai_model = ChatOpenAI(
#     openai_api_key=openai.api_key,
#     model_name="gpt-4o-mini-2024-07-18",
#     temperature=0.7,
#     max_tokens=150
# )

# #Generate Response
# # Update the generate_response function
# def generate_response(user_query, sql_result):
#     # Prepare the prompt for GPT-4 to generate the natural language response
#     prompt = f"User query: \"{user_query}\"\nSQL result: {sql_result}\nGenerate a natural language response from the result:"
    
#     # Call the OpenAI Chat API
#     response = openai.chat.completions.create(
#       model="gpt-4o-mini-2024-07-18",
#       messages=[
#           {"role": "user", "content": prompt}
#       ],
#       max_tokens=500,
#       temperature=0.7
#     )
    
#     return response.choices[0].message.content

# # Make sure to replace the completion calls elsewhere in the code

    
# # Create a ChatPromptTemplate with the knowledge base included
# template = """
# ## Knowledge Base:
# {knowledge_base}

# ## Database Schema:
# {database_schema}

# ## Question:
# {question}

# ## Answer:
# """

# prompt_template = ChatPromptTemplate.from_template(template)

# def get_answer_from_chatbot(question, database_schema):
#     try:
#         prompt = prompt_template.format(
#             knowledge_base="",
#             database_schema=database_schema,
#             question=question
#         )
#         response = openai_model.invoke(input=prompt)
#         parsed_response = response.content.strip() if hasattr(response, 'content') else "No response content found."
#         return parsed_response
#     except Exception as e:
#         return f"Error generating response from OpenAI: {str(e)}"
        
# # Function to execute the SQL query and print the results
# def execute_sql_query(conn, sql_query):
#     try:
#         with conn.cursor() as cursor:
#             cursor.execute(sql_query)
#             results = cursor.fetchall()
#             print(results)
#             return results
#     except Exception as e:
#         print(f"Error executing SQL query: {e}")
#         return None
        
# # Determine if user query is related to database or general knowledge
# def determine_query_type(user_query, schema_df, threshold=75):
#     user_query_lower = user_query.lower()
    
#     # Extract unique table and column names from the schema and convert to lowercase
#     table_names = schema_df['table_name'].str.lower().unique()
#     column_names = schema_df['column_name'].str.lower().unique()
    
#     # Function to check fuzzy match
#     def is_fuzzy_match(query, options, threshold):
#         for option in options:
#             if fuzz.partial_ratio(query, option) >= threshold:
#                 return True
#         return False
    
#     # Check if user query matches any table or column name
#     if is_fuzzy_match(user_query_lower, table_names, threshold) or \
#        is_fuzzy_match(user_query_lower, column_names, threshold):
#         return "database"
    
#     return "knowledge"

# # Main function to handle user queries
# def process_user_query(user_input):
#     conn = connect_to_db()
#     schema_df = fetch_schema_with_data_types(conn)
#     processed_schema_str = format_schema(schema_df)
#     query_type = determine_query_type(user_input, schema_df)

#     if query_type == "database":
#         sql_query = generate_sql_query(processed_schema_str, user_input)
#         sql_query=extract_sql_query(sql_query)
        
#         print("Generated SQL Query:", sql_query)
        
#         # Execute the generated SQL query
#         results = execute_sql_query(conn, sql_query)
#         rows=results
        
#         print("Row:",rows)
#         if len(rows)!=0:
#             print(generate_response(user_input,rows))
#         else:
#             print("I'm sorry, but I'm unable to provide results. Could you please clarify your query so I can assist you better?")
        
#         conn.close()
    
#     else:
#         # For non-database related queries, respond using the chatbot
#         print(get_answer_from_chatbot(user_input, processed_schema_str))

# # Example usage
# if __name__ == "__main__":
#     while True:
#         user_input = input("Enter your query: ")
#         if user_input.lower() in ['exit', 'quit']:
#             break
#         response = process_user_query(user_input)
        


In [None]:
# import openai
# import psycopg2
# import pandas as pd
# from langchain_openai import ChatOpenAI
# from langchain.prompts import ChatPromptTemplate
# import os
# import json
# from fuzzywuzzy import fuzz
# import re

# # Database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # OpenAI API key initialization
# openai.api_key = 'sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S'

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         raise

# # Fetch schema with column names and data types
# def fetch_schema_with_data_types(conn):
#     try:
#         query = """
#         SELECT table_name, column_name, data_type
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema with data types: {e}")
#         raise

# # Format schema as a string for the prompt
# def format_schema(schema_df):
#     schema_str = ""
#     grouped = schema_df.groupby('table_name')
#     for table_name, group in grouped:
#         columns = ', '.join([f"{row['column_name']} ({row['data_type']})" for _, row in group.iterrows()])
#         schema_str += f"{table_name}: {columns}\n"
#     return schema_str

# # Function to generate SQL query using GPT-4o-mini
# def generate_sql_query(schema_str, user_input):
#     prompt = f"""
#     The database contains the following schema:
#     {schema_str}

#     Based on this schema and the user request:
#     "{user_input}"

#     Generate an optimized SQL query that meets the user's intent.
#     The query should be efficient and use the correct table and column names.
#     """

#     # Call GPT-4o-mini-2024-07-18 model using chat completion API
#     response = client.chat.completions.create(
#         model="gpt-4o-mini-2024-07-18",
#         messages=[
#             {"role": "system", "content": "You are a helpful assistant specialized in generating SQL queries."},
#             {"role": "user", "content": prompt}
#         ],
#         max_tokens=500,  # Reduced token limit for completion
#         temperature=0.7
#     )

#     # Extract SQL query from the response
#     sql_response = response.choices[0].message.content
    
#     # # Find and clean the SQL query part
#     # start = sql_response.find("```sql") + 6
#     # end = sql_response.find("```", start)
#     sql_query = sql_response
    
#     return sql_query

# #extract generatedSQL Query
# def extract_sql_query(response):
#     # Find the start and end indices of the SQL code block
#     start = response.find("```sql") + len("```sql\n")
#     end = response.find("```", start)
    
#     # Extract and clean the SQL query
#     sql_query = response[start:end].strip()
    
#     return sql_query

# # Initialize OpenAI Chat model
# openai_model = ChatOpenAI(
#     openai_api_key=OPENAI_API_KEY,
#     model_name="gpt-3.5-turbo",
#     temperature=0.7,
#     max_tokens=150
# )

# # Create a ChatPromptTemplate with the knowledge base included
# template = """
# ## Knowledge Base:
# {knowledge_base}

# ## Database Schema:
# {database_schema}

# ## Question:
# {question}

# ## Answer:
# """

# prompt_template = ChatPromptTemplate.from_template(template)

# def get_answer_from_chatbot(question, database_schema):
#     try:
#         prompt = prompt_template.format(
#             knowledge_base="",
#             database_schema=database_schema,
#             question=question
#         )
#         response = openai_model.invoke(input=prompt)
#         parsed_response = response.content.strip() if hasattr(response, 'content') else "No response content found."
#         return parsed_response
#     except Exception as e:
#         return f"Error generating response from OpenAI: {str(e)}"
        
# # Function to execute the SQL query and print the results
# def execute_sql_query(conn, sql_query):
#     try:
#         with conn.cursor() as cursor:
#             cursor.execute(sql_query)
#             results = cursor.fetchall()
#             return results
#     except Exception as e:
#         print(f"Error executing SQL query: {e}")
#         return None
        
# # Determine if user query is related to database or general knowledge
# def determine_query_type(user_input, schema_df, threshold = 75):
#     user_query_lower = user_query.lower()
    
#     # Extract unique table and column names from the schema and convert to lowercase
#     table_names = schema_df['table_name'].str.lower().unique()
#     column_names = schema_df['column_name'].str.lower().unique()
    
#     # Function to check fuzzy match
#     def is_fuzzy_match(query, options, threshold):
#         for option in options:
#             if fuzz.partial_ratio(query, option) >= threshold:
#                 return True
#         return False
    
#     # Check if user query matches any table or column name
#     if is_fuzzy_match(user_query_lower, table_names, threshold) or \
#        is_fuzzy_match(user_query_lower, column_names, threshold):
#         return "database"
    
#     return "knowledge"

# # Main function to handle user queries
# def process_user_query(user_input):
#     # Connect to the database and fetch the schema
#     conn = connect_to_db()
#     schema_df = fetch_schema(conn)
#     processed_schema_df = format_schema(schema_df)
#     query_type = determine_query_type(user_input, schema_df)

#     if query_type == "database":
#         sql_query = generate_sql_from_input(user_query, processed_schema_df)
        
#         print("Generated SQL Query:", sql_query)
        
#         # Execute the generated SQL query
#         results = execute_sql_query(conn, sql_query)
#         conn.close()

#         if results is not None:
#             print("Query Results:")
#             for row in results:
#                 print(row)
#         else:
#             print("No results returned or error occurred during query execution.")
        
#         return f"Generated SQL Query: {sql_query}"
    
#     else:
#         # For non-database related queries, respond using the chatbot
#         database_schema = fetch_schema_with_data_types(conn)  # Fetching schema again if needed
#         database_schema_df = process_schema(database_schema)
#         return get_answer_from_chatbot(user_input, database_schema_df.to_dict(orient='records'))


# # Example usage
# if __name__ == "__main__":
#     while True:
#         user_input = input("Enter your query: ")
#         if user_input.lower() in ['exit', 'quit']:
#             break
#         response = process_user_query(user_input)
#         print(response)


# # # Example user input
# # user_input = "who is the most productive user"

# # # Fetch schema and generate SQL query
# # conn = connect_to_db()
# # schema_df = fetch_schema_with_data_types(conn)
# # schema_str = format_schema(schema_df)
# # sql_query = generate_sql_query(schema_str, user_input)
# # sql_query=extract_sql_query(sql_query)
# # results = execute_sql_query(conn, sql_query)
# # print(sql_query)
# # print(results)


In [None]:
# import openai
# import psycopg2
# import pandas as pd

# # Database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # OpenAI API key initialization
# openai.api_key = 'sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S'

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         raise

# # Fetch schema with column names and data types
# def fetch_schema_with_data_types(conn):
#     try:
#         query = """
#         SELECT table_name, column_name, data_type
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema with data types: {e}")
#         raise

# # Format schema as a string for the prompt
# def format_schema(schema_df):
#     schema_str = ""
#     grouped = schema_df.groupby('table_name')
#     for table_name, group in grouped:
#         columns = ', '.join([f"{row['column_name']} ({row['data_type']})" for _, row in group.iterrows()])
#         schema_str += f"{table_name}: {columns}\n"
#     return schema_str

# # Function to generate SQL query using GPT-4o-mini
# def generate_sql_query(schema_str, user_input):
#     prompt = f"""
#     The database contains the following schema:
#     {schema_str}

#     Based on this schema and the user request:
#     "{user_input}"

#     Generate an optimized SQL query that meets the user's intent.
#     The query should be efficient and use the correct table and column names.
#     """

#     # Call GPT-4o-mini-2024-07-18 model using chat completion API
#     response = client.chat.completions.create(
#         model="gpt-4o-mini-2024-07-18",
#         messages=[
#             {"role": "system", "content": "You are a helpful assistant specialized in generating SQL queries."},
#             {"role": "user", "content": prompt}
#         ],
#         max_tokens=500,  # Reduced token limit for completion
#         temperature=0.7
#     )

#     # Extract SQL query from the response
#     sql_response = response.choices[0].message.content
    
#     # # Find and clean the SQL query part
#     # start = sql_response.find("```sql") + 6
#     # end = sql_response.find("```", start)
#     sql_query = sql_response
    
#     return sql_query

# def extract_sql_query(response):
#     # Find the start and end indices of the SQL code block
#     start = response.find("```sql") + len("```sql\n")
#     end = response.find("```", start)
    
#     # Extract and clean the SQL query
#     sql_query = response[start:end].strip()
    
#     return sql_query
    
# # Example user input
# user_input = "which is the longest delayed or lagged milestone name"

# # Fetch schema and generate SQL query
# conn = connect_to_db()
# schema_df = fetch_schema_with_data_types(conn)
# schema_str = format_schema(schema_df)
# sql_query = generate_sql_query(schema_str, user_input)
# sql_query=extract_sql_query(sql_query)
# # Print the cleaned SQL query
# print("Generated SQL Query:")
# print(sql_query)
