In [1]:
import openai
import psycopg2
import pandas as pd
import re

# Initialize OpenAI API key
OPENAI_API_KEY = ""
openai.api_key = OPENAI_API_KEY

# Database connection details
DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
DATABASE_USERNAME = "postgres"
DATABASE_PASSWORD = "valign#123"
DATABASE_DB = "python_test_poc"
PORT = 5432

# Function to connect to PostgreSQL database
def connect_to_db():
    try:
        conn = psycopg2.connect(
            dbname=DATABASE_DB,
            user=DATABASE_USERNAME,
            password=DATABASE_PASSWORD,
            host=DATABASE_HOST,
            port=PORT
        )
        return conn
    except psycopg2.Error as e:
        print(f"Error connecting to the database: {e}")
        raise

# Function to fetch schema from PostgreSQL database
def fetch_schema(conn):
    try:
        query = """
        SELECT table_name, column_name
        FROM information_schema.columns
        WHERE table_schema = 'public'
        """
        schema_df = pd.read_sql(query, conn)
        return schema_df
    except Exception as e:
        print(f"Error fetching schema: {e}")
        raise

# Function to process schema: remove special characters and convert to lowercase
def process_schema(schema_df):
    def clean_column_name(name):
        return re.sub(r'[^a-zA-Z]', '', name).lower()

    schema_df['processed_column_name'] = schema_df['column_name'].apply(clean_column_name)
    return schema_df

# Function to extract features (like project name, owner, etc.) using OpenAI's LLM
def extract_features_with_openai(user_input, processed_schema_df):
    schema_json = processed_schema_df.to_json(orient='records')
    
    # Refined prompt to ensure OpenAI only returns valid matches
    prompt = f"""
    ## Database Schema Context:
    The following represents the columns available in the database:
    {schema_json}

    ## User Input:
    The user has provided the following input: "{user_input}"

    ## Task:
    Extract the relevant features or values from the user input based on the schema. These features might include project names, owners, dates, statuses, etc. 

    ## Instructions:
    - Return the fields that have valid values based on the user input in a JSON dictionary format.
    - Omit any fields where the value is empty or null.
    - Format the output as a JSON object with keys only for fields that have values.
    - Example:
      {{
        "project_name": "Some Project",
        "owner": "Some Owner"
      }}
    """

    try:
        response = openai.completions.create(
            model="gpt-3.5-turbo-instruct",
            prompt=prompt,
            max_tokens=500,
            temperature=0.5
        )
        extracted_features = response.choices[0].text.strip()
        return extracted_features
    except openai.OpenAIError as e:
        print(f"Error with OpenAI: {e}")
        raise

# Function to parse and store extracted features into a dictionary
def parse_extracted_features(extracted_features):
    try:
        feature_dict = eval(extracted_features)  # Convert string representation to dictionary
        # Ensure values are valid and not placeholders
        cleaned_dict = {k: v for k, v in feature_dict.items() if v and v.lower() not in {'no relevant value found', 'none', 'n/a', 'not specified'}}
        return cleaned_dict
    except (SyntaxError, ValueError) as e:
        print(f"Error parsing features: {e}")
        return {}

# Main function to process user input and extract entities
def process_user_input(user_input):
    # Connect to DB and fetch schema
    conn = connect_to_db()
    schema_df = fetch_schema(conn)
    processed_schema_df = process_schema(schema_df)

    # Extract features from user input using OpenAI's LLM
    extracted_features = extract_features_with_openai(user_input, processed_schema_df)
    
    # Print the extracted features
    print("Extracted Features from OpenAI:")
    print(extracted_features)

    # Parse extracted features into a dictionary
    feature_dict = parse_extracted_features(extracted_features)
    
    # Print the parsed feature dictionary
    print("\nParsed Feature Dictionary:")
    print(feature_dict)

# Get user input
user_input = input("Enter your query: ")

# Process user input and extract entities
process_user_input(user_input)


Enter your query:  what is the status of a project IIFL Samasta


  schema_df = pd.read_sql(query, conn)


Extracted Features from OpenAI:
## Output:
    {
        "project_name": "IIFL Samasta",
        "status": "in progress"
    }
Error parsing features: unexpected indent (<string>, line 2)

Parsed Feature Dictionary:
{}


In [2]:
# import openai
# import psycopg2
# import pandas as pd
# import re
# import json

# # Initialize OpenAI API key
# OPENAI_API_KEY = "sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S"
# openai.api_key = OPENAI_API_KEY

# # Database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         raise

# # Function to fetch schema from PostgreSQL database
# def fetch_schema(conn):
#     try:
#         query = """
#         SELECT table_name, column_name
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema: {e}")
#         raise

# # Function to process schema: remove special characters and convert to lowercase
# def process_schema(schema_df):
#     def clean_column_name(name):
#         return re.sub(r'[^a-zA-Z]', '', name).lower()

#     schema_df['processed_column_name'] = schema_df['column_name'].apply(clean_column_name)
#     return schema_df

# # Function to extract features (like project name, owner, etc.) using OpenAI's LLM
# def extract_features_with_openai(user_input, processed_schema_df):
#     schema_json = processed_schema_df.to_json(orient='records')
    
#     # Refined prompt to ensure OpenAI only returns valid matches
#     prompt = f"""
#     ## Database Schema Context:
#     The following represents the columns available in the database:
#     {schema_json}

#     ## User Input:
#     The user has provided the following input: "{user_input}"

#     ## Task:
#     Extract the relevant features or values from the user input based on the schema. These features might include project names, owners, dates, statuses, etc. 

#     ## Instructions:
#     - Return the fields that have valid values based on the user input in a JSON dictionary format.
#     - Omit any fields where the value is empty or null.
#     - Format the output as a JSON object with keys only for fields that have values.
#     - Example:
#       {{
#         "project_name": "Some Project",
#         "owner": "Some Owner"
#       }}
#     """

#     try:
#         response = openai.completions.create(
#             model="gpt-3.5-turbo-instruct",
#             prompt=prompt,
#             max_tokens=500,
#             temperature=0.5
#         )
#         extracted_features = response.choices[0].text.strip()
#         print("Raw Extracted Features:")
#         print(extracted_features)  # Print the raw output for debugging
#         return extracted_features
#     except openai.OpenAIError as e:
#         print(f"Error with OpenAI: {e}")
#         raise
      
# # Main function to process user input and extract entities
# def process_user_input(user_input):
#     # Connect to DB and fetch schema
#     conn = connect_to_db()
#     schema_df = fetch_schema(conn)
#     processed_schema_df = process_schema(schema_df)

#     # Extract features from user input using OpenAI's LLM
#     extracted_features = extract_features_with_openai(user_input, processed_schema_df)
    

# # Get user input
# user_input = input("Enter your query: ")

# # Process user input and extract entities
# process_user_input(user_input)


In [3]:
# #Extract entities or features from user input
# import openai
# import psycopg2
# import pandas as pd
# import re
# import json

# # Initialize OpenAI API key
# OPENAI_API_KEY = "sk-proj-UnzdWuWBs7ZQRbRPiRCoT3BlbkFJhPM1p7DdZUMklcpnWK1S"
# openai.api_key = OPENAI_API_KEY

# # Database connection details
# DATABASE_HOST = "database-test-postgress-instance.cpk2uyae6iza.ap-south-1.rds.amazonaws.com"
# DATABASE_USERNAME = "postgres"
# DATABASE_PASSWORD = "valign#123"
# DATABASE_DB = "python_test_poc"
# PORT = 5432

# # Function to connect to PostgreSQL database
# def connect_to_db():
#     try:
#         conn = psycopg2.connect(
#             dbname=DATABASE_DB,
#             user=DATABASE_USERNAME,
#             password=DATABASE_PASSWORD,
#             host=DATABASE_HOST,
#             port=PORT
#         )
#         return conn
#     except psycopg2.Error as e:
#         print(f"Error connecting to the database: {e}")
#         raise

# # Function to fetch schema from PostgreSQL database
# def fetch_schema(conn):
#     try:
#         query = """
#         SELECT table_name, column_name
#         FROM information_schema.columns
#         WHERE table_schema = 'public'
#         """
#         schema_df = pd.read_sql(query, conn)
#         return schema_df
#     except Exception as e:
#         print(f"Error fetching schema: {e}")
#         raise

# # Function to process schema: remove special characters and convert to lowercase
# def process_schema(schema_df):
#     def clean_column_name(name):
#         return re.sub(r'[^a-zA-Z]', '', name).lower()

#     schema_df['processed_column_name'] = schema_df['column_name'].apply(clean_column_name)
#     return schema_df

# # Function to extract features (like project name, owner, etc.) using OpenAI's LLM
# def extract_features_with_openai(user_input, processed_schema_df):
#     schema_json = processed_schema_df.to_json(orient='records')
    
#     # Refined prompt to ensure OpenAI only returns valid matches
#     prompt = f"""
#     ## Database Schema Context:
#     The following represents the columns available in the database:
#     {schema_json}

#     ## User Input:
#     The user has provided the following input: "{user_input}"

#     ## Task:
#     Extract the relevant features or values from the user input based on the schema. These features might include project names, owners, dates, statuses, etc. 

#     ## Instructions:
#     - Return the fields that have valid values based on the user input in a JSON dictionary format.
#     - Omit any fields where the value is empty or null.
#     - Format the output as a JSON object with keys only for fields that have values.
#     """

#     try:
#         response = openai.completions.create(
#             model="gpt-3.5-turbo-instruct",
#             prompt=prompt,
#             max_tokens=500,
#             temperature=0.5
#         )
#         extracted_features = response.choices[0].text.strip()
#         return extracted_features
#     except openai.OpenAIError as e:
#         print(f"Error with OpenAI: {e}")
#         raise

# # Function to remove null, None, empty values from JSON and list
# def clean_extracted_features(feature_dict):
#     # Remove any keys with None or empty values
#     cleaned_feature_dict = {k: v for k, v in feature_dict.items() if v}
#     # Extract the non-null values into a list
#     feature_list = list(cleaned_feature_dict.values())
#     return cleaned_feature_dict, feature_list

# # Function to parse and process extracted features
# def process_extracted_features(extracted_features):
#     try:
#         # Remove the "## Solution:" part and any other non-JSON text
#         json_match = re.search(r'\{.*\}', extracted_features, re.DOTALL)
        
#         if json_match:
#             # Extract the JSON part from the matched result
#             cleaned_features = json_match.group(0)

#             # Convert JSON string to a Python dictionary
#             feature_dict = json.loads(cleaned_features)

#             # Clean feature dictionary and feature list to remove nulls and empty values
#             cleaned_feature_dict, feature_list = clean_extracted_features(feature_dict)

#             # Return cleaned JSON and feature list
#             return json.dumps(cleaned_feature_dict, indent=4), feature_list
#         else:
#             return None, []
#     except (json.JSONDecodeError, ValueError) as e:
#         print(f"Error parsing features: {e}")
#         return None, []

# # Main function to process user input and extract entities
# def process_user_input(user_input):
#     # Connect to DB and fetch schema
#     conn = connect_to_db()
#     schema_df = fetch_schema(conn)
#     processed_schema_df = process_schema(schema_df)

#     # Extract features from user input using OpenAI's LLM
#     extracted_features = extract_features_with_openai(user_input, processed_schema_df)

#     # Process the extracted features and clean them
#     cleaned_json, feature_list = process_extracted_features(extracted_features)

#     # Output cleaned JSON and feature list
#     if cleaned_json:
#         print("Cleaned Extracted JSON Features:")
#         print(cleaned_json)
    
#     print("Feature List:")
#     print(feature_list)

# # Get user input
# user_input = input("Enter your query: ")

# # Process user input and extract entities
# process_user_input(user_input)
