## Importing libraries

In [1]:
from dotenv import load_dotenv
import os
from neo4j import GraphDatabase
from py2neo import Graph
import pandas as pd
import neo4j


In [None]:
import pathlib
import textwrap

import google.generativeai as genai  # genai library
from datetime import datetime
from IPython.display import display
from IPython.display import Markdown


In [5]:
load_dotenv() # Loading env file and reading credentials
password=os.getenv('password')
user=os.getenv('user')
Google_API_KEY=os.getenv('Google_api_key') #Loading the keys

### Connecting to Neo4j database and example cyphers

In [6]:

uri = "bolt://localhost:7687"
user = user
password = password
#database_name = "SchoolGPT.SchoolAI_dbms"  # Specify your database name
# add db name to avoid starting theough neo4j desktop
driver = GraphDatabase.driver(uri, auth=(user, password))

In [None]:
query="MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 10"
            #"MATCH (n1:Node {name: 'system'}) RETURN n1 as node_name" #write proper queries
with driver.session() as session:
    result = session.run(query)
    n = [record['m'] for record in result]

In [None]:
n ## sample query and output

### Displaying cypher query responses as pandas df

In [9]:
#Function to query the graph and return the results as a pandas dataframe
def run_query_and_return_df(query):
    with driver.session() as session:
        result = session.run(query)
        return pd.DataFrame([r.values() for r in result], columns=result.keys())


In [None]:
query1 = """
MATCH (n)-[r]->(m)
WHERE r.description contains 'first order reaction'
RETURN n.name as concept_1, m.name as concept_2,
r.edge as r_edge, r.description as r_description
"""
run_query_and_return_df(query1)


### Leveraging LLM to generate cypher queries

In [11]:
#From Google Gemini AI to 
def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [70]:
# Add a few more examples
examples = """
# What are some of the concepts directly related to "internal energy"?
MATCH (n)-[r]->(m)
WHERE n.name CONTAINS 'internal energy' AND r.edge contains 'related'
RETURN n.name as concept_1,
r.edge as r_edge, r.description as r_description, m.name as concept_2

# What is a first order reaction and associated entities?
MATCH (n)-[r]->(m)
WHERE r.description contains 'first order reaction'
RETURN n.name as concept_1, m.name as concept_2,
r.edge as r_edge, r.description as r_description


# What is a first order reaction?
MATCH (n)-[r]->(m)
WHERE r.description contains 'first order reaction'
RETURN r.description as r_description


# What can you tell me about the homogeneous systems?
MATCH (n)-[r]->(m)
WHERE n.name CONTAINS 'homogeneous system' or m.name contains 'homogeneous system' or r.description contains 'homogeneous system' 
RETURN n.name as concept_1,
r.edge as r_edge, r.description as r_description, m.name as concept_2

# What are all the nodes related to chemical reaction
MATCH (n:Concept {name: 'chemical equation'})--(m)
RETURN n.name,m.name


# Find all relationships for a specific keyword.
MATCH ()-[r:RELATIONSHIP]->()
where r.description contains 'energy'
RETURN r.edge,r.description


"""


***Best practices when adding content in the context field of your prompt: Give the chatbot an identity and persona.An identity and persona helps the chatbot role play***

In [13]:
## Creating system prompt for generating cyphers for Neo4jDB


sys_prompt=f""" You are an AI system specializes in generating Neo4jDB Cypher queries based on example Cypher queries.
Example Neo4jDB Cypher queries are: \n {examples} \n
You will refrain from providing explanations or additional information and solely focus on generating the Neo4jDB cypher queries.
You will strictly adhere to generating Neo4jDB cypher queries based on the given examples.
However, if the context of the conversation is insufficient, you will inform the user and specify the missing context.
I repeat, if the context of the conversation is insufficient please inform the user and specify the missing context.
"""
#Do not provide any AQL queries that can't be deduced from AQL query examples. 


In [71]:
genai.configure(api_key=Google_API_KEY) #passing the api key

#Passing system_instruction
model=genai.GenerativeModel('gemini-1.5-pro-latest',system_instruction=[sys_prompt]) # calling the gen model

In [72]:
# Passing conversation history

chat = model.start_chat(history=[
    {
        'role': 'user',
        'parts':{ 'text': "You are an AI system specializing in generating Neo4jDB Cypher queries based on example Cypher queries.You will refrain from providing explanations or additional information and solely focus on generating the Neo4jDB Cypher queries. You will strictly adhere to generating Neo4jDB Cypher queries based on the given examples. However, if the context of the conversation is insufficient, you will inform the user and specify the missing context."
                }
    },
    {
        'role': 'model',
        'parts': {
         
                'text': "Understood. I will only be writing Cypher queries for Neo4j DB."
            }
        
    }
])


In [73]:

user_input="Find all relationships for keyword 'quasi-static process'"#"What can you tell me about the 'quasi-static process'?"
response = chat.send_message(user_input,stream=False)

In [65]:
## Passing function for chat response
def get_chat_response(prompt: str):
    text_response = []
    responses = chat.send_message(prompt, stream=False)
    for chunk in responses:
        text_response.append(chunk.text)
    return "".join(text_response)

In [74]:
user_input="Find all relationships for keyword 'quasi-static process'"
response=get_chat_response(user_input)

In [75]:
response

"MATCH ()-[r:RELATIONSHIP]->()\nwhere r.description contains 'quasi-static process'\nRETURN r.edge,r.description "

### Processing the cyphers and validating them

Note: have to standardise the output 

In [41]:
##Function to clean up the cypher
import re
pattern = pattern = r'```cypher\n(.*?)\n```'
match = re.sub(pattern,"", cypher, flags=re.DOTALL)
# Define the regex pattern to match the Cypher query
pattern = r"```cypher(.*?)```"

# Extract the Cypher query using regex
cypher_query = re.search(pattern, cypher, re.DOTALL).group(1).strip()
cypher_query

pattern = r"#(.*?)?"

cypher_query = re.sub(pattern,"", cypher_query, re.DOTALL)
cypher_query

In [69]:
##Final pattern 
pattern = r"```cypher(.*?)```"

# Extract the Cypher query using regex
cypher_query = re.search(pattern, response, re.DOTALL).group(1).strip()
try:
        print(cypher_query.split("?")[1])
except:
         print(cypher_query.split(":")[1])
    


MATCH ()-[r


In [84]:
# check for valid Neo4j cypher query
def is_valid_cypher_query(query):
    try:
        with driver.session() as session:
                result = session.run(query)
                return pd.DataFrame([r.values() for r in result], columns=result.keys())

    except Exception:
        return False

In [85]:
is_valid_cypher_query(response)
#session.run(response)

Unnamed: 0,r.edge,r.description
0,can-be,A quasi-static process can be an ideal gas in ...
1,is-a,a quasi-static process is a hypothetical const...
2,is-a,A quasi-static process is a nearly static proc...
3,is-a,A quasi-static process is a process in which t...
4,requires,A quasi-static process is a process that is in...
5,excludes,A quasi-static process is a process without di...
6,is-a,A quasi-static process is a reversible process...
7,is-a,"A quasi-static process is a slow process, duri..."
8,part-of,a quasi-static process is a specific type of p...
9,is-related-to,A quasi-static process is an idealized notion.


## References

1. https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-chat-prompts-gemini
2.https://cloud.google.com/vertex-ai/generative-ai/docs/chat/chat-prompts
3. https://www.googlecloudcommunity.com/gc/AI-ML/Gemini-Pro-Context-Option/m-p/684704/highlight/true#M4159
4.https://www.kaggle.com/code/rahulsingh04/chat-with-resume
