# Prompt Strategies to improve LLM Results using KG

# Setup 

#### Load '.env'

In [42]:
from dotenv import load_dotenv
import os

load_dotenv()

## Openai
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")


# Nvidia
os.environ["NVIDIA_API_KEY"] = os.getenv("NVIDIA_API_KEY")

## langsmith tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

## Huggingface
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

## Neo4j
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")


#### Load Graph DB

In [43]:
from langchain_community.graphs import Neo4jGraph

graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
)

In [44]:
graph.refresh_schema()
print(graph.schema)

Node properties:
CEO {name: STRING, POB: STRING, YOB: INTEGER}
Company {name: STRING}
User {name: STRING, city: STRING, userId: INTEGER, age: INTEGER}
Post {userId: INTEGER}
Movie {movieId: INTEGER, title: STRING, imdbRating: FLOAT}
Director {name: STRING}
Actor {name: STRING}
Genre {name: STRING}
Relationship properties:

The relationships:
(:CEO)-[:CEO]->(:Company)
(:User)-[:POSTED]->(:Post)
(:User)-[:FRIEND]->(:User)
(:User)-[:LIKES]->(:User)
(:Movie)-[:DIRECTED_BY]->(:Director)
(:Movie)-[:HAS_ACTOR]->(:Actor)
(:Movie)-[:HAS_GENRE]->(:Genre)
(:Director)-[:DIRECTOR_OF]->(:Movie)
(:Actor)-[:ACTOR_OF]->(:Movie)
(:Genre)-[:GENRE_OF]->(:Movie)


#### Chose LLM

In [45]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Cypher Chain

In [46]:
from langchain.chains import GraphCypherQAChain

chain = GraphCypherQAChain.from_llm(
    llm=llm, 
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
    return_intermediate_steps=True,
    use_function_response=True,
    top_k=5,
    #exclude_nodes=["Movie", "Person"],
    #exclude_types=["Movie", "Person"],
    )


In [47]:
chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x73c43b582980>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x73c43b403130>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x73c43b46a200>, root_client=<openai.OpenAI object at 0x73c43b4

In [48]:
chain.schema

<bound method BaseModel.schema of <class 'langchain_community.chains.graph_qa.cypher.GraphCypherQAChain'>>

In [49]:
chain.graph.schema

'Node properties:\nCEO {name: STRING, POB: STRING, YOB: INTEGER}\nCompany {name: STRING}\nUser {name: STRING, city: STRING, userId: INTEGER, age: INTEGER}\nPost {userId: INTEGER}\nMovie {movieId: INTEGER, title: STRING, imdbRating: FLOAT}\nDirector {name: STRING}\nActor {name: STRING}\nGenre {name: STRING}\nRelationship properties:\n\nThe relationships:\n(:CEO)-[:CEO]->(:Company)\n(:User)-[:POSTED]->(:Post)\n(:User)-[:FRIEND]->(:User)\n(:User)-[:LIKES]->(:User)\n(:Movie)-[:DIRECTED_BY]->(:Director)\n(:Movie)-[:HAS_ACTOR]->(:Actor)\n(:Movie)-[:HAS_GENRE]->(:Genre)\n(:Director)-[:DIRECTOR_OF]->(:Movie)\n(:Actor)-[:ACTOR_OF]->(:Movie)\n(:Genre)-[:GENRE_OF]->(:Movie)'

# Provide FewShot Prompt

In [None]:
examples = [
    {
        "question": "How many artists are there?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie) RETURN count(DISTINCT a)",
    },
    {
        "question": "Which actors played in the movie Casino?",
        "query": "MATCH (m:Movie {{title: 'Casino'}})<-[:ACTED_IN]-(a) RETURN a.name",
    },
    {
        "question": "How many movies has Tom Hanks acted in?",
        "query": "MATCH (a:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)",
    },
    {
        "question": "List all the genres of the movie Schindler's List",
        "query": "MATCH (m:Movie {{title: 'Schindler\\'s List'}})-[:IN_GENRE]->(g:Genre) RETURN g.name",
    },
    {
        "question": "Which actors have worked in movies from both the comedy and action genres?",
        "query": "MATCH (a:Person)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name",
    },
    {
        "question": "Which directors have made movies with at least three different actors named 'John'?",
        "query": "MATCH (d:Person)-[:DIRECTED]->(m:Movie)<-[:ACTED_IN]-(a:Person) WHERE a.name STARTS WITH 'John' WITH d, COUNT(DISTINCT a) AS JohnsCount WHERE JohnsCount >= 3 RETURN d.name",
    },
    {
        "question": "Identify movies where directors also played a role in the film.",
        "query": "MATCH (p:Person)-[:DIRECTED]->(m:Movie), (p)-[:ACTED_IN]->(m) RETURN m.title, p.name",
    },
    {
        "question": "Find the actor with the highest number of movies in the database.",
        "query": "MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1",
    },
]

In [None]:
from langchain_core.prompts import FewShotPromptTemplate,PromptTemplate

example_prompt=PromptTemplate.from_template(
    "User input:{question}\n Cypher query:{query}"
)

prompt=FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question,create a syntactically very accurate Cypher query",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question","schema"]
)

In [62]:
prompt

FewShotPromptTemplate(input_variables=['question', 'schema'], examples=[{'question': 'How many actors are there?', 'cypher_query': 'MATCH (a:Actor) RETURN count(a)'}, {'question': 'Which actors played in the movie Casino?', 'cypher_query': "MATCH (m:Movie {title: 'Casino'})<-[:ACTOR_OF]-(a:Actor) RETURN a.name"}, {'question': 'How many movies has Tom Hanks acted in?', 'cypher_query': "MATCH (a:Actor {name: 'Tom Hanks'})-[:ACTOR_OF]->(m:Movie) RETURN count(m)"}, {'question': "List all the genres of the movie Schindler's List", 'cypher_query': "MATCH (m:Movie {title: 'Schindler\\'s List'})-[:HAS_GENRE]->(g:Genre) RETURN g.name"}, {'question': 'Which actors have worked in movies from both the comedy and action genres?', 'cypher_query': "MATCH (a:Actor)-[:ACTOR_OF]->(:Movie)-[:HAS_GENRE]->(g1:Genre), (a)-[:ACTOR_OF]->(:Movie)-[:HAS_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name"}], example_prompt=PromptTemplate(input_variables=['cypher_query', 'qu

In [63]:
chain = GraphCypherQAChain.from_llm(
    llm=llm, 
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
    return_intermediate_steps=True,
    use_function_response=True,
    top_k=5,
    cypher_prompt=prompt,
    )

In [None]:
chain.invoke({"query":"Which actors played in the movie Casino?"})

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


KeyError: 'title'