# Setup

### Imports

In [81]:
# setup venv
# !pip install langchain langchain-community neo4j openai wikipedia tiktoken langchain-openai torch pinecone
# %pip install llama-index-embeddings-openai
# %pip install llama-index-vector-stores-pinecone
# %pip install llama-index-llms-openai
# !pip install llama-index
# !pip -q install python-dotenv pinecone-client llama-index pymupdf

In [82]:
import os
import torch
import torch.nn as nn
from dotenv import load_dotenv
from openai import OpenAI
from langchain.graphs import Neo4jGraph
from tqdm import tqdm
import json
import uuid

import pinecone
from pinecone import Pinecone, ServerlessSpec

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
# from llama_index.vector_stores.pinecone import PineconeVectorStore

from langchain_community.graphs.graph_document import (
    Node as BaseNode,
    Relationship as BaseRelationship,
    GraphDocument,
)
from langchain.schema import Document
from typing import List, Dict, Any, Optional
from langchain.pydantic_v1 import Field, BaseModel
from langchain.chains import GraphCypherQAChain

### Env and tooling

In [83]:
load_dotenv(dotenv_path='secrets.env')

True

In [84]:
api_key = os.environ['PINECONE_API_KEY']
environment = os.environ['PINECONE_ENVIRONMENT']
pinecone = Pinecone(api_key=api_key, environment=environment)
index_name = "knowledge-graph"
pinecone_index = pinecone.Index(index_name)
# vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

In [85]:
load_dotenv(dotenv_path='secrets.env')
graph = Neo4jGraph(
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"]
)

In [86]:
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Graph Classes and Functions

In [18]:
class Property(BaseModel):
    key: str = Field(..., description="key")
    value: str = Field(..., description="value")

class Node(BaseNode):
    properties: Optional[List[Property]] = Field(None, description="node properties")

class Relationship(BaseRelationship):
    properties: Optional[List[Property]] = Field(None, description="relationship properties")

class KnowledgeGraph(BaseModel):
    nodes: List[Node] = Field(..., description="nodes in the graph")
    rels: List[Relationship] = Field(..., description="relationships in the graph")

In [19]:
def format_property_key(s: str) -> str:
    words = s.split()
    if not words:
        return s
    first_word = words[0].lower()
    capitalized_words = [word.capitalize() for word in words[1:]]
    return first_word + "".join(capitalized_words)

def props_to_dict(props) -> dict:
    properties = {}
    if not props:
        return properties
    for p in props:
        properties[format_property_key(p.key)] = p.value
    return properties

def map_to_base_node(node: Node) -> BaseNode:
    properties = props_to_dict(node.properties) if node.properties else {}
    properties["name"] = node.id.title()
    return BaseNode(
        id=node.id.title(), type=node.type.capitalize(), properties=properties
    )

def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
    source = map_to_base_node(rel.source)
    target = map_to_base_node(rel.target)
    properties = props_to_dict(rel.properties) if rel.properties else {}
    return BaseRelationship(
        source=source, target=target, type=rel.type, properties=properties
    )

In [20]:
from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

def get_extraction_chain(
        allowed_nodes: Optional[List[str]] = None,
        allowed_rels: Optional[List[str]] = None
    ):
    prompt = ChatPromptTemplate.from_messages(
        [(
            "system",
            f"""# Knowledge Graph Instructions for GPT-4
    ## 1. Overview
    You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
    - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
    - The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
    ## 2. Labeling Nodes
    - **Consistency**: Ensure you use basic or elementary types for node labels.
    - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
    - **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
    {'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
    {'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
    ## 3. Handling Numerical Data and Dates
    - Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
    - **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
    - **Property Format**: Properties must be in a key-value format.
    - **Quotation Marks**: Never use escaped single or double quotes within property values.
    - **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
    ## 4. Coreference Resolution
    - **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
    If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
    always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
    Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
    ## 5. Strict Compliance
    Adhere to the rules strictly. Non-compliance will result in termination.
            """),
            ("human", "Use the given format to extract info from the following input: {input}"),
            ("human", "Tip: Make sure to answer in the correct format"),
        ])
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)

In [21]:
def extract_and_store_graph(
      document:Document,
      nodes:Optional[List[str]] = None,
      rels:Optional[List[str]]=None) -> GraphDocument:
    
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain(nodes, rels)
    data = extract_chain.invoke(document.page_content)['function']
    # Construct a graph document
    graph_document = GraphDocument(
      nodes = [map_to_base_node(node) for node in data.nodes],
      relationships = [map_to_base_relationship(rel) for rel in data.rels],
      source = document
    )
        
    graph.add_graph_documents([graph_document])


# Loading Text

### Wikipedia Loader

In [14]:
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter

# Read the wikipedia article
raw_documents = WikipediaLoader(query="History of France").load()

# Define chunking strategy
text_splitter = TokenTextSplitter(chunk_size=600, chunk_overlap=24)

# Only take the first the raw_documents
documents = text_splitter.split_documents(raw_documents)[:10]

### Adversarial Prompt

In [None]:
text = """
Event A came before Event B.
Event B came before Event C.
Event C came before Event A.
"""
documents = [Document(page_content=text)]

# Build Graph

### Delete Graph

In [16]:
# Delete the graph
graph.query("MATCH (n) DETACH DELETE n")

[]

### Construct Graph

In [17]:
from tqdm import tqdm

for i, d in tqdm(enumerate(documents), total=len(documents)):
    extract_and_store_graph(d)

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [03:45<00:00, 22.56s/it]


# Get Nodes and Relationships, Upsert to Pinecone

In [22]:
query = "MATCH (n) RETURN n"
all_nodes = graph.query(query)
all_nodes_list = []
for record in all_nodes:
    if (record.get('n').get('name') is not None):
        all_nodes_list.append(record.get('n').get('name'))
print(all_nodes_list)

['France', "Napoleon'S French Empire", 'Second Republic', 'Second Empire', 'French Third Republic', 'Triple Entente', 'Germany', 'Central Powers', 'Allied Powers', 'Nazi Germany', 'Vichy Government', 'Fourth Republic', 'Baby Boom', 'Indochina', 'Algeria', 'Charles De Gaulle', 'French Fifth Republic', 'French Colonial Empire', 'Medievalkingdomoffrance', 'Frenchrevolution', 'Napoleonicera', 'Unitedkingdom', 'Industrialization', 'Imperialism', 'Late19Thandearly20Thcenturies', 'Welfarestate', 'Europeanunion', 'Eurozone', 'Nicolas Sarkozy', 'Conservative Government', 'Kingdom Of France', 'West Francia', 'Hugh Capet', 'Philip Ii', 'French Revolution', 'House Of Plantagenet', 'Kingdom Of England', 'Angevin Empire', "Hundred Years' War", 'French Throne', 'Italy', 'Spain', 'Holy Roman Empire', 'Italian Wars', 'Capetian Dynasty', 'House Of France', 'Robertians', 'Karlings', 'French', 'Romance Language', 'Gallo-Romance Languages', 'Gauls', 'Belgae', 'Iberians', 'Ligures', 'Greek Colonials', 'Vasc

In [23]:
query = "MATCH (n)-[r]-(m) RETURN distinct type(r)"
all_rel_types = graph.query(query)
all_rel_types_filtered = []
for record in all_rel_types:
    # split into list by :
    rel_type = record.get('type(r)').split(':')
    print(rel_type[0].lower())
    all_rel_types_filtered.append(rel_type[0].lower())

experienced
hashistorian
hasleader
controlledby
hascolonialempire
involvedin
governedas
partof
conqueredby
controlled
hasruler
hasterritory
hasrivalry
hasconflict
defeated_in
opposed
paved_the_way_for
sought_to_extend_influence_into
hasinfluence
hashistoricalperiod
participated_in
underwent
wentthrough
opposedin
establishedby
has_political_history
tookplacein
hadimpacton
competitionwith
introducedin
integratedwith
impact
affect
first_king
abolished_by
originated_as
ruled_by
abolished
ruled
involved_in
claimed
defeated_by
branch_of
descendant_of
descendedfrom
classifiedunder
exhibits
spokenby
influencedby
derivedfrom
writtenfor
hassequence
hasinscription
haslanguage
hasvariation
translation
hasexample
opposed_to


### Upserting to Pinecone

In [50]:
# test text embeddings ada 002
def get_embeddings(text):
    response = client.embeddings.create(
        model="text-embedding-3-large",
        input = text
    )
    return response.data[0].embedding

In [87]:
vec1 = get_embeddings("hascolonialterritory")
vec2 = get_embeddings("has colonial territory")
vec3 = get_embeddings("british 13 original")
vec4 = get_embeddings("asd.f..da sdf asdf dfa fdsja;slktThteh cat sdjljuajsdpfasd asss")

In [88]:
print(vec3)

[0.012759219855070114, 0.019824106246232986, 0.004384961910545826, -0.006497901864349842, -0.02765585295855999, -0.007346340920776129, -0.006012496538460255, 0.0405292846262455, -0.005082476884126663, 0.02235310897231102, -0.019383572041988373, -0.007901089265942574, -0.016870886087417603, -0.015973499044775963, 0.011502876877784729, 0.021374139934778214, -0.012400264851748943, 0.021537302061915398, -0.00594315305352211, 0.022532586008310318, 0.028993776068091393, -0.010719702579081059, -0.0005934995133429766, -0.005310902837663889, 0.006705932319164276, 0.008272281847894192, -0.008590446785092354, 0.0003069473314099014, -0.024898424744606018, 0.019807791337370872, 0.0056331465020775795, -0.0016326335025951266, -0.0009973238920792937, -0.021586250513792038, 0.011298924684524536, 0.012702113017439842, -0.00208438653498888, -0.016838254407048225, -0.01628350466489792, 0.008194779977202415, 0.0017029967857524753, -0.010874705389142036, -0.00021427677711471915, 0.01974252611398697, 0.01387

In [89]:
cos = nn.CosineSimilarity(dim=0, eps=1e-6)
print(cos(torch.tensor(vec1), torch.tensor(vec2)))
print(cos(torch.tensor(vec1), torch.tensor(vec3)))
print(cos(torch.tensor(vec2), torch.tensor(vec3)))
print(cos(torch.tensor(vec1), torch.tensor(vec4)))
print(cos(torch.tensor(vec2), torch.tensor(vec4)))

tensor(0.7979)
tensor(0.4142)
tensor(0.4140)
tensor(0.1316)
tensor(0.0588)


In [100]:
pinecone_index.query(namespace="relationship-types", vector=vec3, top_k=10, include_metadata=True)
pinecone_index.query(namespace="nodes", vector=vec3, top_k=10, include_metadata=True)

{'matches': [{'id': '88e8fac1-da21-4540-a0b4-d781309fabda',
              'metadata': {'text': 'Great Britain'},
              'score': 0.40286237,
              'values': []},
             {'id': '700c5846-4b4a-4289-9a75-9e87b08c2ab8',
              'metadata': {'text': 'Greek Colonials'},
              'score': 0.340299428,
              'values': []},
             {'id': '1e6ed60b-d4a4-44cc-b5df-b49dc809f6ad',
              'metadata': {'text': 'Kingdom Of England'},
              'score': 0.32262066,
              'values': []},
             {'id': 'a2cf26a4-81c0-4d49-b92c-73cc0a71e544',
              'metadata': {'text': 'England'},
              'score': 0.315521598,
              'values': []},
             {'id': '8035ff0b-96ff-4b21-afce-51584fd083a8',
              'metadata': {'text': 'Unitedkingdom'},
              'score': 0.306652576,
              'values': []},
             {'id': 'e789ce4b-e564-4874-a0d7-4b99bcbccd22',
              'metadata': {'text': "Seven Years' Wa

In [94]:
documents = [all_rel_types_filtered, all_nodes_list]
to_upsert_rel_types = []
to_upsert_nodes = []

for record in tqdm(all_rel_types_filtered, desc="Processing rel types"):
    if (record is not None):
        data = {
            "id": str(uuid.uuid4()),
            "values": get_embeddings(record),
            "metadata": {
                "text": record
            }
        }
        to_upsert_rel_types.append(data)

for record in tqdm(all_nodes_list, desc="Processing nodes"):
    if (record is not None):
        data = {
            "id": str(uuid.uuid4()),
            "values": get_embeddings(record),
            "metadata": {
                "text": record
            }
        }
        to_upsert_nodes.append(data)

Processing rel types: 100%|██████████| 58/58 [00:16<00:00,  3.42it/s]
Processing nodes: 100%|██████████| 169/169 [00:46<00:00,  3.65it/s]


In [97]:
batch_size = 10
for i in tqdm(range(0, len(to_upsert_rel_types), batch_size), desc="Upserting rel types batches"):
    batch = to_upsert_rel_types[i:i + batch_size]
    pinecone_index.upsert(vectors=batch, namespace="relationship-types")
for i in tqdm(range(0, len(to_upsert_nodes), batch_size), desc="Upserting node batches"):
    batch = to_upsert_nodes[i:i + batch_size]
    pinecone_index.upsert(vectors=batch, namespace="nodes")

Upserting rel types batches: 100%|██████████| 6/6 [00:03<00:00,  1.94it/s]
Upserting node batches: 100%|██████████| 17/17 [00:07<00:00,  2.31it/s]


### Delete Namespace

In [96]:
# pinecone_index.delete(delete_all=True, namespace='nodes')
# pinecone_index.delete(delete_all=True, namespace='relationship-types')



{}

# Intelligent Graph Queries

In [None]:
def get_rag_for_query(query):
    query_embed = get_embeddings(query)
    nodes_returned = pinecone_index.query(namespace="nodes", vector=query_embed, top_k=10, include_metadata=True)
    relationships_returned = pinecone_index.query(namespace="relationship-types", vector=query_embed, top_k=10, include_metadata=True)

    nodes_text_list = []
    relationships_text_list = []

    for node in nodes_returned['matches']:
        nodes_text_list.append(node['metadata']['text'])

    for relationship in relationships_returned['matches']:
        relationships_text_list.append(relationship['metadata']['text'])
    
    return nodes_text_list, relationships_text_list

In [101]:

# MAY NOT NEED THIS

def get_nodes_and_rels_in_query(query):
    # split query into individual words
    words = query.split()
    
    # ask GPT to generate a list of nodes and relationships in the query
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to alter original prompts to make similar prompts which are worded differently. Use synonyms, paraphrases, and other techniques to alter the original prompts. Freely switch out words in the prompt with other nodes and relationships provided.'},
            {'role': 'user', 'content': 
                'Generate a numbered list of important nouns and verbs in the following query:\n' 
                + query
            }
        ]
    )
    print(response.choices[0].message.content)

In [102]:
res = get_nodes_and_rels_in_query("Have France and Germany even been allies in the past?")
print(res)

1. France
2. Germany
3. been
4. allies
5. past
None


In [130]:
# wrapper function that can edit the original query
def query_runner(query):
    # nodes should be a list of nodes
    # relationships should be a list of relationships
    # query should be a string
    nodes_text_list, rels_text_list = get_rag_for_query(query)
    print(nodes_text_list)
    print(rels_text_list)  
    get_prompts = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to alter original prompts to make similar prompts which are worded differently. Use synonyms, paraphrases, and other techniques to alter the original prompts. Freely switch out words in the prompt with other nodes and relationships provided.'},
            {'role': 'user', 'content': 
                'Generate 9 prompts in a numbered list with no extra newlines. Be creative and replace nouns and verbs using the following word lists. You have significant leeway in altering the original prompt. You must use every node and relationship below at some point. The nodes in this graph are:\n' 
                + ', '.join(nodes_text_list)
                + '\nThe relationships in this graph are:\n' 
                + ', '.join(rels_text_list)
                + 'The original prompt is:\n' 
                + query
            }
        ]
    )

    # print(get_prompts.choices[0].message.content)
    # parse the numbered list that is returned
    prompts_list = get_prompts.choices[0].message.content.split('\n')
    res = []
    for i in range(len(prompts_list)):
        prompt = prompts_list[i][3:]
        res.append(prompt)
        
    return res
    

In [141]:
query_runner("List all the countries that are part of the same alliance as France.")

['hascolonialempire', 'sought_to_extend_influence_into', 'participated_in', 'has_political_history', 'involved_in', 'partof', 'haslanguage', 'integratedwith', 'hasleader', 'defeated_in']
['Allies', 'Allied Powers', 'Kingdom Of France', 'France', 'Triple Entente', "Napoleon'S French Empire", 'House Of France', 'West Francia', 'Europeanunion', 'Algeria']


['Identify all nations allied with the Kingdom of France. ',
 'Enumerate the states that were members of the Triple Entente alongside France.',
 "Name the countries that have been involved in alliances with Napoleon's French Empire.",
 'Detail the regions that have political history integrated with the House of France. ',
 'List the territories that were sought to extend influence into by West Francia.',
 'Catalog the lands that have colonial empires associated with France.',
 'Identify nations that participated in coalitions with the European Union.',
 'Describe the areas that were defeated in conflicts with the Kingdom of France.',
 'Outline the countries that have leaders from the House of France.']

In [170]:
def query_runner_loop(query):
    iteration_ctr = 0
    satisfied_output = False
    graph.refresh_schema()

    cypher_chain = GraphCypherQAChain.from_llm(
        graph=graph,
        cypher_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
        qa_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
        validate_cypher=True, # Validate relationship directions
        verbose=True
    )

    while (not satisfied_output and iteration_ctr < 10):
        iteration_ctr += 1
        prompts = query_runner(query)
        prompts.append(query)
        outputs = []
        for prompt in prompts:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {'role': 'system', 'content': 'You are the world expert in generating Cypher queries on a Neo4J graph to answer a natural language query.'},
                    {'role': 'user', 'content': 
                        'The original query is:\n' 
                        + prompt + '\n'
                        + 'The available relationship types in the knowledge graph are:\n' + '\n'.join(all_rel_types_filtered)
                        + 'This is a list of all nodes in the knowledge graph:\n' + '\n'.join(all_nodes_list)
                        + 'An example of a query is:\n'
                        + 'MATCH (france:Country {name: "France"})-[:SOUGHT_TO_EXTEND_INFLUENCE_INTO]->(region)\n'
                        + 'RETURN region.name\n'
                        + 'Do not generate queries with more than 3 parts. Do not make your queries more complex than necessary. Only return the Cypher query. Do not explain anything or wrap your answer in quotes.'
                    }
                ]
            )
            cypher_query = response.choices[0].message.content
            print("cypher_query: " + cypher_query)
            try:
                res = graph.query(cypher_query) 
            except Exception as e:
                print(f"An error occurred: {e}")
                continue
            if res != "I don't know the answer." and res != []:
                print("\n\n\n\nresult: " + res)
                outputs.append(', '.join(res))

        # merge outputs into string
        context = "\n".join(outputs)

        query_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to answer questions only based on the context you are given. Your answers must be concise and very short.'},
                {'role': 'user', 'content': 
                    'The original question is: '+ query + '\n'
                    'The given context is: ' + context + '\n'
                    'Your task is to answer the original question using only the context you are given. Do not use outside knowledge. If the context is empty, say "I don\'t know the answer.". Your answer must be concise and very short.'
                }
            ]
        )

        print("\nAnswer: " + query_response.choices[0].message.content + "\n")

        decide_continue_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to answer questions only based on the context you are given. Your answers must be concise and very short.'},
                {'role': 'user', 'content': 
                    'The original prompt is:\n' 
                    + query + '\n'
                    + 'The answer given was the following:\n'
                    + query_response.choices[0].message.content + '\n'
                    + 'The answer is acceptable if it is not empty. An answer that is a single "yes" or "no" is acceptable. Is the given answer acceptable? Answer with only the word yes or no, do not explain or wrap your answer in quotes. Your answer must be a single word, either yes or no.'
                }
            ]
        )

        if (decide_continue_response.choices[0].message.content == "yes" or decide_continue_response.choices[0].message.content == "Yes" or decide_continue_response.choices[0].message.content == "Yes."):
            print("The answer is satisfactory and not empty.")
            satisfied_output = True
        else:
            print("The answer is not satisfactory or empty.")

In [171]:
query_runner_loop("Describe France's history.")

['has_political_history', 'hashistoricalperiod', 'hascolonialempire', 'hashistorian', 'ruled', 'conqueredby', 'hasterritory', 'descendedfrom', 'sought_to_extend_influence_into', 'underwent']
['Kingdom Of France', 'House Of France', 'West Francia', 'Medievalkingdomoffrance', "Napoleon'S French Empire", 'French Revolution', 'French Throne', 'France', 'Franks', 'Capetian Dynasty']
cypher_query: MATCH (kingdom:Country {name: "Kingdom Of France"})-[:HAS_POLITICAL_HISTORY]->(historical_event)
RETURN historical_event
cypher_query: MATCH (w:Country {name: "West Francia"})-[:HASHISTORICALPERIOD]->(period)
RETURN period.name
cypher_query: MATCH (napoleonEmpire)-[:HASLEADER]->(leader:Person {name: "Napoleon Bonaparte"}),
      (napoleonEmpire)-[:HASCOLONIALEMPIRE]->(colonialEmpire:Empire)
RETURN napoleonEmpire.name, colonialEmpire.name




cypher_query: MATCH (historian)-[:experienced]->(d:Capetian_Dynasty)
RETURN historian, d
cypher_query: MATCH (franks:People {name: "Franks"})-[:ruled]->(territory:Country {name: "France"})
RETURN territory

MATCH (franks:People {name: "Franks"})-[:ruled]->(territory:Country {name: "France"})-[:affected]->(aspect)
RETURN aspect
An error occurred: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: RETURN can only be used at the end of the query. (line 2, column 1 (offset: 86))
"RETURN territory"
 ^}




cypher_query: MATCH (france:Country {name: "Medievalkingdomoffrance"})-[:conqueredby]->(region)
RETURN region.name




cypher_query: MATCH (house:House {name: "House Of France"})-[:HASTERRITORY]->(territory)
RETURN territory.name




cypher_query: MATCH (revolution:Event {name: "French Revolution"})-[:DESCENDED_FROM]->(turmoil:Event)
RETURN turmoil.name
cypher_query: MATCH (frenchThrone:Entity {name: "French Throne"})-[:SOUGHT_TO_EXTEND_INFLUENCE_INTO]->(region)
RETURN region.name




cypher_query: MATCH (event)-[:involvedin|controlledby|hashistoricalperiod|participated_in|underwent|wentthrough|conqueredby|tookplacein|defeated_in]->(france:Country {name: "France"})
RETURN event

Answer: I don't know the answer.

The answer is not satisfactory or empty.
['has_political_history', 'hashistoricalperiod', 'hascolonialempire', 'hashistorian', 'ruled', 'conqueredby', 'hasterritory', 'descendedfrom', 'sought_to_extend_influence_into', 'underwent']
['Kingdom Of France', 'House Of France', 'West Francia', 'Medievalkingdomoffrance', "Napoleon'S French Empire", 'French Revolution', 'French Throne', 'France', 'Franks', 'Capetian Dynasty']




cypher_query: MATCH (kingdom:Country {name: "Kingdom Of France"})-[:experienced]->(event)
RETURN event




cypher_query: MATCH (westFrancia:Region {name: "West Francia"})-[:underwent]->(event)
RETURN event




cypher_query: MATCH (revolution:Event {name: "French Revolution"})-[:hadimpacton]->(throne:Entity {name: "French Throne"})
RETURN revolution, throne




cypher_query: MATCH (dynasty:Capetian_Dynasty)-[:HAS_Political_History]->(history)
RETURN history




cypher_query: MATCH (france)-[:UNDERWENT]->(change)-[:DURING]->(period:HistoricalPeriod {name: "Napoleonicera"})
RETURN change




cypher_query: MATCH (franks:People {name: "Franks"})-[:hadimpacton]->(france:Country {name: "Medievalkingdomoffrance"})
RETURN franks, france
cypher_query: MATCH (house:Entity {name: "House Of France"})-[:ESTABLISHEDBY]->(empire:Entity)-[:HASCOLONIALEMPIRE]->(colonialEmpire:Entity {name: "French Colonial Empire"})
RETURN colonialEmpire
cypher_query: MATCH (country:Country {name: "France"})-[:SOUGHT_TO_EXTEND_INFLUENCE_INTO]->(territory)
RETURN territory


TypeError: can only concatenate str (not "list") to str

# Vanilla Graph Queries

### Testing GPT-4o's native ability to generate cypher queries

In [151]:
query = "List France's allies."

In [159]:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {'role': 'system', 'content': 'You are the world expert in generating Cypher queries on a Neo4J graph to answer a natural language query.'},
        {'role': 'user', 'content': 
            'The original query is:\n' 
            + query + '\n'
            + 'The available relationship types in the knowledge graph are:\n' + '\n'.join(all_rel_types_filtered)
            + 'This is a list of all nodes in the knowledge graph:\n' + '\n'.join(all_nodes_list)
            + 'An example of a query is:\n'
            + 'MATCH (france:Country {name: "France"})-[:SOUGHT_TO_EXTEND_INFLUENCE_INTO]->(region)\n'
            + 'RETURN region.name\n'
            + 'Only return the Cypher query. Do not explain anything or wrap your answer in quotes.'
        }
    ]
)
print(response.choices[0].message.content)

MATCH (france {name: "France"})-[:ALLIES_WITH]-(ally)
RETURN ally.name


In [157]:
res = graph.query('MATCH (france:Country {name: "France"})-[:PARTOF]->(alliance:Alliance {name: "Triple Entente"})<-[:PARTOF]-(member:Country) RETURN member.name')

In [158]:
print(res)

[]


### Original Single Query

In [150]:
# original query for comparison
cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
    validate_cypher=True, # Validate relationship directions
    verbose=True
)

res = cypher_chain.run("Describe France's history.")
print(res)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (france:Country {name: "France"})-[:HAS_POLITICAL_HISTORY]->(history:Political entity)
RETURN history
[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'entity': expected a parameter, '&', ')', ':', 'WHERE', '{' or '|' (line 2, column 86 (offset: 92))
"MATCH (france:Country {name: "France"})-[:HAS_POLITICAL_HISTORY]->(history:Political entity)"
                                                                                      ^}

### Multiple Generated Queries + Summarization

#### Generate Prompts

In [None]:
decide_continue_response_with_context = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to alter original prompts to make similar prompts which are worded differently. You will be given a list of relationship types. Use synonyms, paraphrases, and other techniques to alter the original prompts.'},
        {'role': 'user', 'content': 
            'Generate 9 prompts in a numbered list with no extra newlines, using the relationship types provided. The original prompt is:\n' 
            + original_query + '\n'
            + 'The available relationship types in the knowledge graph are:\n' + '\n'.join(all_rel_types_filtered)
            + 'This is a list of all nodes in the knowledge graph:\n' + '\n'.join(all_nodes_list)
        }
    ]
)

response = decide_continue_response_with_context.choices[0].message.content
print(response)

In [None]:

# more loose version


decide_continue_response_with_context = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to alter original prompts to make similar prompts which are worded differently. You will be given a list of relationship types. Use synonyms, paraphrases, and other techniques to alter the original prompts.'},
        {'role': 'user', 'content': 
            'Generate 9 prompts in a numbered list with no extra newlines, using the relationship types provided. Feel free to be more creative with the original query. For example, you may replace nodes in the query with nearby nodes. The original prompt is:\n' 
            + original_query + '\n'
            + 'The available relationship types in the knowledge graph are:\n' + '\n'.join(all_rel_types_filtered)
            + 'This is a list of all nodes in the knowledge graph:\n' + '\n'.join(all_nodes_list)
        }
    ]
)

response = decide_continue_response_with_context.choices[0].message.content
print(response)

# make a list of the 9 prompts
prompts = response.split('\n')
for i in range(len(prompts)):
    prompts[i] = prompts[i][3:]

for prompt in prompts:
    print(prompt)

#### Run Queries

In [None]:
# Query the knowledge graph in a RAG application
graph.refresh_schema()

cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
    validate_cypher=True, # Validate relationship directions
    verbose=True
)
outputs = []
for prompt in prompts:
    try:
        res = cypher_chain.run(prompt)
    except Exception as e:
        print(f"An error occurred: {e}")
        continue
    if res is not "I don't know the answer.":
        outputs.append(res)

# merge outputs into string
context = "\n".join(outputs)

query_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {'role': 'system', 'content': 'You are a highly intelligent agent whose task is to answer questions only based on the context you are given. Your answers must be concise and very short.'},
        {'role': 'user', 'content': 
            'The original question is: '+ original_query + '\n'
            'The given context is: ' + context + '\n'
            'Your task is to answer the original question using only the context you are given. Your answer must be concise and very short.'
        }
    ]
)

print(query_response.choices[0].message.content)


# embed query
# embed all edges 
# cosine similarity
