In [55]:
from neo4j import GraphDatabase
from langchain.graphs import Neo4jGraph
from openai import OpenAI
from dotenv import load_dotenv
import os
import json

# Load the variables defined in the local env file before reading them below.
load_dotenv(dotenv_path='secrets.env')

# Neo4j connection settings; a missing variable raises KeyError right here.
url, username, password = (
    os.environ[key]
    for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)


In [54]:
# OpenAI client; the API key is read from the OPENAI_API_KEY environment
# variable (a KeyError is raised here if it is not set).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [27]:
# LangChain graph wrapper for the Neo4j instance. Reuses the connection
# settings already read from the environment (url / username / password)
# instead of looking the same variables up a second time, so `graph` and
# `driver` are guaranteed to point at the same database.
graph = Neo4jGraph(url=url, username=username, password=password)

In [41]:
# Delete the graph
# WARNING: this matches every node and deletes it together with its
# relationships, i.e. it empties the connected database. It runs again on
# every "Restart & Run All".
graph.query("MATCH (n) DETACH DELETE n")

[]

In [37]:
# Neo4j driver built from the connection settings read from the environment
# in the first cell (url, username, password).
driver = GraphDatabase.driver(url, auth=(username, password))

In [51]:
def add_node(tx, name, type):
    """Create one node labelled ``type`` whose ``name`` property is ``name``.

    Args:
        tx: Object exposing ``run(query, **parameters)``; presumably the
            transaction handed in by ``session.execute_write`` (verify
            against the caller).
        name: Value stored in the node's ``name`` property. Sent as a query
            parameter, so it needs no escaping.
        type: Node label. It is interpolated into the query text, so it is
            validated first.

    Raises:
        ValueError: If ``type`` is not a plain ASCII identifier
            (letters, digits, underscore; not starting with a digit).
    """
    # The label becomes part of the Cypher text itself. Rejecting anything
    # that is not a simple identifier keeps model-generated or user-supplied
    # labels from injecting additional Cypher clauses.
    if not (isinstance(type, str) and type.isascii() and type.isidentifier()):
        raise ValueError(f"Invalid node label: {type!r}")
    tx.run(f"CREATE (n:{type} {{name: $name}})", name=name)

def add_edge(tx, source, target, type):
    """Create a ``type`` relationship from node(s) named ``source`` to node(s) named ``target``.

    The query matches on the ``name`` property only, so a relationship is
    created for every (source, target) pair of nodes carrying those names;
    if either name matches no node, nothing is created.

    Args:
        tx: Object exposing ``run(query, **parameters)``; presumably the
            transaction handed in by ``session.execute_write`` (verify
            against the caller).
        source: ``name`` of the start node(s); sent as a query parameter.
        target: ``name`` of the end node(s); sent as a query parameter.
        type: Relationship type. It is interpolated into the query text, so
            it is validated first.

    Raises:
        ValueError: If ``type`` is not a plain ASCII identifier
            (letters, digits, underscore; not starting with a digit).
    """
    # The relationship type becomes part of the Cypher text itself. Rejecting
    # anything that is not a simple identifier keeps model-generated or
    # user-supplied types from injecting additional Cypher clauses.
    if not (isinstance(type, str) and type.isascii() and type.isidentifier()):
        raise ValueError(f"Invalid relationship type: {type!r}")
    tx.run(
        f"MATCH (s), (t) WHERE s.name = $source AND t.name = $target CREATE (s)-[r:{type}]->(t)",
        source=source,
        target=target,
    )


In [53]:
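# Example usage, kept commented out so that running the notebook does not
# write to the database. Note: add_edge only creates a relationship between
# nodes that already exist, so an "Alice" node must be added first.
# with driver.session() as session:
#     session.execute_write(add_node, "The Pentagon", "Building")
#     session.execute_write(add_edge, "Alice", "The Pentagon", "LIVES_IN")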


In [57]:
# Function-calling schema passed to the chat model: a single `extract_node`
# function whose arguments are the node's short name and its type, both
# required strings.
# (could add Properties to the node)
node_functions = [
    dict(
        name='extract_node',
        description="Extract a node entity from a document",
        parameters=dict(
            type="object",
            properties=dict(
                name=dict(
                    type="string",
                    description="A short name for the node entity.",
                ),
                type=dict(
                    type="string",
                    description="The type of the node entity.",
                ),
            ),
            required=["name", "type"],
        ),
    ),
]

In [56]:
# Small sample text used to try out node extraction.
test_string = (
    "Alice lives in the Pentagon. "
    "Bob lives in the White House."
)

In [64]:
# ---------------------------- PARAMETERS ----------------------------

# Default number of loop iterations used by get_nodes_from_text.
loop_parameter = 5

In [108]:
# function to identify nodes in a block of text

def get_nodes_from_text(text, loop_parameter=loop_parameter, max_retries=3):
    """Ask the chat model to extract knowledge-graph nodes from ``text``.

    Each round first asks the model whether every node has been found
    ("yes"/"no"). On "no" it requests one more node through the
    ``extract_node`` function call and stores the parsed arguments if they
    are not already in the list. A pass ends on "yes", when the model makes
    no function call, or after ``loop_parameter`` rounds.

    Args:
        text: The block of text to analyse.
        loop_parameter: Maximum number of decide/extract rounds per pass.
        max_retries: How many additional passes to run when a pass ends with
            no nodes at all. Bounds what used to be unlimited recursion.

    Returns:
        A list of dicts parsed from the function-call arguments (expected to
        carry ``name`` and ``type`` per the ``node_functions`` schema). The
        list is empty if no pass produced a node, and may be partial if the
        model gave an answer other than yes/no.
    """
    for attempt in range(max_retries + 1):
        nodes = []
        for _ in range(loop_parameter):
            known_nodes = ','.join(str(x) for x in nodes)

            decide_continue_response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {'role': 'system', 'content': 'You are a highly intelligent agent creating a knowledge graph from a given block of text. Your job is to answer a yes or no question: whether you have found all nodes (entities and concepts) in the provided text.'},
                    {'role': 'user', 'content':
                        'The block of text is:\n\n'
                        + text
                        + '\n\nYou have already identified these nodes:\n\n'
                        + known_nodes
                        + '\n\nIf you think you have identified every node, please respond with the word "yes". Else, please respond with the word "no". Do not respond with anything else. You must respond with either yes or no.'}
                ]
            )
            # The content may be None, and the model frequently answers with
            # surrounding whitespace, quotes or a trailing period ("Yes.").
            raw_answer = decide_continue_response.choices[0].message.content or ''
            answer = raw_answer.strip().strip('.!"\'').lower()

            if answer == 'yes':
                break
            if answer != 'no':
                print("Error: Unexpected response from the model.\n\n" + raw_answer)
                print("Nodes identified so far:")
                return nodes

            node_response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{'role': 'user', 'content':
                        'You are a highly intelligent agent creating a knowledge graph from a given block of text. Your job is to identify nodes (entities and concepts) in the text. Nodes must have short, unique names and a clear type. You have already identified the following nodes\n' + known_nodes + '\n Do not duplicate any of the nodes that you have already identified. There is a heavy penalty if you repeat the same node. If you have identified every node in the text, do not call the function.\n\nThe block of text is as follows: \n\n' + text}],
                functions=node_functions,
                function_call='auto'
            )

            function_call = node_response.choices[0].message.function_call
            if function_call is None:
                # The prompt tells the model not to call the function once
                # everything has been found, so no call means "done".
                break

            try:
                json_response = json.loads(function_call.arguments)
            except json.JSONDecodeError:
                # Malformed arguments from the model: skip this round rather
                # than abort the whole extraction.
                continue

            if json_response not in nodes:
                nodes.append(json_response)

        print("Finished identifying nodes.")

        if nodes:
            return nodes

        print("Error: No nodes were identified in the text.")
        if attempt < max_retries:
            print("Trying again...")

    # Every pass came back empty; give up instead of retrying forever.
    return []

In [None]:
# Run the extraction on the sample text and print each returned node on its
# own line.
nodes = get_nodes_from_text(test_string)
for node in nodes:
    print(node)
