In [1]:
#%pip install --upgrade --quiet  langchain langchain-experimental langchain-openai neo4j wikipedia

Note: you may need to restart the kernel to use updated packages.


#### Initialize the environment and set the example query

In [1]:
import dotenv
import os
import json
import requests
import openai
from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer

In [2]:
# load_dotenv() is expected to populate os.environ from a local .env file
# (python-dotenv); both API keys are then read from the environment.
# Subscripting os.environ raises KeyError here if either key is missing.
dotenv.load_dotenv()
DIFFBOT_API_KEY = os.environ['DIFFBOT_API_KEY']
# NOTE(review): OPENAI_API_KEY is not referenced again in the visible cells —
# presumably ChatOpenAI picks the key up from the environment itself; confirm.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [3]:
# Build the Diffbot graph transformer once, authenticated with DIFFBOT_API_KEY;
# it is reused later to turn the Wikipedia documents into graph documents.
DIFFBOT_NLP = DiffbotGraphTransformer(diffbot_api_key=DIFFBOT_API_KEY)

In [15]:
from langchain_community.document_loaders import WikipediaLoader

# Fetch the Wikipedia documents matching the example query, then hand them to
# the Diffbot transformer to produce the graph documents loaded into Neo4j later.
query = "Elon Musk"
wiki_loader = WikipediaLoader(query=query)
raw_documents = wiki_loader.load()
graph_documents = DIFFBOT_NLP.convert_to_graph_documents(raw_documents)



  lis = BeautifulSoup(html).find_all('li')


In [16]:
# Inspect the raw documents retrieved from Wikipedia.
# This prints the repr of the whole list, so the output can be very long.
print(raw_documents)

[Document(page_content="Elon Reeve Musk (; EE-lon; born June 28, 1971) is a businessman and investor. He is the founder, chairman, CEO, and CTO of SpaceX; angel investor, CEO, product architect, and former chairman of Tesla, Inc.; owner, executive chairman, and CTO of X Corp.; founder of the Boring Company and xAI; co-founder of Neuralink and OpenAI; and president of the Musk Foundation. He is one of the wealthiest people in the world, with an estimated net worth of US$232 billion as of December 2023, according to the Bloomberg Billionaires Index, and $182.6 billion according to Forbes, primarily from his ownership stakes in Tesla and SpaceX.A member of the wealthy South African Musk family, Elon was born in Pretoria and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen's University at Kingston in Canada. Musk later transferred to the University of Pennsyl

#### Start a Docker container to explore the graph via a GUI

In [None]:
# Start a Neo4j Docker container from the command line:
# docker run \
#    --name neo4j \
#    -p 7474:7474 -p 7687:7687 \
#    -d \
#    -e NEO4J_AUTH=neo4j/pleaseletmein \
#    -e NEO4J_PLUGINS=\[\"apoc\"\]  \
#    neo4j:latest

In [8]:
from langchain_community.graphs import Neo4jGraph

# Connection settings come from the environment so the notebook does not have
# to be edited (or a real password committed) for each machine. The fallbacks
# are the original local-Docker values, so behavior is unchanged when the
# variables are not set.
# If Docker runs inside WSL, run `wsl hostname -I` to find the WSL IP for the URL.
# The Neo4j browser GUI is on port 7474; the bolt port used here is 7687.
url = os.environ.get("NEO4J_URI", "bolt://172.29.153.143:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "pleaseletmein")

graph = Neo4jGraph(url=url, username=username, password=password)

Load the GraphDocuments into the Docker-hosted knowledge graph, then refresh the graph schema.

In [17]:
# Write the Diffbot-extracted graph documents into the connected Neo4j database.
graph.add_graph_documents(graph_documents)

In [32]:
# Refresh the cached schema after loading data so graph.schema reflects what was just added.
graph.refresh_schema()

In [34]:
# Show the node and relationship properties the graph now exposes.
print(graph.schema)

Node properties are the following:
Organization {id: STRING, name: STRING, foundingDate: STRING, productType: STRING},Skill {name: STRING, id: STRING},Person {dateOfBirth: STRING, positionHeld: STRING, id: STRING, name: STRING, age: STRING, academicDegree: STRING, numberOfChildren: STRING},Location {id: STRING, name: STRING},Award {id: STRING, name: STRING}
Relationship properties are the following:
INDUSTRY {evidence: STRING},EMPLOYEE_OR_MEMBER_OF {evidence: STRING, isNotCurrent: STRING, startTime: STRING, isCurrent: STRING, positionHeld: STRING, endTime: STRING},FAMILY_MEMBER {evidence: STRING, startTime: STRING, isNotCurrent: STRING},SOCIAL_RELATIONSHIP {evidence: STRING, startTime: STRING, isNotCurrent: STRING},HAS_CHILD {evidence: STRING},PLACE_OF_BIRTH {evidence: STRING},PERSON_LOCATION {evidence: STRING, startTime: STRING, isNotCurrent: STRING, isCurrent: STRING},SIBLING {evidence: STRING},FOUNDED_BY {evidence: STRING},ACQUIRED_BY {evidence: STRING, pointInTime: STRING},EDUCATED

In [45]:
# Sanity-check connectivity with a hand-written Cypher query before spending
# any OpenAI calls: fetch up to 25 CHIEF_EXECUTIVE_OFFICER paths.
ceo_paths_cypher = """
MATCH p=()-[r:CHIEF_EXECUTIVE_OFFICER]->() RETURN p LIMIT 25
"""
graph.query(ceo_paths_cypher)

[{'p': [{'name': 'SpaceX', 'id': 'http://www.wikidata.org/entity/Q193701'},
   'CHIEF_EXECUTIVE_OFFICER',
   {'name': 'Elon Musk',
    'dateOfBirth': '1971-06-28',
    'id': 'http://www.wikidata.org/entity/Q317521',
    'positionHeld': 'product architect',
    'age': '18'}]},
 {'p': [{'name': 'Tesla', 'id': 'http://www.wikidata.org/entity/Q478214'},
   'CHIEF_EXECUTIVE_OFFICER',
   {'name': 'Elon Musk',
    'dateOfBirth': '1971-06-28',
    'id': 'http://www.wikidata.org/entity/Q317521',
    'positionHeld': 'product architect',
    'age': '18'}]},
 {'p': [{'name': 'SolarCity', 'id': 'http://www.wikidata.org/entity/Q7555824'},
   'CHIEF_EXECUTIVE_OFFICER',
   {'name': 'Lyndon Rive',
    'dateOfBirth': '1977',
    'id': 'http://www.wikidata.org/entity/Q6708744',
    'positionHeld': 'businessman'}]},
 {'p': [{'name': 'X Corp', 'id': 'X Corp'},
   'CHIEF_EXECUTIVE_OFFICER',
   {'name': 'Linda Yaccarino',
    'id': 'http://www.wikidata.org/entity/Q115655652'}]},
 {'p': [{'name': 'X', 'id': '

Log into Neo4j by navigating to `http://{docker ip}:7474` and signing in with the credentials set above. To view the graph, you can run a simple query such as `MATCH (n) RETURN n LIMIT 25`.

#### Query the graph

In [70]:
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Two separate models: one at temperature 0 that generates the Cypher query,
# and one at temperature 0.3 that phrases the final answer from the results.
cypher_llm = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)
qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)

chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=cypher_llm,
    qa_llm=qa_llm,
    verbose=True,  # print the generated Cypher and the retrieved context
)

In [71]:
# Chain.run is deprecated in LangChain 0.1+. invoke() takes the question under
# the "query" key and returns a dict; its "result" entry is the answer string,
# so the cell still displays the same value that run() returned.
chain.invoke({"query": "Does Elon Musk have kids?"})["result"]




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person {name: "Elon Musk"})-[:HAS_CHILD]->(c:Person)
RETURN c.name AS ChildName
[0m
Full Context:
[32;1m[1;3m[{'ChildName': 'Tosca.Maye'}][0m

[1m> Finished chain.[0m


'Yes, Elon Musk has a child named Tosca Maye.'