# Neo4j 环境设置
使用本地 Neo4j 实例，并需要安装 APOC 插件。

In [2]:
from icecream import ic

In [3]:
import os

from langchain_community.graphs import Neo4jGraph

# Connection settings for the local Neo4j instance.
# The username and password can be supplied through environment variables so
# that real credentials never need to be saved in the notebook; the fallbacks
# are the original local-demo values, so behaviour is unchanged when the
# variables are not set.
url = "neo4j://localhost:7687"
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "langchain")

# Open the connection; `graph` is reused by the later cells for raw Cypher
# queries and as the backing graph of the Cypher QA chain.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
)
ic(graph)

ic| graph: <langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7fc84ba4dac0>


<langchain_community.graphs.neo4j_graph.Neo4jGraph at 0x7fc84ba4dac0>

## 数据集
使用一个合成数据集。该数据集是在 ChatGPT 的辅助下生成的，是一个只有 100 个节点的小型数据集。

In [4]:
import requests

# Download the dataset definition (a JSON document whose 'query' field holds
# the Cypher import statement) from a gist pinned to a specific revision.
# A dedicated variable name is used so the Neo4j connection `url` is not
# overwritten by the download address.
dataset_url = "https://gist.githubusercontent.com/tomasonjo/08dc8ba0e19d592c4c3cde40dd6abcc3/raw/da8882249af3e819a80debf3160ebbb3513ee962/microservices.json"

# Fail fast instead of hanging forever or trying to parse an HTTP error page.
dataset_response = requests.get(dataset_url, timeout=30)
dataset_response.raise_for_status()
import_query = dataset_response.json()['query']

# Run the import statement against the database.
graph.query(import_query)

[]

## Neo4j 向量索引
首先实现向量索引搜索，通过名称和描述查找相关任务。

In [12]:
import getpass
import os

from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings

# Never store the API key in the notebook: use the key already present in the
# environment, and only prompt for one (without echoing it) when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# The dataset-download cell may have rebound `url`, so set the Neo4j address
# again before connecting.
url = "neo4j://localhost:7687"

# Build a vector index named 'tasks' over nodes labelled Task. The text of the
# 'name', 'description' and 'status' properties is embedded with OpenAI
# embeddings, and the vector is stored in the node property 'embedding'.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    url=url,
    username=username,
    password=password,
    index_name='tasks',
    node_label="Task",
    text_node_properties=['name', 'description', 'status'],
    embedding_node_property='embedding',
)
ic(vector_index)

ic| vector_index: <langchain_community.vectorstores.neo4j_vector.Neo4jVector object at 0x7fc84df2c220>


<langchain_community.vectorstores.neo4j_vector.Neo4jVector at 0x7fc84df2c220>

In [14]:
# Run a similarity search over the Task vector index for a free-text question.
question = "How will RecommendationService be updated?"
response = vector_index.similarity_search(question)

# Show the text of the first returned document. In the recorded run this was:
#   name: RecommendationFeature
#   description: Add feature to RecommendationService
#   status: In Progress
ic(response[0].page_content)

ic| response[0].page_content: ('
                              '
                               'name: RecommendationFeature
                              '
                               'description: Add feature to RecommendationService
                              '
                               'status: In Progress')


'\nname: RecommendationFeature\ndescription: Add feature to RecommendationService\nstatus: In Progress'

In [20]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Question-answering chain backed by the Task vector index: retrieved
# documents are passed to the chat model using the "stuff" chain type.
vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(), chain_type="stuff", retriever=vector_index.as_retriever()
)

# Ask a descriptive question about a task.
# NOTE(review): in the recorded run the model replied that it did not have
# enough information to answer this question.
vector_qa.invoke("How will recommendation service be updated?")

{'query': 'How will recommendation service be updated?',
 'result': "I don't have enough information to provide a specific answer on how the recommendation service will be updated. If there are any specific tasks or projects related to updating the recommendation service, please provide more details so I can assist you better."}

In [21]:
# Put an aggregation-style question to the vector-backed QA chain.
# NOTE(review): the recorded answer was "3 open tickets", while the direct
# Cypher count over Task nodes with status 'Open' recorded in this notebook
# returned 4.
vector_qa.invoke("How many open tickets are there?")

{'query': 'How many open tickets are there?',
 'result': 'There are 3 open tickets currently.'}

In [22]:
# Count the Task nodes whose status is exactly 'Open' directly in Cypher.
# The recorded run returned [{'count(*)': 4}].
open_task_count_query = "MATCH (t:Task {status:'Open'}) RETURN count(*)"
graph.query(open_task_count_query)

[{'count(*)': 4}]

In [24]:
from langchain.chains import GraphCypherQAChain

# Refresh the schema held by the graph wrapper before building the chain; the
# dataset was imported after the connection object was created.
graph.refresh_schema()

# Chain that first asks one model (gpt-4) to generate a Cypher statement and
# then asks a second model to phrase the answer from the query result.
# verbose=True prints the generated Cypher and the returned context.
cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model_name='gpt-4'),
    qa_llm=ChatOpenAI(temperature=0),
    verbose=True,
)
ic(cypher_chain)

ic| cypher_chain: GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7fc84ba4dac0>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.
                  Instructions:
                  Use only the provided relationship types and properties in the schema.
                  Do not use any other relationship types or properties that are not provided.
                  Schema:
                  {schema}
                  Note: Do not include any explanations or apologies in your responses.
                  Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
                  Do not include any text except the generated Cypher statement.
                  
                  The question is:
                  {question}'), llm=ChatOpenAI(client=<openai.resources.chat.completi

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x7fc84ba4dac0>, cypher_generation_chain=LLMChain(prompt=PromptTemplate(input_variables=['question', 'schema'], template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fc8346536d0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fc83465c460>, model_name='gpt-4', temperature=0.0, openai_api_key=SecretStr('*******

In [25]:
# Ask the Cypher chain to count the open tickets.
# NOTE(review): in the recorded run the generated Cypher filtered on
# status 'open' (lower case) and returned 0, whereas the direct query on
# status 'Open' returned 4 - the status casing is not conveyed to the model.
cypher_chain.invoke("How many open tickets there are?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task {status: 'open'}) RETURN COUNT(t)[0m
Full Context:
[32;1m[1;3m[{'COUNT(t)': 0}][0m

[1m> Finished chain.[0m


{'query': 'How many open tickets there are?',
 'result': 'There are 0 open tickets.'}

In [32]:
# Aggregation question: which team has the most open tasks.
# NOTE(review): the recorded run again filtered on status 'open' and got an
# empty result, so the chain answered "I don't know the answer."
cypher_chain.invoke("Which team has the most open tasks?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task)-[:ASSIGNED_TO]->(team:Team)
WHERE t.status = 'open'
RETURN team.name, COUNT(t) AS openTasks
ORDER BY openTasks DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'Which team has the most open tasks?',
 'result': "I don't know the answer."}

In [33]:
# Relationship question: services with a direct dependency on Database.
# NOTE(review): the recorded run matched Dependency nodes on
# {type: 'Database'} and returned no rows.
cypher_chain.invoke("Which services depend on Database directly?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice)-[:DEPENDS_ON]->(d:Dependency {type: 'Database'}) RETURN m.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'Which services depend on Database directly?',
 'result': "I don't know the answer."}

In [29]:
# Relationship question: services that reach Database through other services.
# The recorded run used a variable-length DEPENDS_ON*2.. path and answered
# OrderService, UserService, PaymentService and ShippingService.
cypher_chain.invoke("Which services depend on Database indirectly?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice)-[:DEPENDS_ON*2..]->(d:Dependency {name: 'Database'}) RETURN m.name[0m
Full Context:
[32;1m[1;3m[{'m.name': 'OrderService'}, {'m.name': 'OrderService'}, {'m.name': 'OrderService'}, {'m.name': 'OrderService'}, {'m.name': 'UserService'}, {'m.name': 'PaymentService'}, {'m.name': 'ShippingService'}][0m

[1m> Finished chain.[0m


{'query': 'Which services depend on Database indirectly?',
 'result': 'OrderService, UserService, PaymentService, ShippingService depend on Database indirectly.'}

In [30]:
from langchain.agents import AgentType, Tool, initialize_agent

# Tool backed by the vector QA chain: free-text questions about task
# descriptions.
tasks_tool = Tool(
    name="Tasks",
    func=vector_qa.run,
    description="""Useful when you need to answer questions about descriptions of tasks.
        Not useful for counting the number of tasks.
        Use full question as input.
        """,
)

# Tool backed by the Cypher QA chain: structural questions and aggregations.
graph_tool = Tool(
    name="Graph",
    func=cypher_chain.run,
    description="""Useful when you need to answer questions about microservices,
        their dependencies or assigned people. Also useful for any sort of 
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
)

tools = [tasks_tool, graph_tool]

# OpenAI-functions agent that picks one of the two tools for each question.
agent_llm = ChatOpenAI(temperature=0, model_name='gpt-4')
mrkl = initialize_agent(
    tools,
    agent_llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
ic(mrkl)

  warn_deprecated(
ic| mrkl: AgentExecutor(verbose=True, tags=['openai-functions'], agent=OpenAIFunctionsAgent(llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fc836eb01f0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fc836eb5b20>, model_name='gpt-4', temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), tools=[Tool(name='Tasks', description='Useful when you need to answer questions about descriptions of tasks.
                  Not useful for counting the number of tasks.
                  Use full question as input.
                  ', func=<bound method Chain.run of RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. 
          If you don't kno

AgentExecutor(verbose=True, tags=['openai-functions'], agent=OpenAIFunctionsAgent(llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fc836eb01f0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fc836eb5b20>, model_name='gpt-4', temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), tools=[Tool(name='Tasks', description='Useful when you need to answer questions about descriptions of tasks.\n        Not useful for counting the number of tasks.\n        Use full question as input.\n        ', func=<bound method Chain.run of RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up 

In [34]:
# Ask the agent a structural question. `invoke` is used here for consistency
# with the other agent call in this notebook; it returns a dict with 'input'
# and 'output' keys, so the answer text is taken from 'output' and `response`
# is still a plain string.
response = mrkl.invoke("Which team is assigned to maintain PaymentService?")["output"]
ic(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Graph` with `Which team is assigned to maintain PaymentService?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Microservice {name: 'PaymentService'})-[:MAINTAINED_BY]->(t:Team) RETURN t.name[0m
Full Context:
[32;1m[1;3m[{'t.name': 'TeamD'}][0m

[1m> Finished chain.[0m
[33;1m[1;3mTeamD is assigned to maintain PaymentService.[0m

ic| response: 'TeamD is assigned to maintain the PaymentService.'


[32;1m[1;3mTeamD is assigned to maintain the PaymentService.[0m

[1m> Finished chain.[0m


'TeamD is assigned to maintain the PaymentService.'

In [35]:
# Ask the agent a question about task descriptions and print the full
# result dict (it carries the 'input' question and the 'output' answer).
question = "Which tasks have optimization in their description?"
response = mrkl.invoke(question)
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `Which tasks have optimization in their description?`


[0m[36;1m[1;3mTasks that have optimization in their description are:

1. Optimize PaymentService
2. Optimize AuthService[0m[32;1m[1;3mThe tasks that have optimization in their description are:

1. Optimize PaymentService
2. Optimize AuthService[0m

[1m> Finished chain.[0m
{'input': 'Which tasks have optimization in their description?', 'output': 'The tasks that have optimization in their description are:\n\n1. Optimize PaymentService\n2. Optimize AuthService'}
