## Import Libraries

In [1]:
import openai

from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from transformers import AutoTokenizer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA

from langchain.document_transformers import LongContextReorder
from langchain.graphs.graph_document import (
    Node as BaseNode,
    Relationship as BaseRelationship,
    GraphDocument,
)
from langchain.schema import Document
from typing import List, Dict, Any, Optional
from langchain.pydantic_v1 import Field, BaseModel

from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

from langchain.chains.openai_functions import create_structured_output_chain
from langchain.prompts import ChatPromptTemplate


from tqdm import tqdm

import os
os.environ["OPENAI_API_KEY"] = ''   

  from .autonotebook import tqdm as notebook_tqdm


## Loading Data, Vectors and Graphs

### AutoTokenizer & PyPDFLoader

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def bert_len(text):
    tokens = tokenizer.encode(text)
    return len(tokens)

In [None]:
loader = PyPDFLoader("C:/Users/valli/Downloads/contract3.pdf")
pages = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
          chunk_size = 200,
          chunk_overlap  = 20,
          length_function = bert_len,
          separators=['\n\n', '\n', ' ', ''],
      )

documents = text_splitter.split_documents(pages)

### Neo4J Vector and Graphs Set Up

In [None]:
graph = Neo4jGraph(
    url='bolt://localhost:7687',
    username='neo4j',
    password='11111111',
    database='final'
)

graph2 = Neo4jGraph(
    url='bolt://localhost:7687',
    username='neo4j',
    password='11111111',
    database='finalcost'
)

In [None]:
neo4j_vector = Neo4jVector.from_documents(
    documents,
    OpenAIEmbeddings(),
    url='bolt://localhost:7687',
    username='neo4j',
    password='11111111',
    database = 'final'
)

In [None]:
vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(), chain_type="stuff", retriever=neo4j_vector.as_retriever())

vector_qa.run(
    "What is this about"
)

'This text is about an agreement between an owner and an engineer for a project. It discusses various responsibilities, compensation, jurisdiction, and the use of documents related to the project.'

### Graph Construction

In [None]:
class Property(BaseModel):
  """A single property consisting of key and value"""
  key: str = Field(..., description="key")
  value: str = Field(..., description="value")

class Node(BaseNode):
    properties: Optional[List[Property]] = Field(
        None, description="List of node properties")

class Relationship(BaseRelationship):
    properties: Optional[List[Property]] = Field(
        None, description="List of relationship properties"
    )

class KnowledgeGraph(BaseModel):
    """Generate a knowledge graph with entities and relationships."""
    nodes: List[Node] = Field(
        ..., description="List of nodes in the knowledge graph")
    rels: List[Relationship] = Field(
        ..., description="List of relationships in the knowledge graph"
    )

In [None]:
def format_property_key(s: str) -> str:
    words = s.split()
    if not words:
        return s
    first_word = words[0].lower()
    capitalized_words = [word.capitalize() for word in words[1:]]
    return "".join([first_word] + capitalized_words)

def props_to_dict(props) -> dict:
    """Convert properties to a dictionary."""
    properties = {}
    if not props:
      return properties
    for p in props:
        properties[format_property_key(p.key)] = p.value
    return properties

def map_to_base_node(node: Node) -> BaseNode:
    """Map the KnowledgeGraph Node to the base Node."""
    properties = props_to_dict(node.properties) if node.properties else {}
    # Add name property for better Cypher statement generation
    properties["name"] = node.id.title()
    return BaseNode(
        id=node.id.title(), type=node.type.capitalize(), properties=properties
    )


def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
    """Map the KnowledgeGraph Relationship to the base Relationship."""
    source = map_to_base_node(rel.source)
    target = map_to_base_node(rel.target)
    properties = props_to_dict(rel.properties) if rel.properties else {}
    return BaseRelationship(
        source=source, target=target, type=rel.type, properties=properties
    )

In [None]:
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None,
    allowed_rels: Optional[List[str]] = None
):
    prompt = ChatPromptTemplate.from_messages(
        [(
          "system",
          f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a sophisticated algorithm designed for extracting key elements related to timeframes and scheduling from contract documents to construct a knowledge graph.
- **Nodes** represent key entities such as contracts, milestones, tasks, and deadlines.
- The goal is to extract and represent contractual timeframes and scheduling in a clear and structured knowledge graph format.
## 2. Labeling Nodes
- **Consistency**: Maintain uniformity in node labels, using terms specific to timeframes and scheduling.
  - For instance, use labels like **"contract"**, **"milestone"**, **"task"**, and **"deadline"**.
- **Node IDs**: Use meaningful identifiers from the contract text for node IDs.
{'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
{'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
## 3. Handling Contract-Specific Data
- Focus on details like milestones, tasks, deadlines, and their dependencies.
- **Attributes for Dates and Task Details**: Attach dates and task-specific information as properties of relevant nodes.
- **Property Format**: Follow a key-value format for properties, using camelCase for keys, e.g., `expectedCompletionDate`.
## 4. Coreference Resolution
- **Entity Consistency**: Ensure consistency in referencing contract entities.
Use a consistent identifier throughout the knowledge graph for each entity.
## 5. Strict Compliance
Strictly adhere to these guidelines. Deviations from the rules will not be acceptable.
          """),
            ("human", "Use the given format to extract information related to timeframes and scheduling from the following contract: {input}"),
            ("human", "Tip: Ensure accuracy and adherence to the format in your response"),
        ])
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)


In [None]:
def extract_and_store_graph(
    document: Document,
    nodes:Optional[List[str]] = None,
    rels:Optional[List[str]]=None) -> None:
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain(nodes, rels)
    data = extract_chain.run(document.page_content)
    # Construct a graph document
    graph_document = GraphDocument(
      nodes = [map_to_base_node(node) for node in data.nodes],
      relationships = [map_to_base_relationship(rel) for rel in data.rels],
      source = document
    )
    # Store information into a graph
    graph.add_graph_documents([graph_document])

### Extract and Store

In [None]:
for i, d in tqdm(enumerate(documents), total=len(documents)):
    extract_and_store_graph(d)

 38%|███▊      | 10/26 [05:28<08:46, 32.90s/it]


ValidationError: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)

In [None]:
for i, d in tqdm(enumerate(documents[11:]), total=len(documents[11:])):
    extract_and_store_graph(d)

  0%|          | 0/15 [00:00<?, ?it/s]

100%|██████████| 15/15 [12:09<00:00, 48.63s/it]


In [None]:
for i, d in tqdm(enumerate(documents), total=len(documents)):
    extract_and_store_graph(d)

 34%|███▍      | 28/82 [34:54<1:07:18, 74.79s/it] 


ValidationError: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)

In [None]:
for i, d in tqdm(enumerate(documents[35:]), total=len(documents[35:])):
    extract_and_store_graph(d)

 96%|█████████▌| 45/47 [45:18<02:00, 60.42s/it]


ValidationError: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)

In [None]:
graph.refresh_schema()

cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    validate_cypher=True, # Validate relationship directions
    verbose=True
)

cypher_chain.run("How are deadlines defined?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Contract)-[:DEFINES]->(d:Deadline) RETURN c,d;[0m
Full Context:
[32;1m[1;3m[{'c': {'revisiondate': '01/17', 'name': 'Contract', 'id': 'Contract', 'title': 'Agreement for Engineering Services'}, 'd': {'name': 'Maximumallowableconstructioncost', 'description': 'Maximum Allowable Construction Cost', 'id': 'Maximumallowableconstructioncost'}}, {'c': {'revisiondate': '01/17', 'name': 'Contract', 'id': 'Contract', 'title': 'Agreement for Engineering Services'}, 'd': {'name': 'Constructioncontractaward', 'description': 'Construction Contract Award', 'id': 'Constructioncontractaward'}}, {'c': {'revisiondate': '01/17', 'name': 'Contract', 'id': 'Contract', 'title': 'Agreement for Engineering Services'}, 'd': {'date': 'Attachment A', 'name': 'Rateschedule', 'id': 'Rateschedule'}}][0m

[1m> Finished chain.[0m


'Deadlines are typically defined in the Agreement for Engineering Services, which is a contract that outlines the terms and conditions of the project. The contract may specify specific dates or timeframes for completing certain tasks or milestones. Additionally, the contract may also include provisions for extensions or modifications to the deadlines if necessary.'

## Methods to Converse with Contract Data

### Simple Pipeline: Vector Search + Cypher Query

In [None]:
def combined_one(query):
    vector_results = neo4j_vector.similarity_search(query, k=2)
    vector_result = ''
    for i, res in enumerate(vector_results):
        
        # print(res.page_content)
        # if i != len(vector_results)-1:
        vector_result += res.page_content
    chain = GraphCypherQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
    graph_result = chain.run(query)

    final_prompt = f"""You are a helpful question-answering agent. Your task is to analyze
    and synthesize information from two sources: the top result from a similarity search
    (unstructured information) and relevant data from a graph database (structured information).
    Given the user's query: {query}, provide a meaningful and efficient answer based
    on the insights derived from the following data:

    Unstructured information: {vector_result}.
    Structured information: {graph_result}.
    """

    final_prompt = final_prompt[:1000]

    completion = openai.ChatCompletion.create(
    # Use GPT 3.5 as the LLM
    model="gpt-3.5-turbo",
    # Pre-define conversation messages for the possible roles 
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": final_prompt}
    ]
    )

    print(completion.choices[0].message)
    
combined_one('How can deadlines be optimised in this project?')

{
  "role": "assistant",
  "content": "Based on the information provided, there are a few strategies that can be implemented to optimize deadlines in a project:\n\n1. Clearly define and prioritize project tasks: Breaking down the project into smaller, manageable tasks will allow for a more accurate estimation of the time required for each task. This will help in setting realistic deadlines and avoiding potential delays.\n\n2. Effective communication and collaboration: Ensuring that team members are aware of their responsibilities and deadlines will help streamline the workflow. Regular updates and communication among team members can help address any issues or concerns that may arise and prevent any unnecessary delays.\n\n3. Regularly review and adjust deadlines: It is important to regularly review and adjust deadlines as needed. This will help keep the project on track and ensure that any changes or deviations from the schedule are communicated and addressed promptly.\n\n4. Utilize pr

#### Sample Questions

In [None]:
combined_one("How can deadlines be optimised in this project?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Contract)-[:HASTASK]->(t:Task)-[:HASDEADLINE]->(d:Deadline)
WITH c, t, d
ORDER BY d.deadline ASC
WITH c, COLLECT(t) AS tasks, COLLECT(d) AS deadlines
FOREACH (i IN RANGE(0, SIZE(tasks)-2) |
  FOREACH (task1 IN [tasks[i]] |
    FOREACH (task2 IN [tasks[i+1]] |
      MERGE (task1)-[:DEPENDENCY]->(task2)
    )
  )
)
RETURN c[0m
Full Context:
[32;1m[1;3m[{'c': {'revisiondate': '01/17', 'name': 'Contract', 'id': 'Contract', 'title': 'Agreement for Engineering Services'}}][0m

[1m> Finished chain.[0m
{
  "role": "assistant",
  "content": "Based on the available information, to optimize deadlines in this project, it is important to have a clear understanding of the agreement for engineering services and the project schedule provided by the Engineer. The contract should outline any specific deadlines or milestones that have been set.\n\nAdditionally, effective project management practices can hel

In [None]:
combined_one("What tasks can be optimised in this project?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Project)-[:HASTASK]->(t:Task)
WHERE NOT EXISTS((t)-[:DEPENDENCY]->())
RETURN t.name AS OptimizedTask[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
{
  "role": "assistant",
  "content": "Based on the provided information, it is unclear which specific tasks can be optimized in this project. The unstructured information mentions the owner's commitment to providing opportunities for participation by small business entities, disadvantaged business enterprises, minority business enterprises, women's business enterprises, and minority women's business enterprises. The owner has set an aspirational goal for combined participation from these entities. However, this information does not explicitly indicate which tasks can be optimized.\n\nThe structured information states that without any details about the project, it cannot be determined which tasks can be optimized. Therefore, more s

In [None]:
combined_one("What is the general timeframe for key milestones?")
# MAX TOKENS HIT -> Problem



[1m> Entering new GraphCypherQAChain chain...[0m


InvalidRequestError: This model's maximum context length is 4097 tokens. However, your messages resulted in 5297 tokens. Please reduce the length of the messages.

### Tools Based Methodology

In [None]:
tools = [
    Tool(
        name="Tasks",
        func=vector_qa.run,
        description="""Useful when you need to answer questions about descriptions of tasks, deadlines, timeframe, milestones in the contract.
        Use these get the descriptions of tasks, objects and entities.
        Use full question as input.
        """,
    ),
    Tool(
        name="Graph",
        func=cypher_chain.run,
        description="""Useful when you need to answer questions from the unstructured text that cannot be simply obtained by similarity search. 
        Use this to get information about dependencies and relationships between entities. Also useful for any sort of
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    ),
]

mrkl = initialize_agent(
    tools, ChatOpenAI(temperature=0, model_name='gpt-4'), agent=AgentType.OPENAI_FUNCTIONS, verbose=True
)
mrkl.run("What is the general timeframe for key milestones?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `What is the general timeframe for key milestones?`


[0m[36;1m[1;3mBased on the provided context, there is no specific mention of the general timeframe for key milestones. The agreement states that the engineer will provide the owner with a project schedule setting forth the expected period of time required for completion of the design, approvals, and construction of the project. The schedule will be established jointly by the engineer and owner before the start of Part I, and any adjustments to the schedule must be approved in writing by the owner. Therefore, the timeframe for key milestones would depend on the specific project and the agreed-upon schedule between the engineer and owner.[0m[32;1m[1;3mThe general timeframe for key milestones is not specifically mentioned. However, the agreement states that the engineer will provide the owner with a project schedule outlining the expected period 

'The general timeframe for key milestones is not specifically mentioned. However, the agreement states that the engineer will provide the owner with a project schedule outlining the expected period of time required for completion of the design, approvals, and construction of the project. This schedule will be established jointly by the engineer and owner before the start of the project. Any adjustments to the schedule must be approved in writing by the owner. Therefore, the timeframe for key milestones would depend on the specific project and the agreed-upon schedule between the engineer and owner.'

#### Sample Questions

In [None]:
# WITH ENTIRE GRAPH
mrkl.run("How can deadlines be optimised in this project??")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo optimise deadlines in a project, you can follow these steps:

1. **Prioritise Tasks**: Identify the most important tasks that contribute significantly to the project's success. Prioritise them and ensure they are completed first.

2. **Break Down Tasks**: Large tasks can be overwhelming and time-consuming. Break them down into smaller, manageable tasks. This makes it easier to estimate how much time each task will take and set realistic deadlines.

3. **Allocate Resources Wisely**: Ensure that tasks are assigned to team members based on their skills and capabilities. This will ensure tasks are completed efficiently and on time.

4. **Use Project Management Tools**: These tools can help you track progress, manage tasks, and set deadlines. They can also send reminders to keep everyone on track.

5. **Set Buffer Time**: Unexpected issues can arise in any project. Setting buffer time can help accommodate these unforeseen delay

"To optimise deadlines in a project, you can follow these steps:\n\n1. **Prioritise Tasks**: Identify the most important tasks that contribute significantly to the project's success. Prioritise them and ensure they are completed first.\n\n2. **Break Down Tasks**: Large tasks can be overwhelming and time-consuming. Break them down into smaller, manageable tasks. This makes it easier to estimate how much time each task will take and set realistic deadlines.\n\n3. **Allocate Resources Wisely**: Ensure that tasks are assigned to team members based on their skills and capabilities. This will ensure tasks are completed efficiently and on time.\n\n4. **Use Project Management Tools**: These tools can help you track progress, manage tasks, and set deadlines. They can also send reminders to keep everyone on track.\n\n5. **Set Buffer Time**: Unexpected issues can arise in any project. Setting buffer time can help accommodate these unforeseen delays and keep the project on schedule.\n\n6. **Regula

In [None]:
# WITH ENTIRE GRAPH
mrkl.run("What tasks can be optimised in this project??")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `What tasks can be optimised in this project?`


[0m[36;1m[1;3mBased on the provided context, it is mentioned that the project uses Target Value Design (TVD) to drive innovation and provide optimum value to the Owner. TVD focuses on cost, schedule, and constructability as basic components of the design criteria. Therefore, tasks that can be optimized in this project may include:

1. Design decisions: The design process should prioritize cost and value to the Owner, ensuring that design decisions are made within the context of the project's budget goals.

2. Cross-checking plans: The methodology for cross-checking plans of various disciplines to ensure completeness and accuracy at each submittal stage can be optimized. This helps identify any inconsistencies or gaps in the design.

3. Cross-checking plans and specifications: Similar to the previous point, optimizing the methodology for cross-checking

'Based on the provided context, the tasks that can be optimised in this project include:\n\n1. **Design decisions**: The design process should be optimised to prioritise cost and value to the Owner, ensuring that design decisions are made within the context of the project\'s budget goals.\n\n2. **Cross-checking plans**: The methodology for cross-checking plans of various disciplines to ensure completeness and accuracy at each submittal stage can be optimised. This helps identify any inconsistencies or gaps in the design.\n\n3. **Cross-checking plans and specifications**: Similar to the previous point, optimising the methodology for cross-checking plans and specifications to ensure completeness and accuracy can improve the overall quality of the project.\n\n4. **On-site investigation**: Developing a checklist of items to be researched during on-site investigations can help gather necessary information and identify any potential issues early on.\n\n5. **Managing project requirements**: M

### Second Graph Construction
#### Cost Coverage

In [None]:
def get_extraction_chain_for_cost(
    allowed_nodes: Optional[List[str]] = None,
    allowed_rels: Optional[List[str]] = None
):
    prompt = ChatPromptTemplate.from_messages(
        [(
          "system",
          f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a sophisticated algorithm designed for extracting key elements related to cost, resource allocation, and optimization from engineering contracts to construct a knowledge graph.
- **Nodes** represent key entities such as contracts, engineering tasks, resources, and cost elements.
- The goal is to extract and represent contractual cost details, resource allocation, and optimization strategies in a clear and structured knowledge graph format.
## 2. Labeling Nodes
- **Consistency**: Maintain uniformity in node labels, using terms specific to cost, resource allocation, and optimization.
  - For instance, use labels like **"contract"**, **"engineeringTask"**, **"resource"**, and **"costElement"**.
- **Node IDs**: Use meaningful identifiers from the contract text for node IDs.
{'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
{'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
## 3. Handling Contract-Specific Data
- Focus on details like engineering tasks, resource allocation, cost elements, and optimization strategies.
- **Attributes for Costs and Resource Details**: Attach cost-related information and resource details as properties of relevant nodes.
- **Property Format**: Follow a key-value format for properties, using camelCase for keys, e.g., `estimatedCost` and `resourceAllocation`.
## 4. Coreference Resolution
- **Entity Consistency**: Ensure consistency in referencing contract entities.
Use a consistent identifier throughout the knowledge graph for each entity.
## 5. Strict Compliance
Strictly adhere to these guidelines. Deviations from the rules will not be acceptable.
          """),
            ("human", "Use the given format to extract information related to cost, resource allocation, and optimization from the following engineering contract: {input}"),
            ("human", "Tip: Ensure accuracy and adherence to the format in your response"),
        ])
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)


In [None]:

def extract_and_store_graph(
    document: Document,
    nodes:Optional[List[str]] = None,
    rels:Optional[List[str]]=None) -> None:
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain_for_cost(nodes, rels)
    data = extract_chain.run(document.page_content)
    # Construct a graph document
    graph_document = GraphDocument(
      nodes = [map_to_base_node(node) for node in data.nodes],
      relationships = [map_to_base_relationship(rel) for rel in data.rels],
      source = document
    )
    # Store information into a graph
    graph2.add_graph_documents([graph_document])

In [None]:
for i, d in tqdm(enumerate(documents), total=len(documents)):
    try:
        extract_and_store_graph(d)
    except Exception:
        print(f"Error {i}")

for i, d in tqdm(enumerate(documents), total=len(documents)):
    try:
        extract_and_store_graph(d)
    except Exception:
        print(f"Error {i}")

    

 42%|████▏     | 11/26 [04:31<04:51, 19.46s/it]

Error 10


100%|██████████| 26/26 [11:27<00:00, 26.46s/it]


Error 25


 35%|███▌      | 29/82 [13:47<17:49, 20.18s/it] 

Error 28


 43%|████▎     | 35/82 [15:05<10:33, 13.48s/it]

Error 34


 99%|█████████▉| 81/82 [36:10<00:27, 27.74s/it]

Error 80


100%|██████████| 82/82 [36:24<00:00, 26.64s/it]


### Reconstruction of Tool Methodology
#### Long Context Reordering 

In [None]:
from langchain.document_transformers import LongContextReorder
reordering = LongContextReorder()


graph.refresh_schema()

cypher_chain1 = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    validate_cypher=True, # Validate relationship directions
    document_transformers = reordering, 
    verbose=True
)

graph2.refresh_schema()

cypher_chain2 = GraphCypherQAChain.from_llm(
    graph=graph2,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-4"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    validate_cypher=True, # Validate relationship directions
    document_transformers = reordering, 
    verbose=True
)

In [None]:
tools = [
    Tool(
        name="Long_Context_Reorder",
        func=reordering.transform_documents,
        description="Reorders the documents retrieved by the retriever to avoid performance degradation when dealing with long contexts.",
    ),
    Tool(
        name="Tasks",
        func=vector_qa.run,
        description="""Useful when you need to answer questions about descriptions of tasks, deadlines, timeframe, milestones in the contract.
        Use these get the descriptions of tasks, objects and entities.
        Use full question as input.
        """,
    ),
    Tool(
        name="Timeline_Graph",
        func=cypher_chain.run,
        description="""Useful when you need to answer questions from the unstructured text that cannot be simply obtained by similarity search. 
        Use this to get information about dependencies and relationships between entities when it comes to timelines, tasks, deadlines, milestones. Also useful for any sort of
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    ),

    Tool(
        name="Cost_and_Resource_Graph",
        func=cypher_chain2.run,
        description="""Useful when you need to answer questions from the unstructured text that cannot be simply obtained by similarity search. 
        Use this to get information about dependencies and relationships between entities about cost, resources, optimisation etc. Also useful for any sort of
        aggregation like counting the number of tasks, etc.
        Use full question as input.
        """,
    )
]

final_agent = initialize_agent(
    tools, ChatOpenAI(temperature=0, model_name='gpt-4'), agent=AgentType.OPENAI_FUNCTIONS, verbose=True
)

#### Sample Questions

In [None]:
final_agent.run("How can engineers ensure tasks are done within time and cost constraints?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mEngineers can ensure tasks are done within time and cost constraints by following these steps:

1. **Planning**: Before starting any project, it's crucial to have a detailed plan. This includes defining the scope of the project, identifying the tasks that need to be completed, estimating the time and resources each task will require, and setting a budget.

2. **Scheduling**: Once the tasks have been identified, they should be scheduled in a logical order. This can be done using a Gantt chart or a similar tool. The schedule should take into account the dependencies between tasks and the availability of resources.

3. **Monitoring**: Throughout the project, the progress of the tasks should be monitored. This involves comparing the actual progress with the planned progress and identifying any deviations.

4. **Controlling**: If any deviations are identified during the monitoring stage, corrective actions should be taken. This co

"Engineers can ensure tasks are done within time and cost constraints by following these steps:\n\n1. **Planning**: Before starting any project, it's crucial to have a detailed plan. This includes defining the scope of the project, identifying the tasks that need to be completed, estimating the time and resources each task will require, and setting a budget.\n\n2. **Scheduling**: Once the tasks have been identified, they should be scheduled in a logical order. This can be done using a Gantt chart or a similar tool. The schedule should take into account the dependencies between tasks and the availability of resources.\n\n3. **Monitoring**: Throughout the project, the progress of the tasks should be monitored. This involves comparing the actual progress with the planned progress and identifying any deviations.\n\n4. **Controlling**: If any deviations are identified during the monitoring stage, corrective actions should be taken. This could involve re-allocating resources, adjusting the s

In [None]:
final_agent.run("What are the different phases under basic services?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `What are the different phases under basic services?`


[0m[33;1m[1;3mThe different phases under basic services are:

1. Schematic Design Phase
2. Design Development Phase
3. Construction Document Phase
4. Bidding Phase
5. Construction Phase
6. Construction Completion Phase[0m[32;1m[1;3mThe different phases under basic services are:

1. Schematic Design Phase
2. Design Development Phase
3. Construction Document Phase
4. Bidding Phase
5. Construction Phase
6. Construction Completion Phase[0m

[1m> Finished chain.[0m


'The different phases under basic services are:\n\n1. Schematic Design Phase\n2. Design Development Phase\n3. Construction Document Phase\n4. Bidding Phase\n5. Construction Phase\n6. Construction Completion Phase'

In [None]:
final_agent.run("What are the different expenses in different phases?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `What are the different expenses in different phases?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Task)-[:INCLUDES]->(p:Phase) RETURN t.amount, p.percentage[0m
Full Context:
[32;1m[1;3m[{'t.amount': None, 'p.percentage': '40%'}, {'t.amount': None, 'p.percentage': '60%'}, {'t.amount': None, 'p.percentage': '95%'}, {'t.amount': None, 'p.percentage': '5%'}, {'t.amount': None, 'p.percentage': '90%'}, {'t.amount': None, 'p.percentage': '10%'}][0m

[1m> Finished chain.[0m
[36;1m[1;3mIn different phases, there are various expenses. These include expenses that account for 40%, 60%, 95%, 5%, 90%, and 10% of the total amount.[0m[32;1m[1;3mThe different expenses in different phases are distributed as follows:

- Some expenses account for 40% of the total amount.
- Some expenses account for 60% of the total amount.
- Some expenses

'The different expenses in different phases are distributed as follows:\n\n- Some expenses account for 40% of the total amount.\n- Some expenses account for 60% of the total amount.\n- Some expenses account for 95% of the total amount.\n- Some expenses account for 5% of the total amount.\n- Some expenses account for 90% of the total amount.\n- Some expenses account for 10% of the total amount.\n\nPlease note that these percentages represent the proportion of total expenses in each phase, not the exact monetary value.'

### Agent for Tools to interact with each other

In [None]:
PREFIX = """Answer the following questions as best you can. You have access to the following tools:"""
FORMAT_INSTRUCTIONS = """Answer the following questions as best you can. \
 You have access to the following tools:

Long_Context_Reorder: Use it to Reorders the documents retrieved by the retriever to avoid performance degradation when dealing with long contexts. \
                Always used as an intermediate tool
Tasks: Use this to lookup structured information from vector index. \
        Use it with both the Timeline_Graph and Cost_and_Resource_Graph.
Timeline_Graph: Use this to lookup unstructured information from graph about timelines, deadlines. \
        Use it with Tasks and Cost_and_Resource_Graph.
Cost_and_Resource_Graph: Use this to lookup unstructured information from graph about cost, resources. \
        Use it with Tasks and Timeline_Graph.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be two of [Tasks, Timeline_Graph, Cost_and_Resource_Graph]. 
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question"""
SUFFIX = """Begin!

Question: {input}
Thought:{agent_scratchpad}"""

In [None]:
final_agent = initialize_agent(
    tools, ChatOpenAI(temperature=0, model_name='gpt-4'), agent=AgentType.OPENAI_FUNCTIONS, verbose=True, agent_kwargs={
        'prefix':PREFIX,
        'format_instructions':FORMAT_INSTRUCTIONS,
        'suffix':SUFFIX
    }
)

In [None]:
print(final_agent)

verbose=True tags=['openai-functions'] agent=OpenAIFunctionsAgent(llm=ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-4', temperature=0.0, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy=''), tools=[Tool(name='Long_Context_Reorder', description='Reorders the documents retrieved by the retriever to avoid performance degradation when dealing with long contexts.', func=<bound method LongContextReorder.transform_documents of LongContextReorder()>), Tool(name='Tasks', description='Useful when you need to answer questions about descriptions of tasks, deadlines, timeframe, milestones in the contract.\n        Use these get the descriptions of tasks, objects and entities.\n        Use full question as input.\n        ', func=<bound method Chain.run of RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=[

## Questions posed to Final Agent

In [None]:
input_question = "What's the different cost for different phases?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `What's the different cost for different phases?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Phase) RETURN p.name, p.percentage[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Schematicdesignphase', 'p.percentage': '40%'}, {'p.name': 'Designdevelopmentphase', 'p.percentage': '60%'}, {'p.name': 'Constructiondocphase', 'p.percentage': '95%'}, {'p.name': 'Biddingphase', 'p.percentage': '5%'}, {'p.name': 'Constructionphase', 'p.percentage': '90%'}, {'p.name': 'Constructioncompletionphase', 'p.percentage': '10%'}][0m

[1m> Finished chain.[0m
[36;1m[1;3mThe cost for different phases varies depending on the percentage of completion. In the Schematic Design Phase, the cost is 40%. In the Design Development Phase, the cost is 60%. In the Construction Document Phase, the cost is 95%. In the Bidding Phase, the cost is 5%. In the Constructi

'The cost for different phases varies depending on the percentage of completion:\n\n- In the Schematic Design Phase, the cost is 40%.\n- In the Design Development Phase, the cost is 60%.\n- In the Construction Document Phase, the cost is 95%.\n- In the Bidding Phase, the cost is 5%.\n- In the Construction Phase, the cost is 90%.\n- In the Construction Completion Phase, the cost is 10%.'

In [None]:
input_question = "What are the Engineer's Tasks in the Schematic Design Phase?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `What are the Engineer's Tasks in the Schematic Design Phase?`


[0m[33;1m[1;3mIn the Schematic Design Phase, the Engineer's tasks include:

1. Working with the Owner and other designated individuals or entities to establish the general and detailed requirements for the Project based on the approved program document.
2. Preparing Schematic Design Documents that represent the Engineer's recommendations for the Project.
3. Continuously updating the "Design Tracking Model" column in the cost model to ensure that the schematic design is consistent with the Target Value Design goals.
4. Submitting the then-current version of the cost model to secure Owner's written approval of the Schematic Design Documents.
5. Providing an updated project schedule to the Owner.
6. Incorporating any significant revisions required by the Owner's approval into an integrated reference set of documents.
7. Securing the Owner

'In the Schematic Design Phase, the Engineer\'s tasks include:\n\n1. Working with the Owner and other designated individuals or entities to establish the general and detailed requirements for the Project based on the approved program document.\n2. Preparing Schematic Design Documents that represent the Engineer\'s recommendations for the Project.\n3. Continuously updating the "Design Tracking Model" column in the cost model to ensure that the schematic design is consistent with the Target Value Design goals.\n4. Submitting the then-current version of the cost model to secure Owner\'s written approval of the Schematic Design Documents.\n5. Providing an updated project schedule to the Owner.\n6. Incorporating any significant revisions required by the Owner\'s approval into an integrated reference set of documents.\n7. Securing the Owner\'s written approval of the Schematic Design Documents before proceeding to the next phase.\n\nPlease note that this information is based on the given con

In [None]:
input_question = "How will an engineer maintain cost model in the different phases?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `How will an engineer maintain cost model in the different phases?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
[36;1m[1;3mAn engineer can maintain a cost model in different phases by regularly updating and reviewing the cost estimates throughout the project. This includes analyzing the costs of materials, labor, equipment, and any other relevant expenses. Additionally, the engineer should track any changes or updates to the project scope or requirements that may impact the cost model. By actively monitoring and adjusting the cost model as needed, the engineer can ensure that the project stays within budget and identify any potential cost-saving opportunities.[0m[32;1m[1;3mAn engineer can maintain a cost model in different phases by regularly updating and reviewing the 

'An engineer can maintain a cost model in different phases by regularly updating and reviewing the cost estimates throughout the project. This includes analyzing the costs of materials, labor, equipment, and any other relevant expenses. Additionally, the engineer should track any changes or updates to the project scope or requirements that may impact the cost model. By actively monitoring and adjusting the cost model as needed, the engineer can ensure that the project stays within budget and identify any potential cost-saving opportunities.'

In [None]:
input_question = "What are things to look out for when drafting contracts?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mWhen drafting contracts, there are several key things to look out for:

1. **Clear and Precise Language**: The language used in the contract should be clear and precise to avoid any ambiguity. It should clearly define the rights, duties, and obligations of each party.

2. **Parties Involved**: The contract should clearly identify all the parties involved. This includes their legal names and addresses.

3. **Scope of Work**: The contract should clearly define the scope of work or services to be provided. This includes the tasks to be performed, the timeline for completion, and the expected outcome.

4. **Payment Terms**: The contract should clearly outline the payment terms, including the amount to be paid, the payment schedule, and the method of payment.

5. **Termination Clause**: The contract should include a termination clause that outlines the conditions under which the contract can be terminated.

6. **Dispute Resolution

"When drafting contracts, there are several key things to look out for:\n\n1. **Clear and Precise Language**: The language used in the contract should be clear and precise to avoid any ambiguity. It should clearly define the rights, duties, and obligations of each party.\n\n2. **Parties Involved**: The contract should clearly identify all the parties involved. This includes their legal names and addresses.\n\n3. **Scope of Work**: The contract should clearly define the scope of work or services to be provided. This includes the tasks to be performed, the timeline for completion, and the expected outcome.\n\n4. **Payment Terms**: The contract should clearly outline the payment terms, including the amount to be paid, the payment schedule, and the method of payment.\n\n5. **Termination Clause**: The contract should include a termination clause that outlines the conditions under which the contract can be terminated.\n\n6. **Dispute Resolution**: The contract should include a clause that ou

In [None]:
input_question = "When is there a possibility of additional or unexpected expenses?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `When is there a possibility of additional or unexpected expenses?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Contract)-[:ADDITIONALSERVICES]->(t:Task) RETURN c.title, t.name, t.description, t.costmodel, t.amount[0m
Full Context:
[32;1m[1;3m[{'c.title': None, 't.name': '3', 't.description': 'Utilize Target Value Design', 't.costmodel': None, 't.amount': None}][0m

[1m> Finished chain.[0m
[36;1m[1;3mThere is a possibility of additional or unexpected expenses when utilizing Target Value Design.[0m[32;1m[1;3mThere is a possibility of additional or unexpected expenses when utilizing Target Value Design. This approach involves setting a target cost for a project and then designing to that cost. If the actual costs exceed the target, it can result in additional or unexpected expenses.[0m

[1m> Finished chain.[0m


'There is a possibility of additional or unexpected expenses when utilizing Target Value Design. This approach involves setting a target cost for a project and then designing to that cost. If the actual costs exceed the target, it can result in additional or unexpected expenses.'

In [None]:
input_question = "In which cases would the Owner have to pay extra?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `In which cases would the Owner have to pay extra?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Contract)-[:COMPENSATION]->(r:Reimbursable) RETURN c.title, r.name[0m
Full Context:
[32;1m[1;3m[{'c.title': 'Agreement for Engineering Services', 'r.name': 'Reimbursables'}][0m

[1m> Finished chain.[0m
[36;1m[1;3mThe Owner would have to pay extra in cases where there are reimbursable expenses incurred for the Agreement for Engineering Services.[0m[32;1m[1;3mThe Owner would have to pay extra in cases where there are reimbursable expenses incurred for the Agreement for Engineering Services.[0m

[1m> Finished chain.[0m


'The Owner would have to pay extra in cases where there are reimbursable expenses incurred for the Agreement for Engineering Services.'

In [None]:
input_question = "What are some reimbursable expenses?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Tasks` with `What are some reimbursable expenses?`


[0m[33;1m[1;3mReimbursable expenses are additional costs incurred by the Engineer that are not included in their basic services compensation or compensation for additional services. The specific reimbursable expenses are described in Article III of the agreement, but without that information, it is not possible to provide a list of specific reimbursable expenses.[0m[32;1m[1;3mReimbursable expenses are additional costs incurred by the Engineer that are not included in their basic services compensation or compensation for additional services. The specific reimbursable expenses are described in Article III of the agreement. However, without the specific details of the agreement, it's not possible to provide a list of specific reimbursable expenses.[0m

[1m> Finished chain.[0m


"Reimbursable expenses are additional costs incurred by the Engineer that are not included in their basic services compensation or compensation for additional services. The specific reimbursable expenses are described in Article III of the agreement. However, without the specific details of the agreement, it's not possible to provide a list of specific reimbursable expenses."

In [None]:
input_question = "What are some additional services owner will have to pay for?"
final_agent.run(input_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Cost_and_Resource_Graph` with `What are some additional services owner will have to pay for?`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Contract)-[:COMPENSATION]->(s:Service) RETURN s.name[0m
Full Context:
[32;1m[1;3m[{'s.name': 'Additionalservices'}][0m

[1m> Finished chain.[0m
[36;1m[1;3mSome additional services that the owner will have to pay for include maintenance, repairs, and any upgrades or renovations to the property.[0m[32;1m[1;3mThe owner will have to pay for additional services such as maintenance, repairs, and any upgrades or renovations to the property.[0m

[1m> Finished chain.[0m


'The owner will have to pay for additional services such as maintenance, repairs, and any upgrades or renovations to the property.'