# Create a Graph from PDF Documents

This notebook demonstrates how to extract a knowledge graph from PDF documents using the `knowledge_graph_maker` library.

Steps:
- Define an Ontology
- Upload PDF documents
- Convert PDFs to text chunks
- Create Graph using GPT-3.5
- Save the graph to Neo4j db
- Visualise

Load the graph maker functions:

In [None]:
from knowledge_graph_maker import GraphMaker, Ontology, OpenAIClient
from knowledge_graph_maker import Document
from pdf_processor import process_pdf_files
import ipywidgets as widgets
from IPython.display import display, clear_output

# Define the Ontology

In [None]:
# Ontology that is later passed to GraphMaker: the entity labels and the
# relationship description to use when building the graph.
ontology = Ontology(
    # Each label is either a bare name or a single-key dict of {name: description}.
    # Presumably the description text is forwarded to the LLM as guidance - TODO confirm.
    labels=[
        {"Person": "Person name without any adjectives, Remember a person may be referenced by their name or using a pronoun"},
        {"Object": "Do not add the definite article 'the' in the object name"},
        # NOTE(review): "Event event" looks like a duplicated word; left untouched because
        # this string is runtime text, not a comment.
        {"Event": "Event event involving multiple people. Do not include qualifiers or verbs like gives, leaves, works etc."},
        "Place",
        "Document",
        "Organisation",
        "Action",
        {"Miscellaneous": "Any important concept can not be categorised with any other given label"},
    ],
    # A single free-form description rather than an enumerated list of relation types.
    relationships=[
        "Relation between any pair of Entities"
        ],
)

# Upload PDF Files

In [None]:
# File picker restricted to '.pdf'; several files may be selected in one go.
uploader = widgets.FileUpload(accept='.pdf', multiple=True)

# Render the widget in the cell output so files can be chosen interactively.
display(uploader)

In [None]:
# Process uploaded PDFs
# FileUpload.value differs across ipywidgets major versions: 7.x exposes a dict
# keyed by filename, 8.x exposes a tuple of per-file records. Accept both so the
# cell does not fail with AttributeError on the newer widget.
uploaded = uploader.value
uploaded_files = uploaded.values() if isinstance(uploaded, dict) else uploaded

# bytes() is a no-op for the bytes payload of 7.x and converts the memoryview
# payload of 8.x, so process_pdf_files always receives plain bytes.
pdf_files = [bytes(file['content']) for file in uploaded_files]

if not pdf_files:
    # Running this cell before choosing files (e.g. via "Run All") yields nothing to process.
    print("No PDF files uploaded yet - use the upload widget above, then re-run this cell.")

text_chunks = process_pdf_files(pdf_files)
print(f"Processed {len(text_chunks)} text chunks from {len(pdf_files)} PDF files")

# Create Graph from PDF Content

In [None]:
import datetime
# One timestamp string for the whole run; it is stored in each document's metadata.
# isoformat(sep=" ") yields the same text as str() on a datetime.
current_time = datetime.datetime.now().isoformat(sep=" ")

# OpenAI model name used by the client below.
oai_model = "gpt-3.5-turbo"

# LLM client: used directly for the summaries and handed to the graph maker.
llm = OpenAIClient(model=oai_model, temperature=0.1, top_p=0.5)

# Graph maker wired to the ontology defined earlier in the notebook.
graph_maker = GraphMaker(ontology=ontology, llm_client=llm, verbose=False)

def generate_summary(text):
    """Return the LLM-generated summary of ``text``.

    Forwards a fixed summarisation instruction together with ``text`` to the
    notebook-level ``llm`` client and returns the result of ``llm.generate``
    unchanged.
    """
    system_prompt = "Succintly summarise the text provided by the user. Respond only with the summary and no other comments"
    summary = llm.generate(system_prompt, text)
    return summary

# One Document per text chunk, each carrying its LLM summary and the shared run timestamp.
documents = [
    Document(
        text=chunk,
        metadata={
            "summary": generate_summary(chunk),
            "timestamp": current_time,
        },
    )
    for chunk in text_chunks
]

# Build the graph from all documents in a single call.
graph = graph_maker.create_graph(documents)

# Save to Neo4j

In [None]:
from knowledge_graph_maker import Neo4jGraphModel

# Flag forwarded to Neo4jGraphModel; set to False for this run.
create_indices = False

# Wrap the generated graph as the model's edges, then persist it via save().
neo4j_graph = Neo4jGraphModel(
    edges=graph,
    create_indices=create_indices,
)
neo4j_graph.save()