**This notebook saves a KB file to Neo4j as a knowledge graph.**

**Only one graph is generated and saved; it contains all information extracted from all 10 documents, using both REBEL and CoreNLP.**

In [1]:
#declare the class KB
import wikipedia

class KB():
    """In-memory knowledge base of entities, relations and source articles.

    Attributes:
        entities: dict mapping an entity title to the remaining entity data
            (every key of the looked-up entity except "title").
        relations: list of dicts with the keys "head", "type", "tail" and
            "meta", where "meta" maps article_url -> {"spans": [...]}.
        sources: dict mapping article_url -> {"article_title": ...,
            "article_publish_date": ...}.
    """

    def __init__(self):
        self.entities = {} # { entity_title: {...} }
        self.relations = [] # [ head: entity_title, type: ..., tail: entity_title,
          # meta: { article_url: { spans: [...] } } ]
        self.sources = {} # { article_url: {...} }

    def merge_with_kb(self, kb2):
        """Add every relation of ``kb2``, together with its sources, to this KB.

        A relation's "meta" may reference several articles (this happens once
        relations have been merged). All of them are carried over, not only
        the first one.

        Raises:
            IndexError: if a relation has an empty "meta".
            KeyError: if the first article of a relation is not in
                ``kb2.sources``.
        """
        for r in kb2.relations:
            article_urls = list(r["meta"].keys())
            primary_url = article_urls[0]
            sources = {primary_url: kb2.sources[primary_url]}
            # Further articles are optional: a KB may hold meta entries whose
            # source was never registered, so those are skipped, not fatal.
            for article_url in article_urls[1:]:
                if article_url in kb2.sources:
                    sources[article_url] = kb2.sources[article_url]
            self._add_relation_with_sources(r, sources)

    def are_relations_equal(self, r1, r2):
        """Return True when both relations share head, type and tail."""
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        """Return True when a relation equal to ``r1`` is already stored."""
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def merge_relations(self, r2):
        """Fold the "meta" of ``r2`` into the stored relation equal to it.

        For every article in ``r2["meta"]``: an article unknown to the stored
        relation is attached as is; for a known article only the spans that
        are not stored yet are appended.

        Raises:
            IndexError: if no stored relation equals ``r2``.
        """
        r1 = [r for r in self.relations
              if self.are_relations_equal(r2, r)][0]

        for article_url, incoming in r2["meta"].items():
            # if different article
            if article_url not in r1["meta"]:
                r1["meta"][article_url] = incoming

            # if existing article
            else:
                known_spans = r1["meta"][article_url]["spans"]
                known_spans += [span for span in incoming["spans"]
                                if span not in known_spans]

    def get_wikipedia_data(self, candidate_entity):
        """Look up ``candidate_entity`` on Wikipedia.

        Returns:
            A dict with the page "title", "url" and "summary". When the lookup
            fails for any reason, a placeholder is returned instead: the
            candidate name with a trailing "*" as title and empty url/summary.
        """
        try:
            page = wikipedia.page(candidate_entity, auto_suggest=False)
            return {
                "title": page.title,
                "url": page.url,
                "summary": page.summary
            }
        except Exception:
            # Deliberate best effort: unresolved entities are kept and marked
            # with "*". `Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop a long run.
            return {
                "title": candidate_entity+"*",
                "url": "",
                "summary": ""
            }

    def add_entity(self, e):
        """Store entity ``e`` under its "title", keeping all its other keys."""
        self.entities[e["title"]] = {k:v for k,v in e.items() if k != "title"}

    def add_relation(self, r, article_title, article_publish_date):
        """Add relation ``r``, extracted from a single article, to the KB.

        The article is the first key of ``r["meta"]``; it is registered in
        ``self.sources`` with the given title and publish date if unknown.
        ``r`` is modified in place: head and tail are replaced by the titles
        returned by ``get_wikipedia_data``.

        Raises:
            IndexError: if ``r["meta"]`` is empty.
        """
        article_url = list(r["meta"].keys())[0]
        self._add_relation_with_sources(r, {
            article_url: {
                "article_title": article_title,
                "article_publish_date": article_publish_date
            }
        })

    def _add_relation_with_sources(self, r, sources):
        """Resolve entities of ``r``, register ``sources``, then store or merge ``r``."""
        # check on wikipedia
        candidate_entities = [r["head"], r["tail"]]
        entities = [self.get_wikipedia_data(ent) for ent in candidate_entities]

        # if one entity does not exist, stop
        if any(ent is None for ent in entities):
            return

        # manage new entities
        for e in entities:
            self.add_entity(e)

        # rename relation entities with their wikipedia titles
        r["head"] = entities[0]["title"]
        r["tail"] = entities[1]["title"]

        # add sources if not in kb
        for article_url, source_data in sources.items():
            if article_url not in self.sources:
                self.sources[article_url] = {
                    "article_title": source_data["article_title"],
                    "article_publish_date": source_data["article_publish_date"]
                }

        # manage new relation
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

    def print(self):
        """Print all entities, relations and sources, one item per line."""
        print("Entities:")
        for e in self.entities.items():
            print(f"  {e}")
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")
        print("Sources:")
        for s in self.sources.items():
            print(f"  {s}")

In [2]:
#load the saved Knowledge Graph (in kb file) 
import pickle

def load_kb(filename):
    """Return the object unpickled from the file at ``filename``.

    The file is opened in binary mode and read with ``pickle.load``.
    Only load files you trust: unpickling can execute arbitrary code.
    """
    with open(filename, "rb") as kb_file:
        return pickle.load(kb_file)

In [3]:
#save the Knowledge Graph to neo4j
#reference for using Neo4j with Python: https://neo4j.com/docs/python-manual/current/
from neo4j import GraphDatabase

def save_kb_to_neo4j(kb, uri, username, password):
    """Write the entities and relations of ``kb`` to a Neo4j database.

    Every entity becomes (or updates) an ``:Entity`` node keyed by ``title``
    with ``url`` and ``summary`` properties. Every relation becomes a
    relationship, typed after the relation's "type", between the two
    ``:Entity`` nodes whose titles are the relation's head and tail.
    ``MERGE`` is used throughout, so re-running does not duplicate data.

    Args:
        kb: object exposing ``entities`` (title -> data dict) and
            ``relations`` (dicts with "head", "type" and "tail").
        uri: connection URI handed to ``GraphDatabase.driver``.
        username: user name for authentication.
        password: password for authentication.
    """
    # Connect to Neo4j database
    driver = GraphDatabase.driver(uri, auth=(username, password))
    try:
        with driver.session() as session:
            # Create entities
            for entity_title, entity_data in kb.entities.items():
                session.run(
                    "MERGE (e:Entity {title: $title}) "
                    "SET e.url = $url, e.summary = $summary",
                    {
                        "title": entity_title,
                        "url": entity_data.get("url", ""),
                        "summary": entity_data.get("summary", "")
                    }
                )

            # Create relations
            for relation in kb.relations:
                # A relationship type cannot be passed as a query parameter,
                # so it is spliced into the text. Doubling backticks keeps a
                # type containing "`" from terminating the quoted name.
                relation_type = relation["type"].replace("`", "``")
                session.run(
                    "MATCH (h:Entity {title: $head}), (t:Entity {title: $tail}) "
                    "MERGE (h)-[r:`" + relation_type + "`]->(t)",
                    {"head": relation["head"], "tail": relation["tail"]}
                )
    finally:
        # Close the connection even when a query fails
        driver.close()
    print("KB saved to Neo4j successfully.")

In [4]:
import os

# Connection settings for Neo4j. Each value can be overridden with an
# environment variable (NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD) so real
# credentials do not have to be saved in the notebook. The fallbacks are the
# original local development values.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_username = os.environ.get("NEO4J_USERNAME", "limlimlim")  #the user id created in the Neo4j Desktop
neo4j_password = os.environ.get("NEO4J_PASSWORD", "12345678")  #the password created based on the user id

In [5]:
# Read the pickled knowledge base back from disk
kb_file = "kb_folder/Raymond Kurzweil KB.kb"
kb = load_kb(kb_file)

In [6]:
# Push the loaded KB (entities and relations) into the Neo4j database
save_kb_to_neo4j(
    kb,
    uri=neo4j_uri,
    username=neo4j_username,
    password=neo4j_password,
)

KB saved to Neo4j successfully.


In [7]:
# Re-assert every KB relation in Neo4j to check that it is stored.
# Nodes are saved as (:Entity {title: ...}), so head and tail must be matched
# on the `title` property. Matching on a label named after the title finds no
# node, and the MERGE then silently does nothing.
# NOTE(review): `driver` is left open here in case later cells reuse it;
# call driver.close() once it is no longer needed.
driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))
with driver.session() as session:
    for relation in kb.relations:
        head = relation["head"]
        tail = relation["tail"]
        relation_type = relation["type"]

        # Debugging print statements
        print(f"Creating relationship: {head} -[{relation_type}]-> {tail}")

        # The relationship type cannot be a query parameter; doubling any
        # backtick keeps it from terminating the quoted name.
        session.run(
            "MATCH (h:Entity {title: $head}), (t:Entity {title: $tail}) "
            "MERGE (h)-[r:`" + relation_type.replace("`", "``") + "`]->(t)",
            {"head": head, "tail": tail}
        )

Creating relationship: Ray Kurzweil -[date of birth]-> February 12, 1948*
Creating relationship: Ray Kurzweil -[occupation]-> Computer scientist
Creating relationship: Ray Kurzweil -[award received]-> National Medal of Technology and Innovation
Creating relationship: National Medal of Technology and Innovation -[country]-> United States
Creating relationship: Ray Kurzweil -[award received]-> Lemelson–MIT Prize
Creating relationship: Ray Kurzweil -[award received]-> National Inventors Hall of Fame
Creating relationship: National Inventors Hall of Fame -[winner]-> Ray Kurzweil
Creating relationship: National Inventors Hall of Fame -[country]-> United States
Creating relationship: Queens -[located in the administrative territorial entity]-> New York City
Creating relationship: New York City -[contains administrative territorial entity]-> Queens
Creating relationship: Kingsbury Elementary School* -[located in the administrative territorial entity]-> New York City
Creating relationship: Kin