In [3]:
import json
import os
import random
import string

from neo4j import GraphDatabase

# Connection and input settings.
# Each value can be overridden with an environment variable so credentials and
# machine-specific paths do not have to be edited in the source. When a
# variable is not set, the value that used to be hard-coded here is used.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): the fallback password is kept only for backward compatibility;
# prefer setting NEO4J_PASSWORD and removing the literal from version control.
password = os.environ.get("NEO4J_PASSWORD", "12345678")
json_file_path = os.environ.get(
    "KG_JSON_PATH",
    r"C:\Users\Tuba.Gokhan\Desktop\KG\CoBs_Obligations_Named_Entities_Defined_Terms (1).json",
)

class Neo4jConnection:
    """Import a document, its paragraphs and their annotations into Neo4j.

    The instance owns a Neo4j driver and a set of the node IDs it has handed
    out, so that every node created through it gets a distinct ``id``
    property. It can be used as a context manager; leaving the ``with`` block
    closes the driver.
    """

    def __init__(self, uri, user, password, database="documents"):
        """Create the driver and remember which database sessions should use.

        Args:
            uri: Address passed to ``GraphDatabase.driver``.
            user: User name for authentication.
            password: Password for authentication.
            database: Name of the database every session is opened against.
                Defaults to ``"documents"``, the name previously hard-coded.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.database = database
        # IDs already returned by generate_unique_id().
        self.used_ids = set()
        print("Connected to Neo4j.")

    def __enter__(self):
        """Return the connection itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()
        print("Connection to Neo4j closed.")

    def clear_database(self):
        """Delete every node and relationship and forget all generated IDs."""
        with self.driver.session(database=self.database) as session:
            session.run("MATCH (n) DETACH DELETE n")
            self.used_ids.clear()
            print("Previous data deleted from the database.")

    def generate_unique_id(self, length=8):
        """Return a random alphanumeric ID not yet returned by this instance.

        Args:
            length: Number of characters in the ID.

        Returns:
            A string of ASCII letters and digits; it is recorded in
            ``self.used_ids`` before being returned.
        """
        characters = string.ascii_letters + string.digits
        while True:
            unique_id = ''.join(random.choice(characters) for _ in range(length))
            if unique_id not in self.used_ids:
                self.used_ids.add(unique_id)
                return unique_id

    def create_paragraphs_and_obligations(self, document_title, document_version, document_date, data):
        """Rebuild the graph: one Document node plus a Paragraph node per item.

        The database is cleared first. Each item of ``data`` must provide
        ``'ContextID'`` and ``'Text'``; the optional ``'Obligations'``,
        ``'NamedEntities'`` and ``'DefinedTerms'`` lists are attached to the
        paragraph. A paragraph whose context ID contains a dot is linked to
        the paragraph whose context ID is everything before the last dot;
        otherwise it is linked to the document. ``data`` is not modified.

        Args:
            document_title: Stored as the Document node's ``title``.
            document_version: Stored as the Document node's ``version``.
            document_date: Stored as the Document node's ``date``.
            data: Iterable of paragraph dictionaries as described above.
        """
        self.clear_database()
        print("Creating paragraphs and obligations...")
        with self.driver.session(database=self.database) as session:
            doc_id = self.generate_unique_id()
            print(f"Creating Document node with ID: {doc_id}")
            session.run(
                "CREATE (:Document {id: $id, title: $title, version: $version, date: $date})",
                id=doc_id, title=document_title, version=document_version, date=document_date,
            )

            for item in data:
                # Work on a local copy so the caller's data is left untouched.
                context_id = item['ContextID']
                if context_id.endswith('.'):
                    context_id = context_id[:-1]

                # Create Paragraph node with a unique ID
                para_id = self.generate_unique_id()
                session.run(
                    "CREATE (p:Paragraph {id: $id, contextID: $contextID, text: $text}) RETURN p",
                    id=para_id, contextID=context_id, text=item['Text'],
                )

                # Everything before the last dot identifies the parent
                # paragraph; an empty result means a top-level paragraph.
                parent_context_id = context_id.rpartition('.')[0]
                if parent_context_id:
                    summary = session.run("""
                    MATCH (parent:Paragraph {contextID: $parent_context_id}), (child:Paragraph {id: $para_id})
                    MERGE (parent)-[:CONTAINS]->(child)
                    """, parent_context_id=parent_context_id, para_id=para_id).consume()
                    # The MATCH silently yields nothing when the parent has
                    # not been created, so only report a link that exists.
                    if summary.counters.relationships_created:
                        print(f"Paragraph node created with ContextID: {context_id}, linked to parent Paragraph ContextID: {parent_context_id}")
                    else:
                        print(f"Warning: Paragraph node created with ContextID: {context_id}, but parent Paragraph ContextID: {parent_context_id} was not found; paragraph left unlinked")
                else:
                    # Link directly to document
                    session.run("""
                    MATCH (doc:Document {id: $doc_id}), (para:Paragraph {id: $para_id})
                    MERGE (doc)-[:CONTAINS]->(para)
                    """, doc_id=doc_id, para_id=para_id)
                    print(f"Paragraph node created with ContextID: {context_id}, linked directly to Document")

                self.create_obligations(session, para_id, item.get('Obligations', []))
                self.create_named_entities(session, para_id, item.get('NamedEntities', []))
                self.create_defined_terms(session, para_id, item.get('DefinedTerms', []))

    def create_obligations(self, session, para_id, obligations):
        """Create one Obligation node per text and attach it to the paragraph.

        Args:
            session: Open Neo4j session used to run the queries.
            para_id: ``id`` property of the owning Paragraph node.
            obligations: Iterable of raw obligation strings; each is split
                into heading and description by ``parse_obligation_text``.
        """
        for obligation_text in obligations:
            obli_id = self.generate_unique_id()
            heading, description = self.parse_obligation_text(obligation_text)
            session.run("""
                MATCH (p:Paragraph {id: $para_id})
                CREATE (o:Obligation {id: $obli_id, heading: $heading, description: $description})
                MERGE (p)-[:HAS_OBLIGATION]->(o)
                """, para_id=para_id, obli_id=obli_id, heading=heading, description=description)

    def parse_obligation_text(self, text):
        """Split a raw obligation string into ``(heading, description)``.

        A single trailing double quote is dropped first. Then:

        * If the text contains ``**``, the heading is what follows the first
          double quote in the part before the first ``**`` (or that whole
          part when it holds no double quote), and the description is the
          part between the first and second ``**`` (or up to the end).
        * Otherwise, if the text contains a space followed by a double
          quote, both heading and description are the text after that quote.
        * Otherwise the heading is a fixed "invalid format" message and the
          description is empty.

        Finally one trailing ``:`` and one leading ``"`` are removed from the
        heading, and one leading ``"`` then one leading ``:`` from the
        description.

        Args:
            text: Raw obligation string.

        Returns:
            Tuple ``(heading, description)`` of strings.
        """
        # Remove the last character if it is a double quote
        if text.endswith('"'):
            text = text[:-1]

        if '**' in text:
            temp = text.split('**')
            prefix_parts = temp[0].split('"')
            # Without a double quote in the prefix there is no second element;
            # fall back to the whole prefix instead of raising IndexError.
            heading = prefix_parts[1] if len(prefix_parts) > 1 else temp[0]
            description = temp[1]
        else:
            try:
                # Index of the double quote that follows the first ' "' pair;
                # the quote itself is stripped further below.
                start_index = text.index(' "') + 1
                heading = description = text[start_index:]
            except ValueError:
                # Handle cases where the double quotes are not found or improperly formatted
                heading = "Invalid format or missing double quotes"
                description = ""

        # Trim the separator and quote characters left over from the split.
        if heading.endswith(':'):
            heading = heading[:-1]
        if heading.startswith('"'):
            heading = heading[1:]
        if description.startswith('"'):
            description = description[1:]
        if description.startswith(':'):
            description = description[1:]

        return heading, description

    def create_named_entities(self, session, para_id, named_entities):
        """Create one NamedEntity node per entry and attach it to the paragraph.

        Args:
            session: Open Neo4j session used to run the queries.
            para_id: ``id`` property of the owning Paragraph node.
            named_entities: Iterable of dictionaries; ``'ContextID'`` is
                stored as the node's ``term`` and ``'Description'`` as its
                ``description``.
        """
        print(f"Creating Named Entities for Paragraph ID: {para_id}")
        for entity in named_entities:
            entity_id = self.generate_unique_id()
            print(f"Creating Named Entity node with ID: {entity_id}")
            session.run("""
                MATCH (p:Paragraph {id: $para_id})
                CREATE (e:NamedEntity {id: $entity_id, term: $term, description: $description})
                MERGE (p)-[:HAS_NAMED_ENTITY]->(e)
                """, para_id=para_id, entity_id=entity_id, term=entity['ContextID'], description=entity['Description'])

    def create_defined_terms(self, session, para_id, defined_terms):
        """Create one DefinedTerm node per entry and attach it to the paragraph.

        Args:
            session: Open Neo4j session used to run the queries.
            para_id: ``id`` property of the owning Paragraph node.
            defined_terms: Iterable of dictionaries; ``'ContextID'`` is
                stored as the node's ``term`` and ``'Description'`` as its
                ``description``.
        """
        print(f"Creating Defined Terms for Paragraph ID: {para_id}")
        for term in defined_terms:
            term_id = self.generate_unique_id()
            print(f"Creating Defined Term node with ID: {term_id}")
            session.run("""
                MATCH (p:Paragraph {id: $para_id})
                CREATE (d:DefinedTerm {id: $term_id, term: $term, description: $description})
                MERGE (p)-[:HAS_DEFINED_TERM]->(d)
                """, para_id=para_id, term_id=term_id, term=term['ContextID'], description=term['Description'])

# Script entry point: load the paragraph JSON and import it into Neo4j.
if __name__ == "__main__":
    conn = Neo4jConnection(uri, user, password)
    try:
        # Read the file as UTF-8 explicitly; without an encoding argument
        # open() uses the platform default, which can mis-decode non-ASCII
        # text on systems where that default is not UTF-8.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        document_info = {
            "title": "Conduct of Business Rulebook (COBS)",
            "version": "VER15.150823",
            "date": "15/08/2023"
        }

        conn.create_paragraphs_and_obligations(document_info['title'], document_info['version'], document_info['date'], data)
    finally:
        # Close the driver even when loading or importing fails.
        conn.close()


Connected to Neo4j.
Previous data deleted from the database.
Creating paragraphs and obligations...
Creating Document node with ID: etQ9QTsh
Paragraph node created with ContextID: 1, linked directly to Document
Creating Named Entities for Paragraph ID: kIRMSKtO
Creating Defined Terms for Paragraph ID: kIRMSKtO
Paragraph node created with ContextID: 1.1, linked to parent Paragraph ContextID: 1
Creating Named Entities for Paragraph ID: DMnls5k2
Creating Defined Terms for Paragraph ID: DMnls5k2
Creating Defined Term node with ID: jMXLoRVO
Creating Defined Term node with ID: jkMqr9QV
Creating Defined Term node with ID: MiWiYi9U
Creating Defined Term node with ID: ciEbL6Ja
Creating Defined Term node with ID: bBWv6GLI
Creating Defined Term node with ID: BoQRStgI
Paragraph node created with ContextID: 2, linked directly to Document
Creating Named Entities for Paragraph ID: nhSt0T0P
Creating Defined Terms for Paragraph ID: nhSt0T0P
Paragraph node created with ContextID: 2.1, linked to parent P