In [11]:
import json
import os
import random
import string

from neo4j import GraphDatabase

# Connection settings and input location.
# Each value may be overridden through an environment variable so that
# credentials and machine-specific paths do not have to be edited in the
# source; the literals below are the fallbacks used when a variable is unset.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "12345678")
json_file_path = os.environ.get(
    "COBS_JSON_PATH",
    r"C:\Users\Tuba.Gokhan\Desktop\KG\filtered_COBS.json",
)

class Neo4jConnection:
    """Load a document's paragraphs and obligations into a Neo4j database.

    The instance owns a Neo4j driver, hands out random IDs that are unique
    within this instance, and writes ``Document``, ``Paragraph`` and
    ``Obligation`` nodes plus ``CONTAINS`` / ``HAS_OBLIGATION`` relationships.
    """

    # Name of the Neo4j database every session in this class is opened against.
    _DATABASE = "documents"

    def __init__(self, uri, user, password):
        """Create the driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.used_ids = set()  # IDs already handed out by generate_unique_id
        self.document_title = None
        print("Connected to Neo4j.")

    def close(self):
        """Close the underlying driver."""
        self.driver.close()
        print("Connection to Neo4j closed.")

    def clear_database(self):
        """Delete every node and relationship and forget all issued IDs."""
        with self.driver.session(database=self._DATABASE) as session:
            session.run("MATCH (n) DETACH DELETE n")
            self.used_ids.clear()
            print("Previous data deleted from the database.")

    def generate_unique_id(self, length=8):
        """Return a random alphanumeric ID not previously issued by this instance.

        Args:
            length: Number of characters in the generated ID.

        Returns:
            A string of ASCII letters and digits; it is recorded in
            ``self.used_ids`` before being returned.
        """
        characters = string.ascii_letters + string.digits
        while True:
            unique_id = ''.join(random.choice(characters) for _ in range(length))
            if unique_id not in self.used_ids:
                self.used_ids.add(unique_id)
                return unique_id

    def create_paragraphs_and_obligations(self, document_title, document_version, document_date, data):
        """Rebuild the graph for one document from ``data``.

        The database is cleared first. A ``Document`` node is created, then one
        ``Paragraph`` node per item. A paragraph whose ``ContextID`` contains a
        dot is linked under the paragraph whose ``contextID`` equals everything
        before the last dot; otherwise it is linked directly to the document.
        Parents are looked up at insertion time, so a parent must appear in
        ``data`` before its children.

        Args:
            document_title: Stored as the Document's ``title``.
            document_version: Stored as the Document's ``version``.
            document_date: Stored as the Document's ``date``.
            data: Iterable of dicts with the keys ``'ContextID'``, ``'Text'``
                and ``'Obligations'`` (an iterable of raw obligation strings).
                A trailing ``'.'`` on ``'ContextID'`` is removed in place.
        """
        self.clear_database()  # Clear previous data before inserting new data
        self.document_title = document_title  # Store document title for later reference
        with self.driver.session(database=self._DATABASE) as session:
            doc_id = self.generate_unique_id()
            session.run(
                "CREATE (:Document {id: $id, title: $title, version: $version, date: $date})",
                id=doc_id, title=document_title, version=document_version, date=document_date,
            )
            print(f"Document node created with Title: {document_title}")

            for item in data:
                # Normalise "2.1." to "2.1" so the parent lookup below works.
                # The item is updated in place, as callers may rely on that.
                if item['ContextID'].endswith('.'):
                    item['ContextID'] = item['ContextID'][:-1]
                context_id = item['ContextID']

                para_id = self.generate_unique_id()
                session.run(
                    "CREATE (p:Paragraph {id: $id, contextID: $contextID, text: $text}) RETURN p",
                    id=para_id, contextID=context_id, text=item['Text'],
                )

                self._link_paragraph(session, doc_id, para_id, context_id)
                self._create_obligations(session, para_id, item['Obligations'])

    def _link_paragraph(self, session, doc_id, para_id, context_id):
        """Attach a new paragraph to its parent paragraph or to the document."""
        parent_context_id = '.'.join(context_id.split('.')[:-1])
        if not parent_context_id:
            # Top-level paragraph: link directly to the document.
            session.run("""
            MATCH (doc:Document {id: $doc_id}), (para:Paragraph {id: $para_id})
            MERGE (doc)-[:CONTAINS]->(para)
            """, doc_id=doc_id, para_id=para_id)
            print(f"Paragraph node created with ContextID: {context_id}, linked directly to Document")
            return

        result = session.run("""
        MATCH (parent:Paragraph {contextID: $parent_context_id}), (child:Paragraph {id: $para_id})
        MERGE (parent)-[:CONTAINS]->(child)
        """, parent_context_id=parent_context_id, para_id=para_id)
        # The MATCH yields no rows when the parent does not exist (yet), in
        # which case nothing is merged; report that instead of claiming success.
        if result.consume().counters.relationships_created:
            print(f"Paragraph node created with ContextID: {context_id}, linked to parent Paragraph ContextID: {parent_context_id}")
        else:
            print(f"Warning: Paragraph node created with ContextID: {context_id}, but parent Paragraph ContextID: {parent_context_id} was not found; left unlinked")

    def _create_obligations(self, session, para_id, obligations):
        """Create one Obligation node per raw string and link it to the paragraph."""
        for obligation in obligations:
            heading, description = self.parse_obligation_text(obligation)
            obli_id = self.generate_unique_id()
            session.run("""
            MATCH (p:Paragraph {id: $para_id})
            CREATE (o:Obligation {id: $obli_id, heading: $heading, description: $description})
            MERGE (p)-[:HAS_OBLIGATION]->(o)
            """, para_id=para_id, obli_id=obli_id, heading=heading, description=description)

    def parse_obligation_text(self, text):
        """Split one raw obligation string into ``(heading, description)``.

        A single trailing double quote is dropped first. Then:

        * If the text contains ``**``, the heading is the part between the
          first and second double quote of the text before the first ``**``
          (or that whole prefix, stripped, when it holds no double quote), and
          the description is the text between the first and second ``**``.
        * Otherwise, everything from the first ``' "'`` onward is used as both
          heading and description; if that sequence is absent the heading is
          the literal ``"Invalid format or missing double quotes"`` and the
          description is empty.

        Finally a trailing ``':'`` and a leading ``'"'`` are removed from the
        heading, and a leading ``'"'`` followed by a leading ``':'`` are removed
        from the description.

        Args:
            text: Raw obligation string.

        Returns:
            Tuple ``(heading, description)`` of strings.
        """
        # Remove the last character if it is a double quote
        if text.endswith('"'):
            text = text[:-1]

        if '**' in text:
            segments = text.split('**')
            prefix = segments[0]
            quoted = prefix.split('"')
            # Without a double quote before the marker there is no quoted part
            # to pick; use the prefix itself rather than raising IndexError.
            heading = quoted[1] if len(quoted) > 1 else prefix.strip()
            # NOTE(review): text after a second '**' is discarded here, as in
            # the original implementation - confirm that is intended.
            description = segments[1]
        else:
            quote_index = text.find(' "')
            if quote_index == -1:
                # No ' "' sequence: the text does not have the expected shape.
                heading = "Invalid format or missing double quotes"
                description = ""
            else:
                # Keep everything from the opening double quote onward.
                heading = description = text[quote_index + 1:]

        # Trim the delimiter characters left over from the split.
        if heading.endswith(':'):
            heading = heading[:-1]
        if heading.startswith('"'):
            heading = heading[1:]
        if description.startswith('"'):
            description = description[1:]
        if description.startswith(':'):
            description = description[1:]

        return heading, description

if __name__ == "__main__":
    # Script entry point: load the paragraph/obligation JSON and write it to
    # Neo4j as a single document.
    conn = Neo4jConnection(uri, user, password)
    try:
        # JSON is read as UTF-8 explicitly rather than with the platform's
        # default encoding.
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        document_info = {
            "title": "Conduct of Business Rulebook (COBS)",
            "version": "VER15.150823",
            "date": "15/08/2023"
        }

        conn.create_paragraphs_and_obligations(document_info['title'], document_info['version'], document_info['date'], data)
    finally:
        # Always release the driver, even if reading the file or loading fails.
        conn.close()


Connected to Neo4j.
Previous data deleted from the database.
Document node created with Title: Conduct of Business Rulebook (COBS)
Paragraph node created with ContextID: 1, linked directly to Document
Paragraph node created with ContextID: 1.1, linked to parent Paragraph ContextID: 1
Paragraph node created with ContextID: 2, linked directly to Document
Paragraph node created with ContextID: 2.1, linked to parent Paragraph ContextID: 2
Paragraph node created with ContextID: 2.1.1, linked to parent Paragraph ContextID: 2.1
Paragraph node created with ContextID: 2.1.2, linked to parent Paragraph ContextID: 2.1
Paragraph node created with ContextID: 2.1.3, linked to parent Paragraph ContextID: 2.1
Paragraph node created with ContextID: 2.1.3.Guidance, linked to parent Paragraph ContextID: 2.1.3
Paragraph node created with ContextID: 2.1.3.Guidance.1, linked to parent Paragraph ContextID: 2.1.3.Guidance
Paragraph node created with ContextID: 2.1.3.Guidance.2, linked to parent Paragraph Cont