In [1]:
pip install neo4j


Note: you may need to restart the kernel to use updated packages.


In [2]:
import getpass
import os

from neo4j import GraphDatabase

# Neo4j connection settings.
# Credentials are taken from the environment so that no secret is stored in
# the notebook. NOTE: a password was previously committed in this cell; it
# should be considered compromised and rotated.
url = os.environ.get("NEO4J_URI", "neo4j+s://39edf771.databases.neo4j.io")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
# Fall back to an interactive prompt when NEO4J_PASSWORD is unset or empty.
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

# Initialize the driver
driver = GraphDatabase.driver(url, auth=(username, password))

def _scalar(session, query, key):
    """Run ``query`` and return column ``key`` of its single result row."""
    return session.run(query).single()[key]


def get_neo4j_stats(driver):
    """Collect size and schema statistics from the graph behind ``driver``.

    Args:
        driver: Object whose ``session()`` returns a context manager yielding
            a session with a ``run(query)`` method.

    Returns:
        dict with the keys:
            num_nodes, num_rels: total node / relationship counts.
            num_prop_keys, num_classes: number of entries returned by
                ``db.propertyKeys()`` / ``db.labels()``.
            num_rdf_triples: same value as ``num_rels`` (one triple per
                directed relationship).
            num_instances: same value as ``num_nodes``.
            instantiated_class_ratio: ``num_instances / num_classes``
                (0 when there are no labels).
            instantiated_property_ratio: ``num_instances / num_prop_keys``
                (0 when there are no property keys).
            class_instantiation: list of ``{"label": [...], "count": n}``
                rows, one per distinct label list.
            inverse_multiple_inheritance, subclass_property_acquisition,
            subclass_property_instantiation: integer counts from the
                ontology-style queries below.
    """
    with driver.session() as session:
        # Number of nodes
        num_nodes = _scalar(session, "MATCH (n) RETURN count(n) AS num_nodes", "num_nodes")

        # Number of relationships
        num_rels = _scalar(session, "MATCH ()-[r]->() RETURN count(r) AS num_rels", "num_rels")

        # Number of property keys / labels known to the database
        num_prop_keys = len(session.run("CALL db.propertyKeys()").value())
        num_classes = len(session.run("CALL db.labels()").value())

        # A triple is defined here as one directed relationship and an
        # instance as one node, so both equal the counts already fetched;
        # re-running the same scans against the database is unnecessary.
        num_rdf_triples = num_rels
        num_instances = num_nodes

        # NOTE(review): despite the names, these are "nodes per label" and
        # "nodes per property key" averages, not ratios bounded by 1.
        instantiated_class_ratio = num_instances / num_classes if num_classes != 0 else 0
        instantiated_property_ratio = num_instances / num_prop_keys if num_prop_keys != 0 else 0

        # Class instantiation: node count grouped by the node's full label
        # list (so `label` is a list and may be empty for unlabeled nodes).
        class_instantiation = session.run("""
            MATCH (n)
            RETURN labels(n) AS label, count(n) AS count
        """).data()

        # Inverse multiple inheritance: number of :Class nodes matched with
        # more than one SUBCLASS_OF parent.
        inverse_multiple_inheritance = _scalar(session, """
            MATCH (c:Class)-[:SUBCLASS_OF]->(p:Class)
            WITH c, count(p) AS num_parents
            WHERE num_parents > 1
            RETURN count(c) AS inverse_multiple_inheritance
        """, "inverse_multiple_inheritance")

        # Subclass property acquisition: number of (subclass, parent,
        # property) matches where the parent HAS_PROPERTY the property.
        subclass_property_acquisition = _scalar(session, """
            MATCH (c:Class)-[:SUBCLASS_OF]->(p:Class)
            MATCH (p)-[:HAS_PROPERTY]->(prop)
            RETURN count(prop) AS subclass_property_acquisition
        """, "subclass_property_acquisition")

        # Subclass property instantiation: number of HAS_PROPERTY matches
        # starting at a :Subclass node.
        subclass_property_instantiation = _scalar(session, """
            MATCH (s:Subclass)-[:HAS_PROPERTY]->(prop)
            RETURN count(prop) AS subclass_property_instantiation
        """, "subclass_property_instantiation")

        return {
            "num_nodes": num_nodes,
            "num_rels": num_rels,
            "num_prop_keys": num_prop_keys,
            "num_classes": num_classes,
            "num_rdf_triples": num_rdf_triples,
            "num_instances": num_instances,
            "instantiated_class_ratio": instantiated_class_ratio,
            "instantiated_property_ratio": instantiated_property_ratio,
            "class_instantiation": class_instantiation,
            "inverse_multiple_inheritance": inverse_multiple_inheritance,
            "subclass_property_acquisition": subclass_property_acquisition,
            "subclass_property_instantiation": subclass_property_instantiation
        }

# Get Neo4j statistics
neo4j_stats = get_neo4j_stats(driver)

# Print the scalar statistics
for title, key in [
    ("Number of nodes", "num_nodes"),
    ("Number of relationships", "num_rels"),
    ("Number of property keys", "num_prop_keys"),
    ("Number of classes", "num_classes"),
    ("Number of RDF triples", "num_rdf_triples"),
    ("Number of instances", "num_instances"),
    ("Instantiated class ratio", "instantiated_class_ratio"),
    ("Instantiated property ratio", "instantiated_property_ratio"),
]:
    print(f"{title}: {neo4j_stats[key]}")

print("Class instantiation:")
for item in neo4j_stats['class_instantiation']:
    # item['label'] is the node's whole label list: it is empty for unlabeled
    # nodes (indexing [0] would raise IndexError) and has several entries for
    # multi-label nodes, so show every label instead of only the first.
    label_text = ", ".join(item['label']) or "<no label>"
    print(f"Label: {label_text}, Count: {item['count']}")

# Print the ontology-style counts
for title, key in [
    ("Inverse multiple inheritance", "inverse_multiple_inheritance"),
    ("Subclass property acquisition", "subclass_property_acquisition"),
    ("Subclass property instantiation", "subclass_property_instantiation"),
]:
    print(f"{title}: {neo4j_stats[key]}")


  from pandas.core import (


Number of nodes: 30849
Number of relationships: 13387
Number of property keys: 3564
Number of classes: 1320
Number of RDF triples: 13387
Number of instances: 30849
Instantiated class ratio: 23.370454545454546
Instantiated property ratio: 8.655723905723907
Class instantiation:
Label: Section, Count: 1226
Label: Financial term, Count: 3734
Label: Concept, Count: 4939
Label: Financial metric, Count: 1028
Label: Financialterm, Count: 903
Label: Entity, Count: 2581
Label: Subsection, Count: 63
Label: Financial_metric, Count: 1354
Label: Table, Count: 32
Label: Policyterm, Count: 35
Label: Geographicalsegment, Count: 4
Label: Riskweightedasset, Count: 3
Label: Financial_statement, Count: 355
Label: Financial_asset, Count: 48
Label: Department, Count: 213
Label: Sub_department, Count: 5
Label: Financialstatement, Count: 35
Label: Policy, Count: 229
Label: Financial concept, Count: 36
Label: Currency, Count: 54
Label: Benchmark interest rate, Count: 8
Label: Financial instrument, Count: 13
Lab