In [None]:
import json
import os

In [None]:
import pandas as pd
import networkx as nx

In [None]:
from iac_sketch import data, etl, sketch, transform
from iac_sketch.extract import extract_python, extract_yaml
from iac_sketch import system_tests

In [None]:
# DEBUG: re-import the project modules so that source edits are picked up
# without restarting the kernel.
import importlib

# `sketch` is reloaded last, as its own trailing expression, so the module it
# returns remains the cell's displayed output.
# NOTE(review): the order presumably follows the modules' dependency order
# (dependencies first) — confirm before rearranging.
for stale_module in (
    data,
    extract_yaml,
    extract_python,
    etl,
    transform,
    system_tests,
):
    importlib.reload(stale_module)
importlib.reload(sketch)

In [None]:
# Patterns for the files the architect should pick up: YAML under manifest/
# and Python under ra_lib/.
source_patterns = [
    "./manifest/**/*.yaml",
    "./ra_lib/**/*.py",
]
architect = sketch.Architect(
    root_dir="./test_data/healthcare_example",
    filename_patterns=source_patterns,
)

# Run the ETL that produces the registry queried by the cells below.
registry = architect.perform_registry_etl()

# Validate the registry; the call is unpacked into a (tests, test_results) pair.
# NOTE(review): the exact meaning of min_priority and allowed_infrastructure is
# defined by validate_registry and is not visible here — confirm in iac_sketch.
validation_options = {
    "min_priority": 0.7,
    "allowed_infrastructure": ["research_analytics_infrastructure"],
}
tests, test_results = architect.validate_registry(**validation_options)

In [None]:
# Select entities to export: every distinct value of the "entity" index level
# among the entity_source rows whose source is 'user'.
entity_sources = registry.view("entity_source")
user_entity_sources = entity_sources.query("source == 'user'")
entities = user_entity_sources.index.get_level_values("entity").unique()
entities

In [None]:
# Expand the entity set to a fixed point: look up the component instances of
# the current entities, add their component types as entities in their own
# right, and stop once a pass adds nothing new.
compinsts = registry.view("compinst")

# An empty starting set skips the loop entirely.
still_growing = len(entities) > 0
while still_growing:
    size_before = len(entities)
    component_types = compinsts.loc[entities, "component_type"].unique()
    entities = entities.union(component_types)
    # union never shrinks the index, so "grew" is the same as "size changed".
    still_growing = len(entities) > size_before
entities

In [None]:
# Get nodes data: one (entity, JSON string) pair per selected entity.
skip_types = ["compdef"]


def _entity_json(entity):
    """Return the entity's registry view as a JSON string.

    Top-level keys listed in ``skip_types`` are dropped before serialising.
    NOTE(review): the keys are presumably component type names — confirm
    against ``registry.view_entity``.
    """
    components = registry.view_entity(
        entity,
        output_yaml=False,
        print_output=False,
    )
    kept = {
        name: value for name, value in components.items() if name not in skip_types
    }
    return json.dumps(kept)


entity_data = [(entity, _entity_json(entity)) for entity in entities]

In [None]:
# Get edges data: keep only the links whose source and target are both among
# the selected entities.
all_links = registry.view("link")
links = all_links.query("source.isin(@entities) & target.isin(@entities)")

In [None]:
# Build graph: one directed edge per row of `links`, keyed by its link_type so
# that parallel links between the same pair of entities stay distinct.
graph = nx.from_pandas_edgelist(
    links,
    source="source",
    target="target",
    edge_key="link_type",
    create_using=nx.MultiDiGraph,
)

# Attach each entity's JSON payload as a node attribute (this also adds the
# entities that have no links). `entity_data` holds (entity, json_str) tuples;
# add_nodes_from only unpacks an item as (node, attributes) when the second
# element is a dict. Passing the raw tuples would add each whole tuple as a
# separate node and never attach the JSON to the entity node, so the string is
# wrapped in an attribute dict first.
graph.add_nodes_from(
    (entity, {"entity_data": entity_json}) for entity, entity_json in entity_data
)

In [None]:
# Export to GraphML, written relative to the notebook's working directory.
graphml_path = "./healthcare_example.graphml"
nx.write_graphml(graph, graphml_path)