In [1]:
import json
import os

In [2]:
import pandas as pd
import networkx as nx

In [3]:
from iac_sketch import data, etl, sketch, transform
from iac_sketch.extract import extract_python, extract_yaml
from iac_sketch import system_tests

In [4]:
# DEBUG
import importlib

importlib.reload(data)
importlib.reload(extract_yaml)
importlib.reload(extract_python)
importlib.reload(etl)
importlib.reload(transform)
importlib.reload(system_tests)
importlib.reload(sketch)

<module 'iac_sketch.sketch' from '/Users/zhafen/repos/iac-sketch/iac_sketch/sketch.py'>

In [5]:
architect = sketch.Architect(
    root_dir="./test_data/healthcare_example",
    filename_patterns=[
        "./manifest/**/*.yaml",
        "./manifest/**/*.py",
    ],
)
registry = architect.perform_registry_etl()
tests, test_results = architect.validate_registry(
    min_priority=0.7,
    allowed_infrastructure=["research_analytics_infrastructure"],
)

entity: ../../../iac_sketch/system_tests.test_designed
requirement: fully_designed
priority: 0.9
description:
    Each [requirement] entity has at least one [satisfies] entity or one [child]
    entity.
test_passed: False
failed_components:


Unnamed: 0,entity,requirement.comp_key,requirement.priority,requirement.value,link.source,link.target,link.link_type,link.value,status.comp_key,status.value,test.comp_key,test.value
50,can_identify_patients_in_a_given_cohort_at_a_g...,1,1.0,research_analytics_infrastructure,,,,,,,,
59,can_query_onprem_data,0,1.0,,,,,,,,,


--------------------------------------------------------------------------------

entity: ../../../iac_sketch/system_tests.test_defined
requirement: fully_defined
priority: 0.8
description:
    All [compdef] entities are defined and valid.
test_passed: False
failed_components:


Unnamed: 0_level_0,comp_key,component.table,multiplicity,unparsed_fields,is_defined,fields,is_valid,errors
entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
benefit,0,,0..*,{},False,"{'entity': <Schema Field(name=entity, type=Dat...",False,Component definition does not exist.
drawback,0,,0..*,{},False,"{'entity': <Schema Field(name=entity, type=Dat...",False,Component definition does not exist.
git_repo,0,,0..*,{},False,"{'entity': <Schema Field(name=entity, type=Dat...",False,Component definition does not exist.
question,0,,0..*,{},False,"{'entity': <Schema Field(name=entity, type=Dat...",False,Component definition does not exist.
representation_of,0,,0..*,{},False,"{'entity': <Schema Field(name=entity, type=Dat...",False,Component definition does not exist.


--------------------------------------------------------------------------------

entity: ../../../iac_sketch/system_tests.test_implemented
requirement: fully_implemented
priority: 0.7
description:
    Meets the same criteria as "fully_designed" and all [satisfies] entities
    either have a test that passes when executed or have a status of "in
    production".
test_passed: False
failed_components:


Unnamed: 0,entity,requirement.comp_key,requirement.priority,requirement.value,link.source,link.target,link.link_type,link.value,status.comp_key,status.value,test.comp_key,test.value
0,can_accept_and_process_request,1,1.0,research_analytics_infrastructure,intake_request_workflow,can_accept_and_process_request,satisfies,,2.0,in development,,
1,can_accept_and_process_request,1,1.0,research_analytics_infrastructure,intake_request_workflow,can_accept_and_process_request,satisfies,,5.0,in development,,
2,can_approve_analyst_role,1,0.9,,analyst_skills_assessment,can_approve_analyst_role,satisfies,,2.0,new,,
3,can_approve_power_user,0,0.9,,power_user_approval_process,can_approve_power_user,satisfies,,1.0,in development,,
6,can_audit_data_access_and_usage,0,1.0,research_analytics_infrastructure,compliance_reports_suite,can_audit_data_access_and_usage,satisfies,,2.0,new,,
8,can_build_reports,0,1.0,research_analytics_infrastructure,build_report_workflow,can_build_reports,satisfies,,4.0,in development,,
13,can_complete_requests_with_analysts,1,1.0,,can_support_and_manage_analysts,can_complete_requests_with_analysts,satisfies,,,,,
14,can_complete_requests_with_analysts,1,1.0,,cloud_framework_is_usable,can_complete_requests_with_analysts,satisfies,,,,,
15,can_control_report_access,0,1.0,research_analytics_infrastructure,set_report_access,can_control_report_access,satisfies,,4.0,in development,,
21,can_deliver_data_for_a_given_request,0,1.0,research_analytics_infrastructure,request_workflow,can_deliver_data_for_a_given_request,satisfies,,3.0,in development,,


--------------------------------------------------------------------------------



In [6]:
# Select entities to export
entities = (
    registry.view("entity_source")
    .query("source == 'user'")
    .index.get_level_values("entity")
    .unique()
)
entities

Index(['AzNPDFSMPowerUserTeam', 'AzNPDResearchAnalyticsTeam',
       'AzPRDFSMPowerUserTeam', 'AzPRDResearchAnalyticsTeam',
       'accept_intake_form', 'analyst_laptop', 'analyst_skills_assessment',
       'apply_security_groups_to_reports', 'assign_analyst', 'azdo_work_item',
       ...
       'training_process', 'turn_on_pbi_refresh',
       'updated_report_new_analyst_test', 'updated_report_test',
       'variable_naming_conventions', 'work_on_report', 'worklog',
       'write_report_to_catalog', 'write_report_to_sharepoint',
       'write_report_to_volume'],
      dtype='object', name='entity', length=168)

In [7]:
# Get component instances for those entities
# and add the component types themselves to the list of entities.
# Repeat until no new entities are found
compinsts = registry.view("compinst")
n_current = len(entities)
n_new = n_current
while n_new > 0:
    new_entities = compinsts.loc[entities, "component_type"].unique()
    entities = entities.union(new_entities)
    n_new = len(entities) - n_current
    n_current = len(entities)
entities

Index(['AzNPDFSMPowerUserTeam', 'AzNPDResearchAnalyticsTeam',
       'AzPRDFSMPowerUserTeam', 'AzPRDResearchAnalyticsTeam',
       'accept_intake_form', 'analyst_laptop', 'analyst_skills_assessment',
       'apply_security_groups_to_reports', 'assign_analyst', 'azdo_work_item',
       ...
       'turn_on_pbi_refresh', 'updated_report_new_analyst_test',
       'updated_report_test', 'url', 'variable_naming_conventions',
       'work_on_report', 'worklog', 'write_report_to_catalog',
       'write_report_to_sharepoint', 'write_report_to_volume'],
      dtype='object', name='entity', length=199)

In [8]:
# Get nodes data
entity_data = []
skip_types = ["compdef"]
for entity in entities:
    entity_data_i = registry.view_entity(
        entity,
        output_yaml=False,
        print_output=False,
    )

    entity_data_i = {k: v for k, v in entity_data_i.items() if k not in skip_types}


    entity_data_str_i = json.dumps(entity_data_i)
    entity_data.append((entity, entity_data_str_i))

In [9]:
# Get edges data
links = registry.view("link").query("source.isin(@entities) & target.isin(@entities)")

In [10]:
# Build graph
graph = nx.from_pandas_edgelist(
    links,
    source="source",
    target="target",
    edge_key="link_type",
    create_using=nx.MultiDiGraph,
)
graph.add_nodes_from(entity_data)

In [11]:
list(nx.generate_graphml(graph))

['<graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">',
 '  <graph edgedefault="directed">',
 '    <node id="analyst_skills_assessment" />',
 '    <node id="can_approve_analyst_role" />',
 '    <node id="work_on_report" />',
 '    <node id="get_feedback_on_report" />',
 '    <node id="finalize_report" />',
 '    <node id="build_report_workflow" />',
 '    <node id="can_build_reports" />',
 '    <node id="can_hire_or_approve_analyst" />',
 '    <node id="can_approve_power_user" />',
 '    <node id="can_identify_patients_in_a_given_cohort_at_a_given_time" />',
 '    <node id="can_audit_data_access_and_usage" />',
 '    <node id="can_inspect_data_access_history" />',
 '    <node id="cloud_framework_is_usable" />',
 '    <node id="can_complete_requests_with_analysts" />',
 '    <node id="can_support_and_manage_analysts" /

In [12]:
# Export to graph ML
nx.write_graphml(graph, "./healthcare_example.graphml")