In [1]:
# default dependencies
import pandas as pd
import duckdb

# services
from snomed_characterization.services.import_duckdb_concepts_to_snomed_graph import (
    ImportDuckDBConceptsToSNOMEDGraph,
)

from snomed_characterization.services.load_snomed_concepts_from_duckdb import (
    LoadSNOMEDConceptsFromDuckdb,
)

from snomed_characterization.services.import_duckdb_concepts_to_snomed_complete_graph import (
    ImportDuckDBConceptsToCompleteSNOMEDGraph)

#graph
from snomed_characterization.snomed_graph import SNOMEDGraph
from snomed_characterization.graphs.snomed_complete_graph import SNOMEDCompleteGraph


# Create DuckDB DB

In [2]:
# Paths of the vocabulary CSV exports that are loaded into the target database.
path_concepts = "data/vocabulary/CONCEPT.csv"
path_concept_ancestors = "data/vocabulary/CONCEPT_ANCESTOR.csv"

# Read the `person` table from the existing database. The connection is opened
# read-only and is always closed, even if the query fails.
source_con = duckdb.connect("data/data.duckdb", read_only=True)
try:
    patients_df = source_con.execute("SELECT * FROM person").fetchdf()
finally:
    source_con.close()

# Open (or create) the target database and populate it. `IF NOT EXISTS` makes
# every statement a no-op when the table is already present, so the cell can be
# re-run safely.
target_con = duckdb.connect('data/full_data.duckdb')
try:
    # Load CSVs as tables
    target_con.execute(f"CREATE TABLE IF not exists concept_ancestor AS SELECT * FROM read_csv_auto('{path_concept_ancestors}');")
    target_con.execute(f"CREATE TABLE IF not exists concept AS SELECT * FROM read_csv_auto('{path_concepts}', quote='|');")

    # `patients_df` is referenced by name inside the SQL text; DuckDB resolves it
    # to the pandas DataFrame defined above.
    target_con.execute("CREATE TABLE  IF NOT EXISTS person AS SELECT * FROM patients_df")
finally:
    # Release the file handle so later cells can open the same database.
    target_con.close()

There are 100 concepts we are interested in for people.

| Column Name            | Data Type  |
|------------------------|------------|
| concept_id             | BIGINT     |
| concept_name           | VARCHAR    |
| domain_id              | VARCHAR    |
| vocabulary_id          | VARCHAR    |
| concept_class_id       | VARCHAR    |
| standard_concept       | VARCHAR    |
| concept_code           | VARCHAR    |
| valid_start_date       | BIGINT     |
| valid_end_date         | BIGINT     |
| invalid_reason         | VARCHAR    |
| ancestor_concept_id    | BIGINT     |
| descendant_concept_id  | BIGINT     |
| min_levels_of_separation | BIGINT   |
| max_levels_of_separation | BIGINT   |

In [3]:
# Populate a SNOMEDGraph from the DuckDB database file.
db_path = "data/full_data.duckdb"
g = SNOMEDGraph()

importer_service = ImportDuckDBConceptsToSNOMEDGraph(db_path, g)
_ = importer_service.call()  # return value is intentionally discarded

# Graph object held by the wrapper; the cells below work on it directly.
graph = g.graph

node_count = len(graph.nodes)
print(node_count)


99514


In [4]:
# Keep only the nodes whose out-degree is zero, i.e. nodes with no outgoing edges.
zero_out_degree_nodes = [
    node_id
    for node_id, out_deg in graph.out_degree()
    if out_deg == 0
]

In [5]:
# Number of nodes that have no outgoing edges.
len(zero_out_degree_nodes)

803

In [6]:
# List the distinct domains present in the `concept` table.
q_domains = "select distinct(domain_id) from concept"

conn = duckdb.connect('data/full_data.duckdb')
try:
    df = conn.execute(q_domains).fetchdf()
finally:
    # Always release the database file, even if the query fails.
    conn.close()

# Must be the last expression of the cell: Jupyter only renders the value of the
# final expression, so the frame is shown after the connection has been closed.
df


# Full Graph with descriptions

## domains list


## sql
This query was used to create the concepts
```sql
    select * from concept c 
    left join concept_ancestor ca
    on c.concept_id=ca.descendant_concept_id
    where c.invalid_reason is  null
    AND ca.min_levels_of_separation=1
     and standard_concept is not null
     and domain_id='Condition'
```

In [7]:
# Populate a SNOMEDCompleteGraph from the same DuckDB database file.
db_path = "data/full_data.duckdb"
g2 = SNOMEDCompleteGraph()

importer_service = ImportDuckDBConceptsToCompleteSNOMEDGraph(db_path, g2)
_ = importer_service.call()  # return value is intentionally discarded

# NOTE: this rebinds `graph`, which previously referred to the SNOMEDGraph built
# in an earlier cell.
graph = g2.graph

node_count = len(graph.nodes)
print(node_count)


99224


In [8]:
## Creating neo4j graph
import os

from snomed_characterization.services.import_nx_graph_into_neo4j import (
    ImportNXGraphIntoNeo4J,
)

# Connection settings can be supplied through the environment so that real
# credentials never have to be committed with the notebook. The fallbacks are
# the values used for the local development instance.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "12345678")

db_path = "data/full_data.duckdb"
service = ImportNXGraphIntoNeo4J(
    db_path=db_path, uri=neo4j_uri, user=neo4j_user, password=neo4j_password
)
service.call()


Graph successfully loaded into Neo4j


# Graph of concepts from Domain 'Condition'

![SNOMED](images/sample_snomed.png)