In [1]:
# !pip install -U --force-reinstall numpy matplotlib pyzmq jinja2 ipython

In [3]:
import pandas as pd
import numpy as np
import networkx as nx
from holoviews.operation.datashader import datashade, bundle_graph

In [None]:
import numpy as np
import pandas as pd
import holoviews as hv
import networkx as nx
from holoviews import opts

# Render HoloViews output with the Bokeh backend.
hv.extension('bokeh')

# Baseline 400x400 canvas applied to the graph-related element types.
defaults = {'width': 400, 'height': 400}
opts.defaults(*(
    element(**defaults) for element in (opts.EdgePaths, opts.Graph, opts.Nodes)
))

In [4]:
# Load the concept table and the semantic-type table from local CSV files.
# Later cells read the CUI/SUI/STR/SCUI columns of `con` and the CUI/STY columns of `sem`.
con = pd.read_csv('data/concepts.csv')
sem = pd.read_csv('data/semantype.csv')

In [5]:
# Keep only the concept columns used downstream.
# NOTE(review): the saved `nodes.head()` output further down shows no SCUI column,
# so that output was presumably produced with a different column list — confirm by
# restarting the kernel and running the notebook top to bottom.
con = con[['CUI', 'SUI', 'STR', 'SCUI']]

In [6]:
# # SUI is unique
# con['SUI'].nunique()

In [7]:
# # CUI is not unique
# con['CUI'].nunique()

In [8]:
# sem[sem.duplicated(['CUI'])]

In [439]:
# sem[sem['CUI'] == 'C0054208']

## Combine multiple semantic types (STY) per concept (CUI)

In [440]:
# Keep just the CUI and STY columns; these are the only ones the per-CUI aggregation needs.
sems = sem[['CUI', 'STY']]

In [441]:
# Collapse every CUI's STY values into a single string: the distinct values,
# in the sorted order returned by np.unique, joined with ' | '.
g = (
    sems
    .groupby('CUI')['STY']
    .apply(lambda sty_values: ' | '.join(np.unique(sty_values)))
)

In [442]:
# Preview the first few combined STY strings (indexed by CUI).
g.head()

CUI
C0000052             Amino Acid, Peptide, or Protein | Enzyme
C0000163                           Hormone | Organic Chemical
C0000167                           Hormone | Organic Chemical
C0000215    Hazardous or Poisonous Substance | Organic Che...
C0000220    Hazardous or Poisonous Substance | Organic Che...
Name: STY, dtype: object

## Join concept and semantic types

In [48]:
# Index the concepts by CUI and attach the combined STY string for each CUI.
# DataFrame.join defaults to a left join, so concept rows whose CUI is absent
# from `g` are kept with a missing STY.
nodes = con.set_index('CUI').join(g)

In [49]:
# Preview the joined node table.
nodes.head()

Unnamed: 0_level_0,SUI,STR,STY
CUI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C0000052,S0575717,"1,4-alpha-Glucan branching enzyme","Amino Acid, Peptide, or Protein | Enzyme"
C0000163,S0001456,17-Hydroxycorticosteroid,Hormone | Organic Chemical
C0000167,S3361366,17-Ketosteroid,Hormone | Organic Chemical
C0000215,S0576710,"2,4,5-Trichlorophenoxyacetic acid",Hazardous or Poisonous Substance | Organic Che...
C0000220,S0576727,"2,4-Dichlorophenoxyacetic acid",Hazardous or Poisonous Substance | Organic Che...


In [55]:
# Turn the CUI index back into an ordinary column before the table is saved.
nodes = nodes.reset_index()

In [56]:
# Save the node table without the row index. `index` is documented as a boolean,
# so pass False explicitly instead of relying on None being treated as falsy.
nodes.to_csv('data/nodes.csv', index=False)

## Links

In [70]:
# Load the relation table; later cells use its CUI1, CUI2, REL and RELA columns.
rel = pd.read_csv('data/relations.csv')

In [71]:
# Preview the raw relation rows.
rel.head()

Unnamed: 0,CUI1,AUI1,REL,CUI2,AUI2,RELA,RG
0,C2603351,A27786098,RN,C1959776,A13013550,part_of,0.0
1,C0024117,A3007198,RO,C3495060,A21460630,has_focus,0.0
2,C4707947,A29506081,RO,C0269506,A23045712,occurs_in,0.0
3,C0015967,A2881597,RO,C3536630,A21591815,associated_with,0.0
4,C0441987,A3200743,RO,C0924004,A11740683,has_laterality,0.0


In [72]:
# Two concepts can have more than one relationship, so look for rows that repeat
# the full (CUI1, CUI2, RELA) triple. The saved output is empty, i.e. no triple
# occurs twice; the same CUI pair under different RELA values is not flagged here.
rel[rel.duplicated(['CUI1', 'CUI2', 'RELA'])]

Unnamed: 0,CUI1,AUI1,REL,CUI2,AUI2,RELA,RG


In [74]:
# Show rows whose REL code is 'AQ' (the saved output contains none).
rel[rel['REL']=='AQ']

Unnamed: 0,CUI1,AUI1,REL,CUI2,AUI2,RELA,RG


In [76]:
# List the distinct REL codes present in the relation table.
rel['REL'].unique()

array(['RN', 'RO', 'CHD', 'SY'], dtype=object)

In [77]:
# Reduce the relations to an edge list: the two endpoint CUIs plus the RELA label.
rel = rel[['CUI1', 'CUI2', 'RELA']]

In [78]:
# Preview the reduced edge list.
rel.head()

Unnamed: 0,CUI1,CUI2,RELA
0,C2603351,C1959776,part_of
1,C0024117,C3495060,has_focus
2,C4707947,C0269506,occurs_in
3,C0015967,C3536630,associated_with
4,C0441987,C0924004,has_laterality


In [79]:
# Count the distinct RELA labels.
rel['RELA'].nunique()

103

In [80]:
# Save the edge list without the row index. `index` is documented as a boolean,
# so pass False explicitly instead of relying on None being treated as falsy.
rel.to_csv('data/edges.csv', index=False)

## Plot networks

In [475]:
# Read the node and edge tables back from the CSV files so the plotting section
# can start from the saved data.
cons = pd.read_csv('data/nodes.csv')
rels = pd.read_csv('data/edges.csv')

In [476]:
# Pick a random subset of SUIs to plot. A fixed seed makes the sample repeatable
# across kernel restarts, and the size is capped at the table length so the cell
# does not raise when fewer than SAMPLE_SIZE rows are available.
SAMPLE_SIZE = 2000
SAMPLE_SEED = 42
suis = cons['SUI'].sample(n=min(SAMPLE_SIZE, len(cons)), random_state=SAMPLE_SEED)

In [487]:
def network(suis):
    """Build a directed graph linking the selected strings (SUIs) by their relations.

    Reads the module-level tables ``cons`` (columns CUI, SUI, STR, STY) and
    ``rels`` (columns CUI1, CUI2, RELA).

    Relations are stored between CUIs. Each relation whose two CUIs are both
    among the selected rows is expanded to every (source SUI, target SUI) pair
    of selected rows carrying those CUIs. Relations from a CUI to itself are
    dropped.

    Parameters
    ----------
    suis : list-like
        SUI values to include; passed to ``Series.isin``.

    Returns
    -------
    networkx.DiGraph
        Nodes are SUIs with ``CUI``, ``Text`` (from STR) and ``Type`` (from STY)
        attributes; edges carry a ``Rela`` attribute. The graph is created from
        the edge list only, so selected SUIs that take part in no edge do not
        appear as nodes. A DiGraph holds one edge per ordered node pair, so when
        several relations expand to the same (source, target) pair only the one
        added last keeps its ``Rela`` value.
    """
    nodes = cons[cons['SUI'].isin(suis)]
    cuis = nodes['CUI'].unique()

    both_selected = rels['CUI1'].isin(cuis) & rels['CUI2'].isin(cuis)
    edges = rels[both_selected & (rels['CUI1'] != rels['CUI2'])]

    # Group the selected SUIs by CUI once, so expanding an edge is a dictionary
    # lookup instead of re-filtering `nodes` twice for every edge row.
    # Row order inside each group is kept, so edges are emitted in the same
    # order as a row-by-row expansion would produce.
    suis_by_cui = nodes.groupby('CUI', sort=False)['SUI'].apply(list).to_dict()

    re_edges = pd.DataFrame(
        [
            (sui1, sui2, rela)
            for cui1, cui2, rela in zip(edges['CUI1'], edges['CUI2'], edges['RELA'])
            for sui1 in suis_by_cui.get(cui1, ())
            for sui2 in suis_by_cui.get(cui2, ())
        ],
        columns=['Source', 'Target', 'Rela'],
    )

    G = nx.from_pandas_edgelist(re_edges, source='Source', target='Target',
                                edge_attr='Rela', create_using=nx.DiGraph)

    # Attach per-node metadata; the SUI-indexed view is built once and reused.
    node_attrs = nodes.set_index('SUI')
    for column, attr_name in (('CUI', 'CUI'), ('STR', 'Text'), ('STY', 'Type')):
        nx.set_node_attributes(G, node_attrs[column].to_dict(), attr_name)

    return G

In [488]:
# Build the directed graph for the sampled SUIs.
G = network(suis)

In [490]:
# Use an 800x800 canvas with both axes hidden for node and graph elements.
kwargs = {'width': 800, 'height': 800, 'xaxis': None, 'yaxis': None}
opts.defaults(*(element(**kwargs) for element in (opts.Nodes, opts.Graph)))

# Colour palette: black followed by the Category20 cycle values.
colors = ['#000000'] + hv.Cycle('Category20').values

# Position the nodes with networkx's Fruchterman-Reingold layout function.
fb_graph = hv.Graph.from_networkx(G, nx.layout.fruchterman_reingold_layout)

# Colour nodes by their 'Type' attribute and draw directed edges with arrowheads.
# This is the last expression in the cell, so its result is displayed.
fb_graph.opts(
    cmap=colors,
    node_size=10,
    node_color='Type',
    node_line_color='gray',
    edge_line_width=1,
    directed=True,
    arrowhead_length=0.02,
)

In [474]:
# bundled = bundle_graph(fb_graph)
# bundled

In [129]:
# from ipywidgets import interact
# %matplotlib inline
# import matplotlib.pyplot as plt

In [128]:
# # wrap a few graph generation functions so they have the same signature

# def random_lobster(n, m, k, p):
#     return nx.random_lobster(n, p, p / m)

# def powerlaw_cluster(n, m, k, p):
#     return nx.powerlaw_cluster_graph(n, m, p)

# def erdos_renyi(n, m, k, p):
#     return nx.erdos_renyi_graph(n, p)

# def newman_watts_strogatz(n, m, k, p):
#     return nx.newman_watts_strogatz_graph(n, k, p)

# def plot_random_graph(n, m, k, p, generator):
#     g = generator(n, m, k, p)
#     nx.draw(g)
#     plt.show()

In [127]:
# interact(plot_random_graph, n=(2,30), m=(1,10), k=(1,10), p=(0.0, 1.0, 0.001),
#          generator={
#              'lobster': random_lobster,
#              'power law': powerlaw_cluster,
#              'Newman-Watts-Strogatz': newman_watts_strogatz,
#              u'Erdős-Rényi': erdos_renyi,
#          });