In [14]:
# Enable IPython's autoreload so edits to imported modules are picked up live,
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import sys
sys.path.append("../../src") 

import pickle

import pandas as pd
import numpy as np
import networkx as nx
import pymc3 as pm

from itertools import combinations
from tqdm import tqdm
from IPython.display import clear_output

import MCMC_Edges as mcmc

In [16]:
# Node names added to every graph built in this notebook; their order fixes the
# order of the pairs enumerated below.
LUCAS_NODES = (
    'Smoking',
    'Lung_cancer',
    'Genetics',
    'Attention_Disorder',
    'Yellow_Fingers',
    'Anxiety',
    'Peer_Pressure',
    'Allergy',
    'Coughing',
    'Fatigue',
    'Car_Accident',
    'Born_an_Even_Day',
)

# Every unordered pair of distinct nodes, in itertools.combinations order.
POSSIBLE_EDGES = list(combinations(LUCAS_NODES, 2))

In [17]:
# Build one directed graph per line of final_population.txt. Each line is a
# sequence of ';'-terminated edge entries; whatever follows the last ';' on the
# line is discarded.
networks = []

# The context manager closes the file even if parsing fails part-way through
# (the handle was previously left open for the lifetime of the kernel).
with open("final_population.txt", "r") as f:
    for x in f:
        edges_string = x.split(';')[:-1]

        # For each entry: drop its first and last character, strip quotes and
        # spaces, then split on ',' to obtain the node names of that edge.
        network = [
            edge_str[1:-1].replace("'", "").replace(' ', '').split(',')
            for edge_str in edges_string
        ]

        # Every graph carries the full node set, even nodes without edges.
        graph = nx.DiGraph()
        graph.add_nodes_from(LUCAS_NODES)
        graph.add_edges_from(network)
        networks.append(graph)

print(str(len(networks)) + ' networks')

605 networks


In [18]:
def _edge_code(graph, edge):
    """Return -1 if `edge` is in `graph` as given, 1 if only its reverse is, else 0."""
    present = graph.edges
    if edge in present:
        return -1
    if (edge[1], edge[0]) in present:
        return 1
    return 0


# One entry per network. Each entry wraps a single row holding one code per pair
# in POSSIBLE_EDGES, so the result keeps a length-1 middle axis.
observed_networks = np.array([
    np.array([[_edge_code(network, edge) for edge in POSSIBLE_EDGES]])
    for network in networks
])

In [19]:
# Spot-check: the row of edge codes stored for the network at index 600.
observed_networks[600, 0]

array([ 1,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [20]:
# Edge codes used when building observed_networks.
values = [-1, 0, 1]
# Number of distinct edge codes and number of loaded networks.
k, n = len(values), len(networks)
# Length of a single network's entry in observed_networks (its second axis).
total_count = observed_networks.shape[1]

In [21]:
def learn_edge(edge, edge_index, chains=10, tune=10000, draws=10000, nthreads=10, random_seed=None):
    """Fit a Dirichlet-multinomial model for one candidate edge and pickle the result.

    The observed data comes from ``mcmc.get_edge_frequency`` applied to the
    notebook-level ``values`` and ``observed_networks``; the model also reads the
    notebook-level ``k``, ``n`` and ``total_count``.

    Parameters
    ----------
    edge : sequence of two str
        Node names of the edge; used only to build the output file name.
    edge_index : int
        Index forwarded to ``mcmc.get_edge_frequency``.
    chains, tune, draws : int
        Forwarded to ``pm.sample``; also embedded in the output file name.
    nthreads : int
        Forwarded to ``pm.sample`` as ``cores``.
    random_seed : int or list of int, optional
        Forwarded to ``pm.sample`` to make a run repeatable. The default
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    The result of ``pm.summary`` on the sampled trace.

    Side effects
    ------------
    Writes a pickle with the model, its variables and the trace to
    ``./mcmc/sono_joao_c<chains>_t<tune>_d<draws>_mcmc_<edge0>-<edge1>.pickle``.
    """
    import os

    output_dir = './mcmc'
    # Create the output directory before sampling: a missing directory would
    # otherwise only surface as FileNotFoundError after the whole run finished.
    os.makedirs(output_dir, exist_ok=True)
    output_path = (
        f"{output_dir}/sono_joao_c{chains}_t{tune}_d{draws}"
        f"_mcmc_{edge[0]}-{edge[1]}.pickle"
    )

    observed_edges = mcmc.get_edge_frequency(values, observed_networks, edge_index)

    with pm.Model() as model_dm_explicit:
        frac = pm.Dirichlet("frac", a=np.ones(k))
        conc = pm.Lognormal("conc", mu=1, sigma=1)
        counts = pm.DirichletMultinomial(
            "counts", n=total_count, a=frac * conc, shape=(n, k), observed=observed_edges
        )

        trace_dm_explicit = pm.sample(
            chains=chains,
            tune=tune,
            draws=draws,
            step=pm.NUTS(),
            return_inferencedata=False,
            cores=nthreads,
            random_seed=random_seed,
        )

        # Persist everything needed to inspect this edge later without resampling.
        with open(output_path, 'wb') as handle:
            pickle.dump({
                'model': model_dm_explicit,
                'frac':  frac,
                'conc': conc,
                'counts': counts,
                'trace': trace_dm_explicit
            }, handle, protocol=pickle.HIGHEST_PROTOCOL)

        # Summarise while still inside the model context, as the original did.
        return pm.summary(trace_dm_explicit)

In [None]:
# Summary returned by learn_edge for each candidate edge, keyed "<node_a>-<node_b>".
models = {}

for edge_index, edge in enumerate(tqdm(POSSIBLE_EDGES)):
    summary = learn_edge(
        edge,
        edge_index,
        chains=20,
        tune=25000,
        draws=50000,
        nthreads=20,
    )
    models["-".join(edge)] = summary

    # Replace the previous edge's sampler output instead of stacking it.
    clear_output(wait=True)

  2%|▌                                        | 1/66 [03:21<3:38:40, 201.85s/it]Multiprocess sampling (20 chains in 20 jobs)
NUTS: [conc, frac]
