# Alec Version (simplified, no custom search code)

In [9]:
import sys
import numpy as np
import pandas as pd
from time import time, time_ns
import os

import pgmpy
from AicScore import AicScore
from pgmpy.estimators import HillClimbSearch
from EfficientTimeShuffling import EfficientShuffle
import random

sys.path.append('/home/ags72/Documents/MTWDBN/Tools')
from GraphFunctions import graph_to_matrix_6pop_3timeslice, generate_starting_graph

# Run parameters. When launched as a batch script they are read from argv:
#   bootstrap = int(sys.argv[1])
#   iteration = int(sys.argv[2])
#   shuffle = sys.argv[3] in ['True', 'true']
#   drop = int(sys.argv[4])
# Here they are fixed for interactive use.
bootstrap, iteration, shuffle, drop = 0, 1, True, 0

# Abort early when the DAG output for this parameter combination already
# exists on disk, i.e. the search has been performed previously.
if shuffle:
    existing_dag_file = 'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy'
else:
    existing_dag_file = 'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy'
existing_dag_file = existing_dag_file.format(drop=drop, bootstrap=bootstrap, iteration=iteration)
if os.path.isfile(existing_dag_file):
    sys.exit(0)

# Hard-coded search settings
timelags = 3              # columns ending in 't<timelags>' form the last time slice
num_starting_points = 40  # number of random starting graphs for the hill climb

# Load the spike table for this drop level / iteration (first CSV column is the index)
spikes_df_all = pd.read_csv('Alec Dataframes/laminar2_spikes_drop_{drop}_iteration_{iteration}.csv'.format(drop=drop, iteration=iteration), index_col=0)

# Bootstrap: draw 8000 rows with replacement; the bootstrap index is the seed,
# so a given bootstrap always yields the same sample.
spikes_df = spikes_df_all.sample(8000, replace=True, random_state=bootstrap)

# Shuffle the data after the bootstrap sample has been selected.
# The shuffled frame must replace spikes_df: the scorer and the search below
# both read spikes_df, and the results are saved under the "_shuffle" name.
# (Previously the result was bound to an unused variable, so the search ran
# on the unshuffled sample.)
# NOTE(review): assumes the first return value of EfficientShuffle is the
# shuffled DataFrame, as it is used elsewhere in this notebook - confirm.
if shuffle:
    spikes_df, df_shufflekeys = EfficientShuffle(spikes_df, seed=time_ns())

# Result buffers, one slot per starting point: the starting graph and the
# learned DAG (each stored as an 18 x 6 matrix) and the score of the learned DAG.
DAGs = np.zeros((num_starting_points, 18, 6))
starting_graphs = np.zeros((num_starting_points, 18, 6))
scores = np.zeros(num_starting_points)

# Split the column names into the last time slice and everything before it.
last_slice_suffix = 't{}'.format(timelags)
all_pops = list(spikes_df_all.columns)
from_pops = [name for name in all_pops if not name.endswith(last_slice_suffix)]
to_pops = [name for name in all_pops if name.endswith(last_slice_suffix)]

# Edges allowed during the search: any column -> a last-time-slice column,
# which also permits edges inside the last time slice.
# The purely causal alternative would be:
# causal_whitelist = [(from_pop,to_pop) for from_pop in from_pops for to_pop in to_pops]
acausal_whitelist = [(source, target) for source in all_pops for target in to_pops]

# Scoring criterion, built on the bootstrap sample
aic = AicScore(spikes_df)

for starting_point in range(num_starting_points):

    # Create a random starting point. The seed is deliberately not fixed (the
    # starting graph itself is saved below). It is taken from the nanosecond
    # clock rather than whole seconds so that iterations or parallel jobs that
    # start within the same second do not get identical starting graphs.
    # np.random.seed only accepts values in [0, 2**32 - 1], hence the modulo.
    restart_seed = time_ns() % 2**32
    np.random.seed(restart_seed)
    random.seed(restart_seed)
    starting_graph = generate_starting_graph(nodes=all_pops, whitelist=acausal_whitelist, max_degree=4, last_time_nodes=to_pops)

    # Save the starting graph now, otherwise it appears that pgmpy modifies it
    # in place to resemble the final graph.
    starting_graphs[starting_point, :, :] = graph_to_matrix_6pop_3timeslice(list(starting_graph.edges()))

    # Perform hill-climb search for the DAG. The scorer built once on spikes_df
    # (`aic`) is reused instead of constructing an identical one per iteration.
    hc = HillClimbSearch(spikes_df)
    model = hc.estimate(tabu_length=7, max_indegree=None, white_list=acausal_whitelist, scoring_method=aic, start_dag=starting_graph)

    # Save resulting DAG and score
    DAGs[starting_point, :, :] = graph_to_matrix_6pop_3timeslice(list(model.edges()))
    scores[starting_point] = aic.score(model)

# Save all results: learned DAGs, starting graphs and scores each go to their
# own folder. Shuffled runs use file names ending in "_shuffle".
run_labels = dict(drop=drop, bootstrap=bootstrap, iteration=iteration)
if shuffle:
    outputs = [
        ('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy', DAGs),
        ('DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy', starting_graphs),
        ('DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy', scores),
    ]
else:
    outputs = [
        ('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy', DAGs),
        ('DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy', starting_graphs),
        ('DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy', scores),
    ]
for path_template, array in outputs:
    np.save(path_template.format(**run_labels), array)

   

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Anirban Search Code

In [45]:
import sys
import numpy as np
import pandas as pd
from time import time, time_ns
import os
import sys

import pgmpy
from AicScore import AicScore
from pgmpy.estimators import HillClimbSearch
from EfficientTimeShuffling import EfficientShuffle
import random

#Custom function
sys.path.append('/home/ags72/Documents/MTWDBN/Tools')
from GraphFunctions import graph_to_matrix_6pop_3timeslice, generate_starting_graph
from Step2FittingBayesianNetworkToData import HillClimbSearch5

# Input parameters, fixed here for interactive use.
bootstrap, iteration = 1, 1
shuffle = True
drop = 20

# Command-line equivalents for batch runs:
# bootstrap = int(sys.argv[1])
# iteration = int(sys.argv[2])
# shuffle = sys.argv[3] in ['True', 'true']
# drop = int(sys.argv[4])

# If the DAG file for these parameters is already on disk, the search has been
# performed previously: stop here.
run_key = {'drop': drop, 'bootstrap': bootstrap, 'iteration': iteration}
if shuffle:
    already_done = os.path.isfile('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy'.format(**run_key))
else:
    already_done = os.path.isfile('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy'.format(**run_key))
if already_done:
    sys.exit(0)

# Fixed search settings
timelags = 3
num_starting_points = 10


# Load spike table. Shuffled runs read a separate "_shuffle" CSV; both files
# are parameterised by drop level and iteration.
if shuffle:
    spike_csv = 'Dataframes/laminar2_spikes_drop_{drop}_iteration_{iteration}_shuffle.csv'
else:
    spike_csv = 'Dataframes/laminar2_spikes_drop_{drop}_iteration_{iteration}.csv'
spikes_df_all = pd.read_csv(spike_csv.format(drop=drop, iteration=iteration), index_col=0)

# Bootstrap sample: 8000 rows drawn with replacement, seeded by the bootstrap index
spikes_df = spikes_df_all.sample(n=8000, replace=True, random_state=bootstrap)

# Storage for the starting graphs, the resulting DAGs (18 x 6 matrix each)
# and the score of every resulting DAG.
dag_stack_shape = (num_starting_points, 18, 6)
DAGs = np.zeros(dag_stack_shape)
starting_graphs = np.zeros(dag_stack_shape)
scores = np.zeros(num_starting_points)

# Columns whose name ends in 't<timelags>' belong to the last time slice.
all_pops = list(spikes_df_all.columns)
target_suffix = 't{}'.format(timelags)
from_pops = [col for col in all_pops if not col.endswith(target_suffix)]
to_pops = [col for col in all_pops if col.endswith(target_suffix)]

# Whitelist of (from, to) edges: every column may point at a last-slice column,
# so edges within the last time slice are allowed too.
# causal_whitelist = [(from_pop,to_pop) for from_pop in from_pops for to_pop in to_pops]
acausal_whitelist = [(origin, destination) for origin in all_pops for destination in to_pops]

# Scoring criterion on the bootstrap sample
aic = AicScore(spikes_df)


# Custom hill-climb search object (HillClimbSearch5) using that scorer
hc = HillClimbSearch5(spikes_df, scoring_method=aic)


# Create random starting points with hc.createRandLegalDag3.
# The whole batch is regenerated until element [1] of every entry is distinct,
# so that num_starting_points unique graphs are produced.
# NOTE(review): element [1] is presumably the seed the graph was built from - confirm.
starter_graphs = []
while len(np.unique([entry[1] for entry in starter_graphs])) != num_starting_points:
    starter_graphs = []
    for _ in range(num_starting_points):
        # Re-seed from the nanosecond clock, then draw the seed handed to the generator
        random.seed(time_ns())
        graph_seed = random.randint(1, 2**32 - 1)
        starter_graphs.append(hc.createRandLegalDag3(timelags, maxdeg=4, seed=graph_seed))
print('{} unique starting points generated'.format(num_starting_points))


# Run the restricted search once per starting point.
for idx, starter in enumerate(starter_graphs):
    start_dag = starter[0]

    # Record the starting graph before the search is run on it
    starting_graphs[idx] = graph_to_matrix_6pop_3timeslice(list(start_dag.edges()))

    model = hc.RestrictedEstimatetl(timelags, start=start_dag, tabu_length=7, max_indegree=None, N=5)

    # Store the score and the resulting DAG
    scores[idx] = hc.scoring_method.score(model)
    DAGs[idx] = graph_to_matrix_6pop_3timeslice(list(model.edges()))


# Save all results. Each array goes to its own folder; the file name encodes
# drop, bootstrap and iteration, with a "_shuffle" ending for shuffled runs.
if shuffle:
    save_plan = {
        'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy': DAGs,
        'DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy': starting_graphs,
        'DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy': scores,
    }
else:
    save_plan = {
        'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy': DAGs,
        'DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy': starting_graphs,
        'DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy': scores,
    }
for file_template, values in save_plan.items():
    np.save(file_template.format(drop=drop, bootstrap=bootstrap, iteration=iteration), values)

   

1 unique starting points generated


# Anirban Search Code on His Dataframes

In [69]:
import sys
import numpy as np
import pandas as pd
from time import time, time_ns
import os
import sys
import pickle

import pgmpy
from AicScore import AicScore
from pgmpy.estimators import HillClimbSearch
from EfficientTimeShuffling import EfficientShuffle
import random

#Custom function
sys.path.append('/home/ags72/Documents/MTWDBN/Tools')
from GraphFunctions import graph_to_matrix_6pop_3timeslice, generate_starting_graph
from Step2FittingBayesianNetworkToData import HillClimbSearch5

# Input parameters, fixed here for interactive use.
bootstrap = 1
shuffle = False
drop = 20

# Command-line equivalents for batch runs:
# bootstrap = int(sys.argv[1])
# shuffle = sys.argv[2] in ['True', 'true']
# drop = int(sys.argv[3])

# Hard coded variables
timelags = 3
num_starting_points = 10
# `iteration` is not an input of this cell; it is fixed to 1 and used to label
# the output files. It must be assigned BEFORE the existence check below,
# which formats it into the path - otherwise a fresh run raises NameError.
iteration = 1

# Check if this search has been performed previously. If so, abort.
if shuffle:
    if os.path.isfile('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy'.format(drop=drop, bootstrap=bootstrap, iteration=iteration)):
        sys.exit(0)
else:
    if os.path.isfile('DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy'.format(drop=drop, bootstrap=bootstrap, iteration=iteration)):
        sys.exit(0)

# Load the spike table for this drop level and replace its index with a
# fresh 0..n-1 range (the first CSV column is read as the index and discarded).
spike_csv = 'Anirban Dataframes/{drop}Laminar_Set2_bins_1667_timelayers_3_trials_1000_units_45_pooled__triallength_2000.csv'.format(drop=drop)
spikes_df_all = pd.read_csv(spike_csv, index_col=0).reset_index(drop=True)

# Optional shuffle of the full table, seeded from the nanosecond clock
if shuffle:
    spikes_df_all, df_shufflekeys = EfficientShuffle(spikes_df_all, seed=time_ns())


# Bootstrap sample: 8000 rows with replacement, seeded by the bootstrap index
spikes_df = spikes_df_all.sample(n=8000, replace=True, random_state=bootstrap)

# Map the column names of this dataset onto the short population names used
# by the rest of the pipeline (spk2 -> E, spk1 -> I; layer number and time
# slice are carried over unchanged).
new_pop_names = {
    # time slice 1
    'V_lyr1_spk2_t1': 'E1_t1', 'V_lyr1_spk1_t1': 'I1_t1',
    'V_lyr2_spk2_t1': 'E2_t1', 'V_lyr2_spk1_t1': 'I2_t1',
    'V_lyr3_spk2_t1': 'E3_t1', 'V_lyr3_spk1_t1': 'I3_t1',
    # time slice 2
    'V_lyr1_spk2_t2': 'E1_t2', 'V_lyr1_spk1_t2': 'I1_t2',
    'V_lyr2_spk2_t2': 'E2_t2', 'V_lyr2_spk1_t2': 'I2_t2',
    'V_lyr3_spk2_t2': 'E3_t2', 'V_lyr3_spk1_t2': 'I3_t2',
    # time slice 3
    'V_lyr1_spk2_t3': 'E1_t3', 'V_lyr1_spk1_t3': 'I1_t3',
    'V_lyr2_spk2_t3': 'E2_t3', 'V_lyr2_spk1_t3': 'I2_t3',
    'V_lyr3_spk2_t3': 'E3_t3', 'V_lyr3_spk1_t3': 'I3_t3',
}

# Rename the bootstrap sample in place
spikes_df.rename(columns=new_pop_names, inplace=True)

# Storage for the starting graphs, the resulting DAGs (18 x 6 matrix each)
# and the score of every resulting DAG.
DAGs = np.zeros((num_starting_points, 18, 6))
starting_graphs = np.zeros((num_starting_points, 18, 6))
scores = np.zeros(num_starting_points)

# Population names come from the renamed bootstrap sample; names ending in
# 't<timelags>' belong to the last time slice.
all_pops = list(spikes_df.columns)
last_slice_tag = 't{}'.format(timelags)
from_pops = [label for label in all_pops if not label.endswith(last_slice_tag)]
to_pops = [label for label in all_pops if label.endswith(last_slice_tag)]

# Every column may point at a last-slice column, so edges inside the last
# time slice are allowed as well.
# causal_whitelist = [(from_pop,to_pop) for from_pop in from_pops for to_pop in to_pops]
acausal_whitelist = [(parent, child) for parent in all_pops for child in to_pops]

# Scoring criterion on the bootstrap sample
aic = AicScore(spikes_df)


# Custom hill-climb search object (HillClimbSearch5) using that scorer
hc = HillClimbSearch5(spikes_df, scoring_method=aic)


# Create random starting points with hc.createRandLegalDag3, regenerating the
# whole batch until element [1] of every entry is distinct, so that
# num_starting_points unique graphs are produced.
# NOTE(review): element [1] is presumably the seed the graph was built from - confirm.
starter_graphs = []
while True:
    distinct_tags = np.unique([candidate[1] for candidate in starter_graphs])
    if len(distinct_tags) == num_starting_points:
        break
    starter_graphs = []
    for _ in range(num_starting_points):
        # Re-seed from the nanosecond clock, then draw the seed handed to the generator
        random.seed(time_ns())
        dag_seed = random.randint(1, 2**32 - 1)
        starter_graphs.append(hc.createRandLegalDag3(timelags, maxdeg=4, seed=dag_seed))
print('{} unique starting points generated'.format(num_starting_points))


# One restricted search per starting point.
for idx, seed_entry in enumerate(starter_graphs):
    initial_dag = seed_entry[0]

    # Store the starting graph before the search is run on it
    starting_graphs[idx] = graph_to_matrix_6pop_3timeslice(list(initial_dag.edges()))

    model = hc.RestrictedEstimatetl(timelags, start=initial_dag, tabu_length=7, max_indegree=None, N=5)

    # Store the score and the resulting DAG
    scores[idx] = hc.scoring_method.score(model)
    DAGs[idx] = graph_to_matrix_6pop_3timeslice(list(model.edges()))


# Save all results. The three arrays go to separate folders; shuffled runs use
# file names ending in "_shuffle".
if shuffle:
    dag_file, start_file, score_file = (
        'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy',
        'DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy',
        'DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}_shuffle.npy',
    )
else:
    dag_file, start_file, score_file = (
        'DBN Outputs/DAGs/DAGs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy',
        'DBN Outputs/Starting Graphs/startGraphs_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy',
        'DBN Outputs/Scores/scores_drop{drop}_bootstrap{bootstrap}_iteration{iteration}.npy',
    )
name_fields = {'drop': drop, 'bootstrap': bootstrap, 'iteration': iteration}
np.save(dag_file.format(**name_fields), DAGs)
np.save(start_file.format(**name_fields), starting_graphs)
np.save(score_file.format(**name_fields), scores)

   

1 unique starting points generated


In [None]:
import numpy as np

