## Imports

In [None]:
from typing import List
from itertools import product
import warnings

from baynet import DAG, metrics

import tqdm.notebook as tq

import numpy as np
import pandas as pd

import networkx as nx
from cdt.causality import graph
from cdt.utils import dagify_min_edge

## Utility Functions

In [None]:
def _cdt_to_baynet(cpdag: nx.DiGraph, columns: List[str]) -> DAG:
    """
    Convert a networkx directed graph into a BayNet DAG via its adjacency matrix.

    :param cpdag: The networkx graph to convert (networkx.DiGraph)
    :param columns: The node names handed to BayNet alongside the adjacency matrix
    :return: The BayNet DAG built by ``DAG.from_amat``
    """
    # `nx.adj_matrix` was a deprecated alias and no longer exists in
    # networkx 3.0; `adjacency_matrix` is the supported name. `.toarray()`
    # gives a plain 2-D ndarray, as `np.array(....todense())` did before.
    # NOTE(review): rows/columns follow `cpdag.nodes()` order, which is assumed
    # to line up with `columns` -- confirm against the graphs CDT returns.
    adj_matrix = nx.adjacency_matrix(cpdag).toarray()
    return DAG.from_amat(adj_matrix, columns)

def PC(data: pd.DataFrame, ci_test: str = "discrete") -> DAG:
    """
    Learn a structure from data with the PC algorithm provided by CDT.

    The graph predicted by CDT is passed through ``dagify_min_edge`` and then
    converted to a BayNet DAG using the column names of ``data``.

    :param data: The data the structure is learnt from (Pandas.DataFrame)
    :param ci_test: The (str) name of the Conditional Indep. test, forwarded to CDT as ``CItest``
    :return: The learnt structure (BayNet DAG)
    """
    learner = graph.PC(CItest=ci_test, verbose=False)
    learnt_graph = dagify_min_edge(learner.predict(data))
    node_names = list(data.columns)
    return _cdt_to_baynet(learnt_graph, node_names)

def GES(data: pd.DataFrame, score: str = "int") -> DAG:
    """
    Learn a structure from data with the GES algorithm provided by CDT.

    The graph predicted by CDT is passed through ``dagify_min_edge`` and then
    converted to a BayNet DAG using the column names of ``data``.

    :param data: The data the structure is learnt from (Pandas.DataFrame)
    :param score: The (str) name of the score function, forwarded to CDT as ``score``
    :return: The learnt structure (BayNet DAG)
    """
    learner = graph.GES(score=score, verbose=False)
    learnt_graph = dagify_min_edge(learner.predict(data))
    node_names = list(data.columns)
    return _cdt_to_baynet(learnt_graph, node_names)

## Input

In [None]:
# Experiment grid: the run cell iterates over the cartesian product of these lists.
trials = [*range(1, 11)]  # trial ids 1 -> 10
structure_types = [
    "forest_fire",
    "barabasi_albert",
    "watts_strogatz",
    "ide_cozman",
    "waxman",
]
variables = [40]    # number of variables per generated DAG
samples = [5000]    # number of rows sampled per dataset
alphas = [6.0]      # `alpha` passed to the discrete parameter generator
max_levels = [4]    # `max_levels` passed to the discrete parameter generator

# Structure learners to compare; each is called as `algorithm(data)`.
algorithms = [GES, PC]

## Run Experiments

In [None]:
# Silence all warnings for the rest of the session and empty tqdm's registry
# of progress-bar instances (presumably to avoid stale bars on cell re-runs).
warnings.filterwarnings('ignore')
tq.tqdm._instances.clear()

columns = [
    "Trial",
    "Structure Type",
    "N_Variables",
    "N_Samples",
    "Alpha",
    "Max_Level",
    "Algorithm",
    "Skeleton Precision",
    "Skeleton Recall",
    "V_Structure_Precision",
    "V_Structure_Recall",
]
results = []

# Every combination of the input lists, materialised into a list before it is
# handed to tqdm.
experiment_grid = list(
    product(trials, structure_types, variables, samples, alphas, max_levels, algorithms)
)

for trial, structure_type, variable, sample, alpha, max_level, algorithm in tq.tqdm(experiment_grid):
    # ---- Ground truth: random structure, discrete parameters, sampled data ----
    # NOTE(review): this runs once per algorithm, and only the parameter
    # generation is seeded, so GES and PC may be scored on different
    # graphs/data for the same trial -- confirm this is intended.
    dag = DAG.generate(structure_type, variable)
    dag.generate_discrete_parameters(
        alpha=alpha,
        min_levels=2,
        max_levels=max_level,
        seed=trial,
    )
    data = dag.sample(sample)

    # ---- Learning: structure first, then parameters on the same data ----
    learnt_dag = algorithm(data)
    learnt_dag.estimate_parameters(data, infer_levels=True)

    # ---- Scoring against the ground-truth DAG ----
    precision = metrics.precision(dag, learnt_dag, skeleton=True)
    recall = metrics.recall(dag, learnt_dag, skeleton=True)
    v_precision = metrics.v_precision(dag, learnt_dag)
    v_recall = metrics.v_recall(dag, learnt_dag)

    # Row layout follows `columns`.
    results.append([
        trial,
        structure_type,
        variable,
        sample,
        alpha,
        max_level,
        algorithm.__name__,
        precision,
        recall,
        v_precision,
        v_recall,
    ])

results_df = pd.DataFrame(results, columns=columns)
results_df.to_csv("results.csv")

In [None]:
# Bare expression: shows the results table as the notebook cell's output.
results_df