# Reading .bif Files
The `.bif` files (Bayesian Interchange Format) used in this notebook are discrete Bayesian network models sourced from the [bnlearn repository](https://www.bnlearn.com/bnrepository/).

In [1]:
import bnlearn as bn
# Tested with the following package versions:
#   pgmpy   0.1.19
#   bnlearn 0.10.2

# Name of the network to load; the matching file is expected at
# data_bif/<DAG_name>.bif (adjust the path to your local layout).
DAG_name = 'asia'
bif_file = f'data_bif/{DAG_name}.bif'

# Import the DAG from the .bif file with bnlearn. The result is indexed by key:
# 'model' holds the network object, 'adjmat' the adjacency matrix.
model = bn.import_DAG(bif_file, verbose=0)
adj_matrix = model['adjmat']  # pandas DataFrame: rows = source, columns = target

# .nodes() yields a node view rather than a list; snapshot it so that
# node_list really is a plain list that can be indexed and reused safely.
node_list = list(model['model'].nodes())

# Uncomment to draw the DAG.
# bn.plot(model)

In [None]:
# Look the network object up once; it exposes the nodes and directed edges.
bayes_net = model['model']

print("Nodes in the Bayesian Network:")
print(f"Node List: {list(bayes_net.nodes())}\n")

print("Edges in the Bayesian Network:")
print(f"Edge List: {list(bayes_net.edges())}\n")

print("Adjacency Matrix Information:")
print(f"Type: {type(adj_matrix)}")
print(f"Shape: {adj_matrix.shape}")
print(f"Adjacency Matrix:\n{adj_matrix}")

# Use the bnlearn package to sample data from the DAG.
# NOTE(review): no random seed is set here, so the sampled rows presumably
# differ between runs — confirm whether reproducibility is required.
num_samples = 100
data_sample = bn.sampling(model, n=num_samples, verbose=0)
print(data_sample.head())

Nodes in the Bayesian Network:
Node List: ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']

Edges in the Bayesian Network:
Edge List: [('asia', 'tub'), ('tub', 'either'), ('smoke', 'lung'), ('smoke', 'bronc'), ('lung', 'either'), ('bronc', 'dysp'), ('either', 'xray'), ('either', 'dysp')]

Adjacency Matrix Information:
Type: <class 'pandas.core.frame.DataFrame'>
Shape: (8, 8)
Adjacency Matrix:
target   asia    tub  smoke   lung  bronc  either   xray   dysp
source                                                         
asia    False   True  False  False  False   False  False  False
tub     False  False  False  False  False    True  False  False
smoke   False  False  False   True   True   False  False  False
lung    False  False  False  False  False    True  False  False
bronc   False  False  False  False  False   False  False   True
either  False  False  False  False  False   False   True   True
xray    False  False  False  False  False   False  False  False
dysp    F

# Reordering the Adjacency Matrix
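The rows and columns of the adjacency matrix are rearranged to follow a topological order of the nodes, i.e. an order in which every node appears after all of its parents. In this order every edge points from an earlier node to a later one, so the reordered matrix is strictly upper triangular.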

In [4]:
import networkx as nx
import pandas as pd

# Rebuild the DAG as a networkx DiGraph from the adjacency matrix and node list.
DAG = nx.DiGraph()
DAG.add_nodes_from(node_list)  # add the nodes first so isolated nodes are kept

# Row-major scan (source, then target) keeps the original edge insertion order.
# Any truthy cell counts as an edge, so 0/1 or weighted matrices work as well
# as the boolean one; `.at` is the scalar accessor for a single labelled cell.
DAG.add_edges_from(
    (source, target)
    for source in node_list
    for target in node_list
    if adj_matrix.at[source, target]
)

# Topological order: every node is listed after all of its parents.
# nx.topological_sort raises NetworkXUnfeasible if the graph contains a cycle.
topological_order = list(nx.topological_sort(DAG))
print(f'topological_order:{topological_order}')

print(f"adj_matrix:\n {adj_matrix},\n type(adj_matrix):{type(adj_matrix)}")

# Reorder rows and columns with the same label order so that the matrix still
# describes the same edges, only with nodes arranged topologically.
order_adjacency_matrix = adj_matrix.loc[topological_order, topological_order]
print(f"order_adjacency_matrix:\n {order_adjacency_matrix},\n type(order_adjacency_matrix):{type(order_adjacency_matrix)}")


topological_order:['asia', 'smoke', 'tub', 'lung', 'bronc', 'either', 'xray', 'dysp']
adj_matrix:
 target   asia    tub  smoke   lung  bronc  either   xray   dysp
source                                                         
asia    False   True  False  False  False   False  False  False
tub     False  False  False  False  False    True  False  False
smoke   False  False  False   True   True   False  False  False
lung    False  False  False  False  False    True  False  False
bronc   False  False  False  False  False   False  False   True
either  False  False  False  False  False   False   True   True
xray    False  False  False  False  False   False  False  False
dysp    False  False  False  False  False   False  False  False,
 type(adj_matrix):<class 'pandas.core.frame.DataFrame'>
order_adjacency_matrix:
 target   asia  smoke    tub   lung  bronc  either   xray   dysp
source                                                         
asia    False  False   True  False  False   False  

# Saving and Loading the Results
The reordered adjacency matrix is saved as a CSV file for further use. The saved file is then reloaded to verify its contents.

In [5]:
import os
# Save the reordered adjacency matrix as CSV. index=True writes the row labels
# and header=True writes the column labels.
save_path = os.path.join(r'data_bif', f'{DAG_name}_graph.csv')
order_adjacency_matrix.to_csv(save_path, index=True, header=True)

# Reload exactly the file that was just written; index_col=0 restores the
# first CSV column as the row index.
load_path = save_path
df = pd.read_csv(load_path, index_col=0)

# Verify the round trip: same labels in the same order and identical cells.
# Labels are compared as strings because read_csv may re-infer their type.
assert [str(label) for label in df.index] == [str(label) for label in order_adjacency_matrix.index], \
    "row labels changed after reloading the CSV"
assert [str(label) for label in df.columns] == [str(label) for label in order_adjacency_matrix.columns], \
    "column labels changed after reloading the CSV"
assert (df.to_numpy() == order_adjacency_matrix.to_numpy()).all(), \
    "cell values changed after reloading the CSV"