This notebook is the belief network kitchen.

In [43]:
""" Packages we'll need. """
%load_ext autoreload
%autoreload 2
import numpy as np
import pyreadstat as prs
import os
import sys
sys.path.append('../src')
from clean_data_1 import transform_dataframe_1
from clean_data_2 import transform_dataframe_2
from make_belief_network import make_belief_network
from make_belief_network import make_conditional_belief_network
from get_basic_graph_info import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
""" Importing the GSS dataset. """

# Read the GSS SAS file from ../dat; the call is unpacked into the raw data and a second "meta" value.
raw_df, meta = prs.read_sas7bdat("../dat/gss7222_r3.sas7bdat")

""" Cleaning the data. """
 
# transform_dataframe_1 is a project helper imported from clean_data_1; its cleaning steps are not visible in this notebook.
df, metadata = transform_dataframe_1(raw_df)    # df is the cleaned data passed to the network builders below; metadata is the helper's second return value (contents not shown here -- TODO document)

In [5]:
""" Setting the core replicating variables that we're interested in. """

# Names of the variables of interest handed to the network builders below.
# Rows are split by shared name prefix purely for readability; the order of the
# entries is significant only in that it is kept exactly as originally listed.
variables = [
    "PARTYID", "POLVIEWS",
    "NATSPAC", "NATENVIR", "NATHEAL", "NATCITY", "NATCRIME", "NATDRUG",
    "NATEDUC", "NATRACE", "NATARMS", "NATAID", "NATFARE", "NATROAD",
    "NATSOC", "NATMASS", "NATPARK", "NATCHLD", "NATSCI",
    "EQWLTH",
    "SPKATH", "COLATH", "LIBATH",
    "SPKRAC", "COLRAC", "LIBRAC",
    "SPKCOM", "COLCOM", "LIBCOM",
    "SPKMIL", "COLMIL", "LIBMIL",
    "SPKHOMO", "COLHOMO", "LIBHOMO",
    "SPKMSLM", "COLMSLM", "LIBMSLM",
    "CAPPUN", "GUNLAW", "COURTS", "GRASS",
    "ATTEND", "RELITEN", "POSTLIFE", "PRAYER",
    "AFFRMACT", "WRKWAYUP", "HELPFUL", "FAIR", "TRUST",
    "CONFINAN", "CONBUS", "CONCLERG", "CONEDUC", "CONFED", "CONLABOR", "CONPRESS",
    "CONMEDIC", "CONTV", "CONJUDGE", "CONSCI", "CONLEGIS", "CONARMY",
    "GETAHEAD", "FEPOL",
    "ABDEFECT", "ABNOMORE", "ABHLTH", "ABPOOR", "ABRAPE", "ABSINGLE", "ABANY",
    "SEXEDUC", "DIVLAW", "PREMARSX", "TEENSEX", "XMARSEX", "HOMOSEX", "PORNLAW",
    "SPANKING", "LETDIE1", "SUICIDE1", "SUICIDE2",
    "POLHITOK", "POLABUSE", "POLMURDR", "POLESCAP", "POLATTAK",
    "NEWS", "TVHOURS",
    "FECHLD", "FEPRESCH", "FEFAM",
    "RACDIF1", "RACDIF2", "RACDIF3", "RACDIF4",
    "HELPPOOR", "MARHOMO",
    "PRESLAST_NONCONFORM", "PRESLAST_DEMREP", "VOTELAST",
]

In [79]:
""" Creating unconditioned belief networks. """

""" Belief networks are constructed for a given timeframe and set methodological parameters. """

# Timeframe: a start year plus a count of consecutive years.
# The list runs from start_year up to, but NOT including, start_year + duration
# (so with the values below it covers 1972 through 2019).
start_year = 1972
duration = 2020 - start_year
timeframe = [start_year + offset for offset in range(duration)]

# Parameters forwarded to make_belief_network
method = "spearman"     # correlation method
threshold = 0           # correlation threshold
sample_threshold = 0    # sample-size threshold
regularisation = 0.2    # regularisation parameter used for the partial correlation

""" Note: for now, we keep the threshold and sample threshold at 0. 
    Regularisation can be set between around 1.5 and 2.5. """
# NOTE(review): the value used above (0.2) lies outside the 1.5-2.5 range quoted in
# the note -- confirm whether 0.15-0.25 was meant.

# Build the network; the call is unpacked into the graph object, the list of
# variables it reports, and the partial correlation matrix.
BN, variables_list, correlation_matrix_partial = make_belief_network(
    df,
    variables,
    timeframe,
    method=method,
    is_partial=True,
    threshold=threshold,
    sample_threshold=sample_threshold,
    regularisation=regularisation,
)

""" Print some basic information about the belief network. """
print_network_info(
    get_network_info(correlation_matrix_partial, variables_list)
)

""" Save the graphml, correlation matrix (csv), variables list (csv). """
# Flip to True to write the three artefacts into a run-specific folder under ../out.
save = False
if save:
    # NOTE(review): the folder label shows start_year+duration (2020) as the end
    # year even though the timeframe itself stops one year earlier.
    name = f"{start_year}-{start_year+duration}, R={regularisation}, Condition=None"
    output_dir = f"../out/belief networks/{name}"
    os.makedirs(output_dir, exist_ok=True)  # create the folder (and parents) if missing

    # NOTE(review): nx is not imported explicitly in the import cell; presumably it
    # arrives through the star import from get_basic_graph_info -- confirm.
    nx.write_graphml(BN, f"{output_dir}/graph_object.graphml", named_key_ids=True)
    np.savetxt(
        f"{output_dir}/correlation_matrix_partial.csv",
        correlation_matrix_partial,
        delimiter=",",
    )
    # fmt="%s" because the variables list holds strings, not numbers
    np.savetxt(
        f"{output_dir}/variables_list.csv",
        variables_list,
        delimiter=",",
        fmt="%s",
    )



NETWORK INFORMATION

Top 5 Nodes by Degree Centrality:
  1. HOMOSEX: 0.2871
  2. PREMARSX: 0.2277
  3. SUICIDE1: 0.1980
  4. ATTEND: 0.1881
  5. ABANY: 0.1782

Top 5 Nodes by Betweenness Centrality:
  1. HOMOSEX: 0.1706
  2. CONCLERG: 0.0969
  3. ATTEND: 0.0910
  4. RACDIF4: 0.0899
  5. POLVIEWS: 0.0893

Top 5 Nodes by Eigenvector Centrality:
  1. HOMOSEX: 0.3265
  2. PREMARSX: 0.2913
  3. SUICIDE1: 0.2683
  4. PORNLAW: 0.2499
  5. ABANY: 0.2390

Number of Components: 11

Basic Info:
  - Size:           102
  - Average Degree: 8.82

Strongest Correlations:
  1. PARTYID <--> PRESLAST_DEMREP (Strength: 0.4391)
  2. SPKHOMO <--> COLHOMO (Strength: 0.3684)
  3. ATTEND <--> RELITEN (Strength: 0.3321)
  4. SPKMSLM <--> COLMSLM (Strength: 0.3251)
  5. LETDIE1 <--> SUICIDE1 (Strength: 0.3033)

Global Network Properties:
  - Average Path Length:     inf
  - Clustering Coefficient:  0.50
  - Network Diameter:        inf



In [87]:
""" Creating conditioned belief networks. """

# Timeframe: a start year plus a count of consecutive years.
# The list runs from start_year up to, but NOT including, start_year + duration
# (so with the values below it covers 2000 through 2003).
start_year = 2000
duration = 4
timeframe = [start_year + offset for offset in range(duration)]

# Conditioning: the name of the single variable to condition on, and how to split it.
conditioning = "PARTYID"
# Per the original author's note, the two supported modes are:
#   "negpos" - bins the variable into negative / positive and creates two BNs
#   "unique" - creates one BN for each unique value of the variable
# NOTE(review): the global below is spelled "contidion_method" (sic); the name is
# kept as-is because other cells may read it.
contidion_method = "negpos"

# Parameters forwarded to make_conditional_belief_network
method = "spearman"     # correlation method
threshold = 0           # correlation threshold
sample_threshold = 0    # sample-size threshold
regularisation = 0.2    # regularisation parameter used for the partial correlation


# NOTE(review): the saved output of this cell ends in "KeyError: 2"; the cause is
# not visible from this notebook -- TODO investigate inside make_conditional_belief_network.
conditioned_BN, conditioned_variables_list, conditioned_correlation_matrix_partial = make_conditional_belief_network(
    conditioning,
    df,
    condition_method=contidion_method,
    variables_of_interest=variables,
    years_of_interest=timeframe,
    method=method,
    is_partial=True,
    threshold=threshold,
    sample_threshold=sample_threshold,
    regularisation=regularisation,
)



{'graph': <networkx.classes.graph.Graph object at 0x000001CD8C1BC590>, 'vars': ['PARTYID', 'POLVIEWS', 'NATSPAC', 'NATENVIR', 'NATHEAL', 'NATCITY', 'NATCRIME', 'NATDRUG', 'NATEDUC', 'NATRACE', 'NATARMS', 'NATAID', 'NATFARE', 'NATROAD', 'NATSOC', 'NATMASS', 'NATPARK', 'NATCHLD', 'NATSCI', 'EQWLTH', 'SPKATH', 'COLATH', 'LIBATH', 'SPKRAC', 'COLRAC', 'LIBRAC', 'SPKCOM', 'COLCOM', 'LIBCOM', 'SPKMIL', 'COLMIL', 'LIBMIL', 'SPKHOMO', 'COLHOMO', 'LIBHOMO', 'CAPPUN', 'GUNLAW', 'COURTS', 'GRASS', 'ATTEND', 'RELITEN', 'POSTLIFE', 'PRAYER', 'AFFRMACT', 'WRKWAYUP', 'HELPFUL', 'FAIR', 'TRUST', 'CONFINAN', 'CONBUS', 'CONCLERG', 'CONEDUC', 'CONFED', 'CONLABOR', 'CONPRESS', 'CONMEDIC', 'CONTV', 'CONJUDGE', 'CONSCI', 'CONLEGIS', 'CONARMY', 'GETAHEAD', 'FEPOL', 'ABDEFECT', 'ABNOMORE', 'ABHLTH', 'ABPOOR', 'ABRAPE', 'ABSINGLE', 'ABANY', 'SEXEDUC', 'DIVLAW', 'PREMARSX', 'TEENSEX', 'XMARSEX', 'HOMOSEX', 'PORNLAW', 'SPANKING', 'LETDIE1', 'SUICIDE1', 'SUICIDE2', 'POLHITOK', 'POLABUSE', 'POLMURDR', 'POLESCAP', '

KeyError: 2