In [10]:
## Hypothesis generator for the GSS Data:
import os
import subprocess

from hypothesis import Hypothesis
from quasinet import qnet
from cognet.cognet import cognet as cg
from cognet.dataFormatter import dataFormatter
from cognet.model import model 
from IPython.display import Image
from IPython.core.display import HTML

# Notebook configuration: survey year plus the input/output locations derived from it.
yr = '2018'
GSSDATA = f'csv_files/gss_{yr}.csv'      # raw GSS survey table for the chosen year
qnet_dir = 'qnets/'                      # directory holding saved Qnet models
outdirname = 'dotfiles/'                 # directory for exported tree files (trailing slash is relied on below)
qnet_path = f'{qnet_dir}gss_{yr}.joblib'  # saved Qnet model for the chosen year

In [12]:
## Load the saved Qnet file into the model. If the file does not exist yet,
## fit the model on the GSS data and save it so later runs can load it instead.

data_obj = dataFormatter(GSSDATA)
model_obj = model()
if os.path.exists(qnet_path):
    model_obj.load(qnet_path)
else:
    # Make sure the target directory exists *before* fitting: on a fresh checkout
    # it may be missing, and (presumably -- TODO confirm) save() does not create it,
    # which would otherwise surface only after the fit has already been paid for.
    qnet_parent = os.path.dirname(qnet_path)
    if qnet_parent:
        os.makedirs(qnet_parent, exist_ok=True)
    model_obj.fit(data_obj = data_obj)
    model_obj.save(qnet_path)

In [13]:
cognet_obj = cg()

# Hand the model and the formatted data to the cognet object.
cognet_obj.load_from_model(model_obj, data_obj, 'all')

# Emit the GSS_* script files named below for this Qnet file.
# YEARS is taken from the `yr` config constant (instead of a second hard-coded
# '2018') so the generated files stay in step with the model/data being used.
# NOTE(review): presumably these are the distance-matrix driver and its MPI
# helper scripts -- confirm against the cognet documentation.
cognet_obj.dmat_filewriter("GSS_cognet.py", qnet_path,
                           MPI_SETUP_FILE="GSS_mpi_setup.sh",
                           MPI_RUN_FILE="GSS_mpi_run.sh",
                           MPI_LAUNCHER_FILE="GSS_mpi_launcher.sh",
                           YEARS=yr,NODES=4,T=14)

# Display the samples held by the cognet object as the cell output.
cognet_obj.samples


Unnamed: 0,wrkstat,HRS1,HRS2,evwork,wrkslf,wrkgovt,OCC10,PRESTG10,PRESTG105PLUS,INDUS10,...,neisafe,rlooks,rgroomed,rweight,rhlthend,wtss,wtssnr,wtssall,vstrat,vpsu
0,temp not working,e,c,,someone else,private,b,c,c,c,...,very safe,,,,,e,e,e,3301,1
1,working fulltime,c,e,,someone else,private,b,d,d,c,...,very safe,,,,,c,c,c,3301,1
2,working fulltime,c,e,,someone else,private,c,d,d,c,...,very safe,attractive,about average,about the right weight,,c,c,c,3301,1
3,working fulltime,e,e,,someone else,private,c,c,c,c,...,somewhat safe,about average,about average,slightly underweight,excellent,c,c,c,3301,1
4,working fulltime,c,e,,self-employed,private,c,c,c,c,...,very unsafe,very unattractive,very poorly groomed,very overweight,poor,c,c,c,3301,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1779,retired,e,e,yes,someone else,private,d,c,c,c,...,very safe,,,,,c,c,c,3378,2
1780,school,e,e,no,,,e,e,e,e,...,somewhat safe,about average,about average,slightly underweight,excellent,d,d,d,3378,2
1781,working fulltime,c,e,,someone else,government,c,c,c,c,...,very safe,,,,,c,c,c,3378,2
1782,working parttime,c,e,,someone else,private,c,b,b,c,...,very safe,about average,about average,slightly overweight,excellent,c,c,c,3378,2


In [None]:
## Generating tree files
## Export one Graphviz .dot file per Qnet feature into `outdirname`, then render
## every .dot file in that directory to a .png with the Graphviz `dot` program.

model_obj.save(qnet_path)
qnet_model = qnet.load_qnet(qnet_path)

# exist_ok avoids the separate existence check and also creates missing parents.
os.makedirs(outdirname, exist_ok=True)

for idx, feature_name in enumerate(qnet_model.feature_names):
    qnet.export_qnet_tree(qnet_model, idx, os.path.join(outdirname, '{}.dot'.format(feature_name)),
                          outformat = "graphviz", detailed_output = True)

# Render with subprocess rather than a `!` shell loop: the previous one-liner
# relied on bash-only syntax, rewrote the first "dot" substring anywhere in the
# file name, and discarded every error message, so failures went unnoticed.
failed_renders = []
for dot_name in sorted(os.listdir(outdirname)):
    if not dot_name.endswith('.dot'):
        continue
    dot_path = os.path.join(outdirname, dot_name)
    png_path = os.path.splitext(dot_path)[0] + '.png'  # swap only the extension
    try:
        subprocess.run(['dot', '-Tpng', dot_path, '-o', png_path], check=True,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as err:
        # `dot` ran but rejected this file; keep going with the remaining ones.
        failed_renders.append((dot_name, err))
    except OSError as err:
        # `dot` could not be started at all (e.g. Graphviz is not installed);
        # every further attempt would fail the same way, so stop here.
        failed_renders.append((dot_name, err))
        break

# Stay best-effort (no exception), but make the failures visible.
if failed_renders:
    print('Could not render {} tree file(s); first failure: {}: {}'.format(
        len(failed_renders), failed_renders[0][0], failed_renders[0][1]))

In [9]:
! ls -lhS dotfiles/*png | head -n 3
## Viewing the generated tree file
Image(filename= outdirname + 'abmoral.png')

ls: cannot access dotfiles/*png: No such file or directory


FileNotFoundError: [Errno 2] No such file or directory: 'dotfiles/abmoral.png'

In [7]:
## Generating Hypothesis
## Build a Hypothesis object from the saved Qnet file and show its `hypotheses`
## table ordered by the 'src' column.
# NOTE(review): the recorded output of this cell is an empty table (header only);
# presumably the hypotheses must be computed by an extra call before they are
# displayed -- confirm against the Hypothesis class.

hypothesis = Hypothesis(
    model_path=qnet_path,
    detailed_labels=True,
    no_self_loops=False,
)

#hypothesis.to_dot('hypothesis_GSS.dot')
hypothesis.hypotheses.sort_values(by='src')


Unnamed: 0,src,tgt,time_tgt,lomar,pvalue
