In [1]:
# Notebook bootstrap: IPython magics, standard-library imports, import path
# for the project's local modules, and the default figure resolution.

# Enable the autoreload extension in mode 2 so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
from IPython.display import display
import sys
import os
import logging

# Put ../src at the front of the module search path. This has to run before
# the project-local imports made in later cells (presumably main_methods and
# liver_info live there -- TODO confirm).
sys.path.insert(0,'../src')
import matplotlib as mpl
# Default resolution for every figure created in this notebook.
mpl.rcParams['figure.dpi']= 100

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Configure root logging once for the notebook: INFO and above, with every
# record rendered as "[<logger name> <level>] <message>".
logging.basicConfig(
    level=logging.INFO,
    format='[%(name)s %(levelname)s] %(message)s',
)
# Named logger used by this notebook.
logger = logging.getLogger("feat_viz")

## Load the processed data

In [4]:
from main_methods import run_procedure, run_unsupervised, evaluate_result
from liver_info import load_processsed_hepatocyte_data, load_zonation_result, output_processed_data

# Input / output locations. Both can be redirected without editing the
# notebook by setting GLISS_DATA_DIR / GLISS_RESULT_DIR in the environment;
# when a variable is unset or empty, the original location is used.
dat_dir = os.environ.get("GLISS_DATA_DIR") or "/Users/jjzhu/Google Drive/_GLISS/data/liver2k"
RDIR = os.environ.get("GLISS_RESULT_DIR") or "/Users/jjzhu/Google Drive/_GLISS/data/analysis_050719"
main_dir = os.path.join(dat_dir, "analysis_on_data_original")

# Zonation values: used by a later cell both as the "oracle" latent variable
# (lam_in) and as the reference when evaluating results (lam_ref).
zone_vals = load_zonation_result(dat_dir, just_vals=True)
# Processed hepatocyte data, loaded without centering or scaling.
x_k, x_d, obs_df, var_df = load_processsed_hepatocyte_data(main_dir, center=False, scale=False)

# Export a copy of the processed data next to the analysis results.
output_processed_data(main_dir, os.path.join(RDIR, "hepa_data"))

# Shared settings passed to run_unsupervised / run_procedure.
params = {
    "method": None,         # filled in per run by the loops in later cells
    "n_perms": 10000,       # presumably the number of permutations -- TODO confirm
    "perm_method": "pool",
    "alpha": 0.05,          # presumably the significance level -- TODO confirm
    "graph_k": 10,          # presumably the neighbour count for the graph method -- TODO confirm
}

[feat_viz INFO] Data directory: /Users/jjzhu/Google Drive/_GLISS/data/liver2k/analysis_on_data_original/data
[feat_viz INFO] Loaded data: x (1415, 6) and y (1415, 8883)
[feat_viz INFO] Saved outputs to: /Users/jjzhu/Google Drive/_GLISS/data/analysis_050719/hepa_data/data


### Run purely unsupervised methods

In [5]:
# Fit each purely unsupervised variant; every run is handed a result path
# under RDIR via the `fn` argument.
for method in ("graph", "pc"):
    params["method"] = method
    name = "unsup_{}".format(method)
    fn = os.path.join(RDIR, "lam_{}_unsupervised.plk".format(method))
    result = run_unsupervised(x_k, x_d, params, fn=fn)

[feat_viz INFO] Saving results to: /Users/jjzhu/Google Drive/_GLISS/data/analysis_050719/lam_graph_unsupervised.plk
[feat_viz INFO] Saving results to: /Users/jjzhu/Google Drive/_GLISS/data/analysis_050719/lam_pc_unsupervised.plk


### Run our prescribed procedures and variations

In [None]:
# Run every (method, variant) combination. The "oracle" variant starts from the
# reference zonation values, while "vanilla" passes no initial latent variable.
# Each result is then evaluated against the zonation values.
for method in ("pc", "graph", "hybrid"):
    for mtype in ("vanilla", "oracle"):
        params["method"] = method
        lam_in = zone_vals if mtype == "oracle" else None
        fn = os.path.join(RDIR, "result_{}_{}.plk".format(method, mtype))
        result = run_procedure(x_k, x_d, params, lam_in=lam_in, fn=fn)
        evaluate_result(result, lam_ref=zone_vals)

[feat_viz INFO] Running pc-based procedure
[feat_viz INFO] Inferred initial latent variables
[feat_viz INFO] Selecting pc-based features...
[feat_viz INFO] Updated latent variables...
[feat_viz INFO] Saving results to: /Users/jjzhu/Google Drive/_GLISS/data/analysis_050719/result_pc_vanilla.plk
[feat_viz INFO] Number of selected variables: 1615
[feat_viz INFO] Correlation: 0.90132 -> 0.77951
[feat_viz INFO] Using pre-computed latent variable
[feat_viz INFO] Selecting pc-based features...
[feat_viz INFO] Updated latent variables...
