In [None]:
from __future__ import print_function

import os, sys
#os.environ['KERAS_BACKEND'] = 'tensorflow'

from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split
from scipy.ndimage.interpolation import rotate
from numpy import unique
from numpy import random 
from glob import glob
import numpy as np
import pandas as pd
import json
from math import ceil
from collections import defaultdict
import time
from random import shuffle
import io
import zlib
import scipy
import pickle
from datetime import datetime
from sklearn.externals import joblib
import re
from collections import Counter
import smtplib
from sklearn import metrics

from sklearn.preprocessing import MinMaxScaler
import random


def isNaN(num):
    """Report whether ``num`` is NaN by testing that it differs from itself.

    The comparison result is returned as-is, so the return type is whatever
    ``num != num`` produces for the given object.
    """
    differs_from_itself = num != num
    return differs_from_itself

def threshlab(x, thresh=None):
    """Binarise a value against a signed threshold.

    Parameters
    ----------
    x : value convertible with ``float()``.
    thresh : indexable pair ``(cutoff, direction)``. Both sides of the
        comparison are multiplied by ``direction``, so a negative direction
        turns the ``>=`` test into ``<=``.

    Returns
    -------
    int
        1 when ``float(x) * direction >= cutoff * direction``, otherwise 0.
        Any value that cannot be evaluated (non-numeric ``x``, missing or
        malformed ``thresh``) yields 0.
    """
    try:
        return 1 if float(x) * thresh[1] >= thresh[0] * thresh[1] else 0
    except Exception:
        # Best-effort labelling: unusable input counts as "below threshold".
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit propagate.
        return 0

def dfinterpolate(df, maxlen):
    """Up-sample a short DataFrame to exactly ``maxlen`` rows by repeating rows.

    Every row is repeated ``ceil(maxlen / len(df))`` times in its original
    order and the result is cut to the first ``maxlen`` rows. Frames that
    already have ``maxlen`` rows or more are returned unchanged (same object).

    Parameters
    ----------
    df : pandas.DataFrame
    maxlen : int
        Target number of rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when ``len(df) >= maxlen``; otherwise a new frame with
        ``maxlen`` rows and a fresh 0..maxlen-1 index.

    Raises
    ------
    ValueError
        If ``df`` is empty and ``maxlen`` is positive (nothing to repeat).
    """
    n_rows = len(df)
    if n_rows >= maxlen:
        return df
    if n_rows == 0:
        raise ValueError('cannot up-sample an empty DataFrame')

    repeats = int(np.ceil(float(maxlen) / float(n_rows)))
    # One positional take instead of row-by-row DataFrame.append (quadratic,
    # and removed in pandas 2.0); the slice applies the truncation that the
    # previous version computed but threw away.
    positions = np.repeat(np.arange(n_rows), repeats)[:maxlen]
    return df.iloc[positions].reset_index(drop=True)

def hour_delta(time_init, time_end):
    """Return the elapsed time from ``time_init`` to ``time_end`` in hours.

    The difference ``time_end - time_init`` must expose ``total_seconds()``;
    the result is negative when ``time_end`` precedes ``time_init``.
    """
    elapsed_seconds = (time_end - time_init).total_seconds()
    return elapsed_seconds/3600

def extract_num_df(df):
    """Convert every column of ``df`` to numeric, in place.

    Each column is passed through ``pd.to_numeric(..., errors='coerce')``, so
    values that cannot be parsed become NaN. A column whose conversion raises
    is left as it was.

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in list(df.columns):
        try:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        except Exception:
            # Best-effort: keep the column untouched. ``Exception`` rather
            # than a bare except so Ctrl-C / SystemExit are not swallowed.
            continue
    return df

def balanced_sample_maker(X, y, random_seed=None):
    """Return a balanced data set by oversampling the positive class.

    All observations labelled 0 are kept once; observations labelled 1 are
    drawn with replacement until there are as many of them as label-0
    observations. The combined set is then shuffled. The implementation
    assumes the positive class (label 1) is the minority.

    Parameters
    ----------
    X : indexable (numpy.ndarray or sequence)
        Observations; ``X[i]`` must belong to ``y[i]``.
    y : numpy.ndarray or sequence
        Binary labels (0 / 1).
    random_seed : int or None
        Seed for the sampling. The generator is local to this call, so the
        global NumPy / stdlib random state is not modified.

    Returns
    -------
    (list, list)
        Balanced observations and their labels, in the same shuffled order.

    Raises
    ------
    ValueError
        If either class is absent from ``y``.
    """
    labels = np.asarray(y)
    negative_idx = np.flatnonzero(labels == 0).tolist()
    positive_idx = np.flatnonzero(labels == 1).tolist()
    if not negative_idx or not positive_idx:
        raise ValueError('y must contain at least one 0 and one 1 label')

    # Use NumPy's generator explicitly: the bare name ``random`` in this file
    # ends up bound to the stdlib module, whose ``choice`` has no
    # ``size`` / ``replace`` arguments.
    rng = np.random.RandomState(random_seed)

    # oversampling on observations of positive label
    over_sample_idx = rng.choice(positive_idx, size=len(negative_idx), replace=True).tolist()
    balanced_copy_idx = negative_idx + over_sample_idx
    rng.shuffle(balanced_copy_idx)

    return [X[i] for i in balanced_copy_idx], [y[i] for i in balanced_copy_idx]


def compute_multilabel_class_weights(Y, cptcodes = None):
    """Compute inverse-frequency weights for each label of a multi-label target.

    For label column ``i`` the weight of class 0 is
    ``n_samples / (n_samples - positives_i)`` and the weight of class 1 is
    ``n_samples / positives_i``. A weight that would be infinite (the class
    never occurs) is replaced by 1.

    Parameters
    ----------
    Y : 2-D array-like of 0/1 values, one row per sample, one column per label.
    cptcodes : sequence or None
        Only its length is used: it sets how many label columns get a weight
        entry. When None, every column of ``Y`` is used.

    Returns
    -------
    list of dict
        One ``{0: weight_for_0, 1: weight_for_1}`` dict per label.
    """
    label_matrix = np.asarray(Y)
    n_samples = float(len(label_matrix))
    positives = np.sum(label_matrix, axis=0)
    n_labels = len(positives) if cptcodes is None else len(cptcodes)

    # Division by zero is expected for labels that are all-0 or all-1; the
    # resulting inf is replaced below, so silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        weights_zero = n_samples / (n_samples - positives)
        weights_one = n_samples / positives

    class_weights = []
    for i in range(n_labels):
        a = weights_zero[i]
        b = weights_one[i]

        if a == float('Inf'):
            a = 1
        if b == float('Inf'):
            b = 1
        class_weights.append({0: a, 1: b})

    return class_weights

def getlabelsfromicd(ICDs, diagnosis_df):
    """Collect the MRN of every row that contains at least one of ``ICDs``.

    Each row of ``diagnosis_df`` is compared as a set of its cell values
    against the set of ``ICDs``; when they share any element the row's
    ``'MRN'`` value is added to the result (one entry per matching row, in
    row order).
    """
    def row_matches(position):
        shared = set(ICDs) & set(diagnosis_df.iloc[position])
        return len(shared) > 0

    return [
        diagnosis_df['MRN'].iloc[position]
        for position in range(len(diagnosis_df))
        if row_matches(position)
    ]

def comorbiditiesfromicd(ICDs, diagnosis_df):
    """Build a 0/1 indicator per code group.

    ``ICDs`` is an iterable of code collections. For each collection the
    result holds 1 when it shares at least one element with the items
    obtained by iterating ``diagnosis_df``, and 0 otherwise.
    """
    return [
        1 if len(set(code_group) & set(diagnosis_df)) > 0 else 0
        for code_group in ICDs
    ]

def ffillarr(arr):
    """Forward-fill NaN entries of ``arr`` along its first axis.

    Each NaN is replaced by the value at the most recent non-NaN position;
    NaNs before the first valid entry resolve to position 0 and therefore
    stay as they are. If anything goes wrong (for example a dtype that
    ``np.isnan`` rejects) the error is printed and ``arr`` is returned
    unchanged.
    """
    try:
        nan_mask = np.isnan(arr)
        positions = np.arange(nan_mask.shape[0])
        # Index of the element itself where valid, 0 where NaN ...
        last_valid = np.where(nan_mask, 0, positions)
        # ... then a running maximum carries the last valid index forward.
        np.maximum.accumulate(last_valid, axis=0, out=last_valid)
        return arr[last_valid]
    except Exception as err:
        print(err)
        return arr


def get_rox(mrn, pt_data = None, data = None):
    """Compute one ROX-style index per feature window of a patient.

    For every window the index is ``100 * (o2sat / fio2) / rr`` evaluated at
    the window's last time step.

    Parameters
    ----------
    mrn : key into ``pt_data`` and value matched against ``data['MRN']``.
    pt_data : mapping ``mrn -> (X, X_c, Y)``; only ``X`` is used. ``X`` is
        indexed as ``X[window, step, feature]`` with feature 0 read as hours,
        5 as oxygen saturation and 13 as respiratory rate.
        NOTE(review): these positions match the ``all_vars`` order used by
        ``create_dataset`` in this file - confirm if that list changes.
    data : pandas.DataFrame with columns ``'MRN'``, ``'hrs'``,
        ``'INHALED O2 CONCENTRATION'`` and ``'CAC - R FIO2 OXYGEN'``.

    Returns
    -------
    numpy.ndarray
        One value per window, forward-filled over NaN with ``ffillarr``.

    Notes
    -----
    FiO2 is taken from whichever of the two FiO2 columns has fewer NaNs in
    the window (first column on a tie). When neither column has any value the
    window falls back to 21. Errors from ``dfinterpolate`` (e.g. no rows of
    ``data`` inside the window) propagate to the caller.
    """
    X, X_c, Y = pt_data[mrn]
    seq_len = X.shape[1]
    fio2_columns = ['INHALED O2 CONCENTRATION', 'CAC - R FIO2 OXYGEN']
    # Select this patient's rows (previously a fixed MRN literal was used,
    # so every patient was scored with the same patient's FiO2).
    is_patient = data['MRN'] == mrn

    rox_pred = []
    for i in range(X.shape[0]):
        o2sat = X[i, :, 5]
        rr = X[i, :, 13]
        hrs = X[i, :, 0]

        in_window = is_patient & data['hrs'].between(hrs[0], hrs[-1])
        window_df = data.loc[in_window, ['hrs'] + fio2_columns].ffill(axis=0).bfill(axis='rows')
        pt_df = dfinterpolate(window_df, maxlen=seq_len)

        # Keep the trailing seq_len rows so the FiO2 series always lines up
        # with the feature window, whatever length dfinterpolate returned.
        candidates = [np.asarray(pt_df[col], dtype=float)[-seq_len:] for col in fio2_columns]
        nan_fraction = [np.isnan(values).mean() for values in candidates]

        if min(nan_fraction) == 1:
            # No FiO2 recorded at all in this window: fall back to 21.
            fio2 = np.full(seq_len, 21.0)
        elif nan_fraction[1] < nan_fraction[0]:
            fio2 = candidates[1]
        else:
            fio2 = candidates[0]

        rox = 100*(o2sat/fio2)/rr
        rox[rox == float("inf")] = np.nan
        rox_pred.append(rox[-1])
    return ffillarr(np.array(rox_pred))

import datetime
def getventhrs(demo):
    """Add an ``'Intubation_Hours'`` column: hours spent intubated per row.

    For rows with ``intubation_label == 1`` the duration runs from
    ``Intubation_Time`` to the first available of ``Extubation_Time``,
    ``Discharge_Date``, ``Death_time``; when none of those can be parsed the
    data cut-off (04/17/2020 04:00) is used. Rows that are not intubated, or
    whose ``Intubation_Time`` cannot be parsed, get NaN.

    Timestamps are expected as ``'%Y-%m-%d %H:%M:%S'`` with optional
    fractional seconds; anything else (including non-string cells) counts as
    missing.

    Parameters
    ----------
    demo : pandas.DataFrame
        Modified in place.

    Returns
    -------
    pandas.DataFrame
        ``demo`` with the new column.
    """
    # Local import: the bare name ``datetime`` is bound to both the class and
    # the module at different points of this file.
    import datetime as _dt

    timestr = '04_17_2020'
    # The cut-off string is month_day_year, so it needs its own format.
    cutoff = _dt.datetime.strptime(timestr + ' 04:00:00.000', '%m_%d_%Y %H:%M:%S.%f')
    end_columns = ('Extubation_Time', 'Discharge_Date', 'Death_time')

    def _parse(value):
        # Return a datetime, or None when the cell is missing / malformed.
        if not isinstance(value, str):
            return None
        for fmt in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
            try:
                return _dt.datetime.strptime(value, fmt)
            except ValueError:
                continue
        return None

    venttime = []
    for _, row in demo.iterrows():
        try:
            intubated = bool(row.get('intubation_label') == 1)
        except (TypeError, ValueError):
            intubated = False

        starttime = _parse(row.get('Intubation_Time')) if intubated else None
        if starttime is None:
            venttime.append(np.nan)
            continue

        endtime = cutoff
        for column in end_columns:
            parsed = _parse(row.get(column))
            if parsed is not None:
                endtime = parsed
                break

        venttime.append((endtime - starttime).total_seconds() / 3600)

    demo['Intubation_Hours'] = venttime
    return demo

def LOS(demo):
    """Add a ``'LOS_hours'`` column: length of stay in hours per row.

    The stay starts at ``First_Admit_Date``. Its end is taken from the first
    of ``Discharge_Date``, ``Death_time``, ``DECEASED_DATE`` that holds a
    string; when none of them does, the data cut-off (04/17/2020 04:00) is
    used. Negative durations are clamped to 0. A row gets NaN when the start,
    or the selected end string, cannot be parsed.

    ``First_Admit_Date``, ``Discharge_Date`` and ``Death_time`` are parsed as
    ``'%Y-%m-%d %H:%M:%S'`` with optional fractional seconds.
    ``DECEASED_DATE`` is parsed as ``'%d-%b-%Y %I:%M:%S %p'`` (12-hour clock,
    so AM/PM is honoured), falling back to a 24-hour reading of the same
    layout.

    Parameters
    ----------
    demo : pandas.DataFrame
        Modified in place.

    Returns
    -------
    pandas.DataFrame
        ``demo`` with the new column.
    """
    # Local import: the bare name ``datetime`` is bound to both the class and
    # the module at different points of this file.
    import datetime as _dt

    timestr = '04_17_2020'
    # The cut-off string is month_day_year, so it needs its own format.
    cutoff = _dt.datetime.strptime(timestr + ' 04:00:00.000', '%m_%d_%Y %H:%M:%S.%f')

    iso_formats = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')
    # %p only affects the parsed hour together with %I; try that first.
    deceased_formats = ('%d-%b-%Y %I:%M:%S %p', '%d-%b-%Y %H:%M:%S %p')
    end_sources = (
        ('Discharge_Date', iso_formats),
        ('Death_time', iso_formats),
        ('DECEASED_DATE', deceased_formats),
    )

    def _parse(value, formats):
        # Return a datetime, or None when the cell is missing / malformed.
        if not isinstance(value, str):
            return None
        for fmt in formats:
            try:
                return _dt.datetime.strptime(value, fmt)
            except ValueError:
                continue
        return None

    venttime = []
    for _, row in demo.iterrows():
        starttime = _parse(row.get('First_Admit_Date'), iso_formats)

        # The first column that actually holds a string decides the end time;
        # if that string is unreadable the stay length is unknown (NaN) rather
        # than silently reusing a value from another row.
        endtime = cutoff
        for column, formats in end_sources:
            value = row.get(column)
            if isinstance(value, str):
                endtime = _parse(value, formats)
                break

        if starttime is None or endtime is None:
            venttime.append(np.nan)
            continue

        venthrs = (endtime - starttime).total_seconds() / 3600
        if venthrs < 0:
            venthrs = 0

        venttime.append(venthrs)

    demo['LOS_hours'] = venttime
    return demo

# Comorbidity category name -> list of diagnosis-code strings, written
# without a decimal point (e.g. "I099").
# NOTE(review): the categories and codes look like Elixhauser comorbidity
# groups expressed as ICD-10 prefixes - confirm against the reference used.
# NOTE(review): some keys are irregular ("peripheral vascular_disorder" has a
# space, "rheumatoid_arhritis" is misspelled). They are runtime strings, so
# they are left as-is here; check consumers before renaming.
# Not referenced by any code in the visible part of this file.
mapping = {
     "congestive_heart_failure":["I099","I110","I130","I132","I255","I420","I425","I426","I427","I428","I429","I43","I50","P290"],
     "cardiac_arrhythmia":["I441","I442","I443","I456","I459","I47","I48","I49","R000","R001","R008","T821","Z450","Z950"],
     "valvular_disease":["A520","I05","I06","I07","I08","I091","I098","I34","I35","I36","I37","I38","I39","Q230","Q231","Q232","Q233","Z952","Z953","Z954"],
     "pulmonary_circulation_disorder":["I26","I27","I280","I288","I289"],
     "peripheral vascular_disorder":["I70","I71","I731","I738","I739","I771","I790","I792","K551","K558","K559","Z958","Z959"],
     "hypertension_uncomplicated":["I10"],
     "hypertension_complicated":["I11","I12","I13","I15"],
     "paralysis":["G041","G114","G801","G802","G81","G82","G830","G831","G832","G833","G834","G839"],
     "other_neurological_disorder":["G10","G11","G12","G13","G20","G21","G22","G254","G255","G312","G318","G319","G32","G35","G36","G37","G40","G41","G931","G934","R470","R56"],
     "chronic_pulmonary_disease":["I278","I279","J40","J41","J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67","J684","J701","J703"],
     "diabetes_uncomplicated":["E100","E101","E109","E110","E111","E119","E120","E121","E129","E130","E131","E139","E140","E141","E149"],
     "diabetes_complicated":["E102","E103","E104","E105","E106","E107","E108","E112","E113","E114","E115","E116","E117","E118","E122","E123","E124","E125","E126","E127","E128","E132","E133","E134","E135","E136","E137","E138","E142","E143","E144","E145","E146","E147","E148"],
     "hypothyroidism":["E00","E01","E02","E03","E890"],
     "renal_failure":["I120","I131","N18","N19","N250","Z490","Z491","Z492","Z940","Z992"],
     "liver_disease":["B18","I85","I864","I982","K70","K711","K713","K714","K715","K717","K72","K73","K74","K760","K762","K763","K764","K765","K766","K767","K768","K769","Z944"],
     "peptic_ulcer_disease_excluding_bleeding":["K257","K259","K267","K269","K277","K279","K287","K289"],
     "aids_hiv":["B20","B21","B22","B24"],
     "lymphoma":["C81","C82","C83","C84","C85","C88","C96","C900","C902"],
     "metastatic_cancer":["C77","C78","C79","C80"],
     "solid_tumor_wo_metastasis":["C00","C01","C02","C03","C04","C05","C06","C07","C08","C09","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","C30","C31","C32","C33","C34","C37","C38","C39","C40","C41","C43","C45","C46","C47","C48","C49","C50","C51","C52","C53","C54","C55","C56","C57","C58","C60","C61","C62","C63","C64","C65","C66","C67","C68","C69","C70","C71","C72","C73","C74","C75","C76","C97"],
     "rheumatoid_arhritis":["L940","L941","L943","M05","M06","M08","M120","M123","M30","M310","M311","M312","M313","M32","M33","M34","M35","M45","M461","M468","M469"],
     "coagulopathy":["D65","D66","D67","D68","D691","D693","D694","D695","D696"],
     "obesity":["E66"],
     "weight_loss":["E40","E41","E42","E43","E44","E45","E46","R634","R64"],
     "fluid_and_electrolyte_disorders":["E222","E86","E87"],
     "blood_loss_anemia":["D500"],
     "deficiency_anemia":["D508","D509","D51","D52","D53"],
     "alcohol_abuse":["F10","E52","G621","I426","K292","K700","K703","K709","T51","Z502","Z714","Z721"],
     "drug_abuse":["F11","F12","F13","F14","F15","F16","F18","F19","Z715","Z722"],
     "psychoses":["F20","F22","F23","F24","F25","F28","F29","F302","F312","F315"],
     "depression":["F204","F313","F314","F315","F32","F33","F341","F412","F432"]
}

# Names of the 100 diagnosis columns: 'DX_0' through 'DX_99'.
dx_cols = ['DX_' + str(position) for position in range(100)]

In [None]:
def create_dataset(maxlen = None, window = None, forecast_time = None, data_path = None, demographics_data_path = None, diagnoses_data_path = None, train = True):
    """Build per-patient sliding-window features, comorbidity flags and labels.

    Steps
    -----
    1. Load the measurements, diagnoses and demographics CSV files.
    2. Coerce the feature columns to numeric and fill missing values with the
       per-column means. With ``train`` truthy the means are computed from
       this file and written to ``means.p``; otherwise they are read from
       ``means.p`` (a pickle - only load files you trust).
    3. For every patient (string MRN), slide a window of ``window`` hours in
       steps of ``window // 2`` over the ``'hrs'`` axis, starting ``window``
       hours after the first record and stopping ``window`` hours before the
       last. Patients whose last window start would lie beyond 8 weeks
       (24*7*8 hours) are left out of the result.
    4. Each window is resampled to ``maxlen`` rows with ``dfinterpolate``.
       For intubated patients the label is 1 when the intubation starts less
       than ``forecast_time`` hours after the window's last record and the
       intubation ends more than ``forecast_time`` hours after it; otherwise
       (and for all other patients) the label is 0.

    Parameters
    ----------
    maxlen : int - rows per window after resampling.
    window : int - window length in hours.
    forecast_time : number - forecast horizon in hours.
    data_path, demographics_data_path, diagnoses_data_path : CSV paths.
    train : bool - compute and store the imputation means (True) or load
        previously stored ones (False).

    Returns
    -------
    collections.defaultdict
        ``MRN -> (X, X_comorbidities, Y)`` as numpy arrays with one entry per
        window. A patient without a diagnoses row, or an intubated patient
        whose admit/intubation time cannot be parsed, is stored with empty
        arrays.

    Raises
    ------
    ValueError
        If ``maxlen``, ``window`` or ``forecast_time`` is None.
    """
    if maxlen is None or window is None or forecast_time is None:
        raise ValueError('maxlen, window and forecast_time must all be provided')

    all_vars = ['hrs', 'PAO2', 'PACO2', 'HCO3', 'PH_x', 'Oxygen saturation','C REACTIVE PROTEIN', 'Creatinine', 'D-DIMER', 'Platelets', 'WBC_x', 'CAC - TEMPERATURE', 'CAC - PULSE', 'CAC - RESPIRATIONS', 'systolic', 'diastolic']

    print('Loading data')
    print(data_path)
    print(demographics_data_path)
    print(diagnoses_data_path)

    train_data = pd.read_csv(data_path)
    dx_df = pd.read_csv(diagnoses_data_path)
    demographics = pd.read_csv(demographics_data_path,encoding = "ISO-8859-1")

    # Convert once (on a copy, so pandas does not warn about writing to a
    # slice) and reuse the result for both the means and the imputation.
    numeric_vars = extract_num_df(train_data[all_vars].copy())
    if train:
        means = numeric_vars.mean()
        with open("means.p", "wb") as means_file:
            pickle.dump(means, means_file)
    else:
        with open("means.p", "rb") as means_file:
            means = pickle.load(means_file)
    train_data[all_vars] = numeric_vars.fillna(means)

    # Unique string MRNs in order of first appearance (deterministic, unlike
    # iterating a set of strings).
    train_ids = [x for x in train_data['MRN'].unique() if isinstance(x, str)]

    print('Data loaded.')
    print(' Total number of total visits: {}'.format(len(train_ids)))

    ###Include lists for comorbidities via ICD here###
    # 'J67.0' replaces the former 'J87.0', which sat between 'J67' and
    # 'J67.1' and is not part of the J67.x run it belongs to.
    copd = ['I27.8', 'J40','J41','J41.1','J41.8','J42','J43','J43.0','J43.1','J43.2','J43.8','J43.9','J44','J44.0','J44.0','J44.1','J44.8','J44.9','J45','J45.0','J45.1','J45.8','J45.9','J46','J47','J60','J61','J62','J62.0','J62.8','J63','J63.0','J63.1','J63.2','J63.3','J63.4','J63.5','J63.8','J64','J65','J66','J66.0','J66.1','J66.2','J66.8','J67','J67.0','J67.1','J67.2','J67.3','J67.4','J67.5','J67.6','J67.7','J67.8','J67.9','J68.4','J70.1','J70.3']
    dm = ['E10.0', 'E10.1', 'E10.6', 'E10.8', 'E10.9', 'E11.0', 'E11.1', 'E11.6', 'E11.8', 'E11.9', 'E12.0', 'E12.1', 'E12.6', 'E12.8', 'E12.9', 'E13.0', 'E13.1', 'E13.6', 'E13.8', 'E13.9', 'E14.0', 'E14.1', 'E14.6', 'E14.8', 'E14.9']
    dmwc = ['E10.2', 'E10.3', 'E10.4', 'E10.5', 'E10.7', 'E11.2', 'E11.3', 'E11.4', 'E11.5', 'E11.7', 'E12.2', 'E12.3', 'E12.4', 'E12.5', 'E12.7', 'E13.2', 'E13.3', 'E13.4', 'E13.5', 'E13.7', 'E14.2', 'E14.3', 'E14.4', 'E14.5', 'E14.7']
    rd = ['I12.0', 'I13.1', 'N03.2', 'N03.3', 'N03.4', 'N03.5', 'N03.6', 'N03.7', 'N05.2', 'N05.3', 'N05.4', 'N05.5', 'N05.6', 'N05.7', 'N18', 'N18.0', 'N18.8', 'N18.9', 'N19', 'N25.0', 'Z49.0', 'Z49.1', 'Z49.2', 'Z94.0', 'Z99.2']
    ld = ['I85.0', 'I85.9', 'I86.4', 'I98.2', 'K70.4', 'K71.1', 'K72.1', 'K72.9', 'K76.5', 'K76.6', 'K76.7']
    htn = ['I10']
    htwc = ['I11', 'I11.0', 'I11.9', 'I12', 'I12.0', 'I12.9', 'I13', 'I13.0', 'I13.1', 'I13.2', 'I13.9', 'I15', 'I15.0', 'I15.1', 'I15.2', 'I15.8', 'I15.9']
    dx_cols = ['DX_{}'.format(n) for n in range(100)]
    comorbid_icds = [copd, dm, dmwc, rd, ld, htn, htwc]

    ##############################################################################
    pt_data_dict = defaultdict()
    counter = 0
    unusable_patients = 0
    step = max(1, int(window) // 2)   # a zero step would make range() fail
    max_hours = float(24*7*8)

    for id_ in train_ids:
        # Select this patient's rows once instead of re-masking the whole
        # table for every window.
        patient_rows = train_data.loc[train_data['MRN'] == id_, all_vars]
        complete_rows = patient_rows.dropna().astype(float)
        if complete_rows.empty:
            # No complete record: there is no time range to window over.
            continue

        first_start = np.min(complete_rows['hrs']) + window
        last_start = np.max(complete_rows['hrs']) - window
        if last_start > max_hours:
            continue

        X = []
        Y = []
        train_X_comorbidities = []

        # Everything below is constant per patient, so resolve it once.
        dx_rows = dx_df.loc[dx_df['MRN'] == id_, dx_cols]
        try:
            event_hours = _create_dataset_intubation_hours(demographics, id_)
            usable = len(dx_rows) > 0
        except ValueError:
            event_hours = None
            usable = False

        if usable:
            comorbidities = comorbiditiesfromicd(comorbid_icds, dx_rows.iloc[0])
            for start in range(int(first_start), int(last_start), step):
                window_rows = patient_rows[patient_rows['hrs'].between(start, start + window)]
                if window_rows.empty:
                    continue

                features = np.array(dfinterpolate(window_rows, maxlen=maxlen))[-1*maxlen:, :]

                if event_hours is None:
                    Y_val = 0
                else:
                    last_hr = window_rows['hrs'].iloc[-1]
                    timetoevent = event_hours[0] - last_hr
                    timetoeventend = event_hours[1] - last_hr
                    if (timetoevent < forecast_time) and (timetoeventend > forecast_time):
                        Y_val = 1
                    else:
                        Y_val = 0

                train_X_comorbidities.append(comorbidities)
                X.append(features)
                Y.append(Y_val)
        else:
            unusable_patients += 1

        counter += 1
        pt_data_dict[id_] = (np.array(X), np.array(train_X_comorbidities), np.array(Y))

        print('Processing pt {}/{}'.format(counter,len(train_ids)),end='\r')

    if unusable_patients:
        print()
        print('Patients stored without windows (missing diagnoses row or unreadable timestamps): {}'.format(unusable_patients))

    return pt_data_dict


def _create_dataset_parse_time(value):
    """Parse a demographics timestamp; return None when missing or malformed."""
    # Local import: at module level the name ``datetime`` is rebound from the
    # class to the module by a later ``import datetime``, so calling
    # ``datetime.strptime`` directly fails.
    from datetime import datetime as _datetime

    text = str(value)
    for fmt in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            return _datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _create_dataset_intubation_hours(demographics, id_):
    """Return (hours from admit to intubation, hours from admit to its end).

    Returns None when the patient has no demographics row with
    ``intubation_label == 1``. The end is the extubation time, else the
    discharge time, else the current time. Raises ValueError when the admit
    or intubation time cannot be parsed.
    """
    from datetime import datetime as _datetime

    rows = demographics[demographics['MRN'] == id_]
    if not (rows['intubation_label'] == 1).any():
        return None

    first = rows.iloc[0]
    admit_time = _create_dataset_parse_time(first['Admit_Date'])
    event_time = _create_dataset_parse_time(first['Intubation_Time'])
    if admit_time is None or event_time is None:
        raise ValueError('unreadable admit/intubation time for {}'.format(id_))

    event_end_time = (
        _create_dataset_parse_time(first['Extubation_Time'])
        or _create_dataset_parse_time(first['Discharge_Date'])
        or _datetime.now()
    )
    return hour_delta(admit_time, event_time), hour_delta(admit_time, event_end_time)

In [None]:
# Snapshot tag; it names both the data folder and the CSV file prefixes.
timestr = '04_17_2020'
datadir = '/Users/varunarvind/Desktop/'

# All three input files sit in the same dated snapshot folder.
_snapshot_dir = os.path.join('/home/jkim/varun/data/', timestr)
PATH_TO_DATA = os.path.join(_snapshot_dir, 'holdout.csv')
PATH_TO_DEMOGRAPHICS = os.path.join(_snapshot_dir, timestr+'_demographics.csv')
PATH_TO_DIAGNOSES = os.path.join(_snapshot_dir, timestr+'_diagnoses.csv')

# Windowing parameters handed to create_dataset.
TIME_WINDOW = 24
FORECAST_WINDOW = 72
MAXLEN = 50
# NOTE(review): with TRAIN = True create_dataset recomputes the imputation
# means from this (holdout) file and overwrites means.p - confirm that is
# intended rather than reusing the means stored from the training data.
TRAIN = True

_dataset_args = dict(
    maxlen=MAXLEN,
    window=TIME_WINDOW,
    forecast_time=FORECAST_WINDOW,
    data_path=PATH_TO_DATA,
    demographics_data_path=PATH_TO_DEMOGRAPHICS,
    diagnoses_data_path=PATH_TO_DIAGNOSES,
    train=TRAIN,
)
pt_data = create_dataset(**_dataset_args)

In [None]:
#Save data
import pickle

# Persist the per-patient dictionary built above so later cells can reload
# it without recomputing the windows.
_holdout_pickle_out = '/home/jkim/varun/data/holdout_ptdata.pickle'
with open(_holdout_pickle_out, 'wb') as handle:
    pickle.dump(pt_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
#Load data
# Reload the per-patient dictionary from disk. pickle executes code while
# loading, so only open files produced by this pipeline.
_holdout_pickle_in = '/home/jkim/varun/data/holdout_ptdata.pickle'
with open(_holdout_pickle_in, 'rb') as handle:
    pt_data = pickle.load(handle)
