In [1]:
# Main library: https://square.github.io/pysurvival/index.html
#----------------------------Reproducible------------------------------------------
import gc
import os
import random as rn

import numpy as np
import pandas as pd
import torch

# Single seed shared by the random number generators seeded below.
seed=0
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so setting it
# here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
# Seed NumPy's global generator and Python's `random` module.
# NOTE(review): no other library's generator is seeded in this cell.
np.random.seed(seed)
rn.seed(seed)

#----------------------------Reproducible------------------------------------------

# NOTE(review): TF_CPP_MIN_LOG_LEVEL looks like a TensorFlow logging switch, yet nothing
# visible in this notebook imports TensorFlow — presumably a leftover; confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from sklearn.model_selection import train_test_split
from pysurvival.models.semi_parametric import NonLinearCoxPHModel
from pysurvival.utils.metrics import concordance_index
from pysurvival.utils.display import integrated_brier_score
from pysurvival.utils.display import display_loss_values
from pysurvival.utils.display import compare_to_actual

from lifelines import KaplanMeierFitter
from matplotlib import pyplot as plt

import heapq
import seaborn as sns

In [2]:
def csv_read(p_path):
    """Read a comma-separated file and report its header.

    Parameters
    ----------
    p_path : path of the CSV file to load.

    Returns
    -------
    tuple
        ``(frame, labels)`` where ``frame`` is the parsed ``pandas.DataFrame``
        and ``labels`` is the list of its column labels. The labels are also
        printed as a side effect.
    """
    frame = pd.read_csv(p_path, sep=',')
    labels = frame.columns.tolist()
    print(labels)
    return frame, labels

def csv_read_arr(p_path):
    """Read a comma-separated file and return it both as a frame and as an array.

    Parameters
    ----------
    p_path : path of the CSV file to load.

    Returns
    -------
    tuple
        ``(frame, values, labels)``: the parsed ``pandas.DataFrame``, the same
        table converted with ``np.array``, and the list of column labels.
        The labels are also printed as a side effect.
    """
    frame = pd.read_csv(p_path, sep=',')
    labels = frame.columns.tolist()
    print(labels)
    values = np.array(frame)
    return frame, values, labels

def write_to_csv(p_data,p_path):
    """Append ``p_data`` to the CSV file at ``p_path``.

    ``p_data`` is wrapped in a ``pandas.DataFrame`` and written without a header
    row and without the index, using ``,`` as separator. The file is opened in
    append mode, so existing content is kept and new rows are added after it.
    Returns ``None``.
    """
    pd.DataFrame(p_data).to_csv(p_path, sep=',', index=False, header=False, mode='a')
    # The temporary frame is unreferenced at this point; collect it right away.
    gc.collect()

# Step 1: Preprocess - Filtering

In [3]:
# Load the raw survival table; csv_read also prints the list of column labels.
data,column_names=csv_read('./data/survival_data.csv')

['NACCID', 'NACCAVST', 'NACCFDYS', 'NACCREAS', 'NACCREFR', 'BIRTHMO', 'BIRTHYR', 'SEX', 'HISPANIC', 'HISPOR', 'HISPORX', 'RACE', 'RACEX', 'RACESEC', 'RACESECX', 'RACETER', 'RACETERX', 'PRIMLANG', 'PRIMLANX', 'EDUC', 'MARISTAT', 'NACCLIVS', 'INDEPEND', 'RESIDENC', 'HANDED', 'INBIRMO', 'INBIRYR', 'INSEX', 'NEWINF', 'INHISP', 'INHISPOR', 'INHISPOX', 'NACCNINR', 'INRACE', 'INRACEX', 'INRASEC', 'INRASECX', 'INRATER', 'INRATERX', 'INEDUC', 'INRELTO', 'INRELTOX', 'INKNOWN', 'INLIVWTH', 'INVISITS', 'INCALLS', 'INRELY', 'NACCFAM', 'NACCMOM', 'NACCDAD', 'NACCAM', 'NACCAMX', 'NACCAMS', 'NACCAMSX', 'NACCFM', 'NACCFMX', 'NACCFMS', 'NACCFMSX', 'NACCOM', 'NACCOMX', 'NACCOMS', 'NACCOMSX', 'NACCFADM', 'NACCFFTD', 'ANYMEDS', 'DRUG1', 'DRUG2', 'DRUG3', 'DRUG4', 'DRUG5', 'DRUG6', 'DRUG7', 'DRUG8', 'DRUG9', 'DRUG10', 'DRUG11', 'DRUG12', 'DRUG13', 'DRUG14', 'DRUG15', 'DRUG16', 'DRUG17', 'DRUG18', 'DRUG19', 'DRUG20', 'DRUG21', 'DRUG22', 'DRUG23', 'DRUG24', 'DRUG25', 'DRUG26', 'DRUG27', 'DRUG28', 'DRUG29', 'D

  """Entry point for launching an IPython kernel.


In [4]:
Del_indexes=['NACCID','HISPORX','RACEX','RACESECX','RACETERX','PRIMLANX','INHISPOX','INRACEX','INRASECX','INRATERX','INRELTOX','NACCAMX','NACCAMSX','NACCFMX','NACCFMSX','NACCOMX','NACCOMSX','CVOTHRX','NCOTHRX','ARTHTYPX','OTHSLEEX','ABUSX','PSYCDISX','CVDIMAGX','SPEECHX','FACEXPX', 'TRESTFAX', 'TRESTRHX', 'TRESTLHX', 'TRESTRFX', 'TRESTLFX', 'TRACTRHX', 'TRACTLHX', 'RIGDNEX',  'RIGDUPRX', 'RIGDUPLX', 'RIGDLORX', 'RIGDLOLX', 'TAPSRTX', 'TAPSLFX', 'HANDMVRX', 'HANDMVLX', 'HANDATRX', 'HANDATLX', 'LEGRTX','LEGLFX',  'ARISINGX', 'POSTUREX','GAITX', 'POSSTABX','BRADYKIX','NPIQINFX','OTHNEURX','COGOTHRX','NACCCGFX', 'COGMODEX', 'BEOTHRX', 'NACCBEFX', 'BEMODEX','MOMODEX','MMSELANX','NPSYLANX', 'MOCALANX','OTHBIOMX','OTHMUTX','FTLDSUBX','OTHPSYX','COGOTHX','COGOTH2X', 'COGOTH3X','ARTYPEX', 'SLEEPOTX', 'ANTIENCX', 'OTHCONDX','RESPOTHX','FTDLTFAS', 'FTDLIMB', 'FTDBULB', 'FTDGSEV', 'FTDGSEVX', 'FTDGTYP', 'FTDGTYPG', 'FTDGTYPX', 'FTDPPASL', 'FTDPPAPO', 'FTDPPAIW', 'FTDPPASW', 'FTDPPAPK', 'FTDPPAGS', 'FTDPPAEH', 'FTDPPACS', 'FTDPPASS', 'FTDPPASR', 'FTDPPASD', 'FTDCPPA', 'FTDCPPAS', 'FTDBVCLN', 'FTDBVDIS', 'FTDBVAPA', 'FTDBVLOS', 'FTDBVRIT', 'FTDBVHYP', 'FTDBVNEU', 'FTDBVIDL', 'FTDBVFT', 'FTDEMGPV', 'FTDEMGPY', 'FTDEMGMN', 'FTDPABVF', 'FTDWORRC', 'FTDWORRS', 'FTDWORRR', 'FTDWORIC', 'FTDWORIS', 'FTDWORIR', 'FTDWORIP', 'FTDSEMMT', 'FTDSEMAA', 'FTDSEMTA', 'FTDSEMSU', 'FTDANASW', 'FTDANAOW', 'FTDANATS', 'FTDSENAS', 'FTDSENOS', 'FTDSENSR', 'FTDSENPR', 'FTDNOUNC', 'FTDVERBC', 'FTDRATIO', 'FTDREAAS', 'FTDREAOS', 'FTDREASR', 'FTDREAPR', 'FTDCPC2F', 'FTDHAIRD', 'FTDSPIT', 'FTDNOSE', 'FTDCOAGE', 'FTDCRY', 'FTDCUT', 'FTDYTRIP', 'FTDEATP', 'FTDTELLA', 'FTDOPIN', 'FTDLAUGH', 'FTDSHIRT', 'FTDKEEPM', 'FTDPICKN', 'FTDOVER', 'FTDEATR', 'FTDHAIRL', 'FTDSHIRW', 'FTDMOVE', 'FTDHUGS', 'FTDLOUD', 'FTDLOST', 'FTDSNTOT', 'FTDSNTBS', 'FTDSNTOS', 'FTDSNRAT', 'FTDSELF', 'FTDBADLY', 'FTDDEPR', 'FTDEMOTD', 'FTDLSELF', 'FTDDISR', 'FTDBELCH', 'FTDGIGG', 'FTDPRIV', 'FTDNEGAT', 'FTDECOMM', 'FTDINAPJ', 'FTDFAILA', 
'FTDRESIS', 'FTDINTER', 'FTDVERBA', 'FTDPHYSI', 'FTDTOPIC', 'FTDPROTO', 'FTDPREO', 'FTDFINI', 'FTDACTED', 'FTDABS', 'FTDFEEDB', 'FTDFRUST', 'FTDANXI', 'FTDNERVO', 'FTDNDIAG', 'FTDSTIMB', 'FTDSTIME', 'FTDOBJEC', 'FTDCIRCU', 'FTDPERSE', 'FTDREPEA', 'FTDANECD', 'FTDDINIT', 'FTDDELAY', 'FTDADDVE', 'FTDFLUCT', 'FTDLOSTT', 'FTDREPRU', 'FTDTRAIN', 'FTDDISCL', 'FTDSPONT', 'FTDSPONR', 'FTDSTOOD', 'FTDTOUCH', 'FTDDSOCI', 'FTDEXAGG', 'FTDSBTOT', 'FTDSBCTO', 'FTDLENGT', 'FTDCPC4F', 'FTDWORKU', 'FTDMIST', 'FTDCRIT', 'FTDWORR', 'FTDBAD', 'FTDPOOR', 'FTDFFEAR', 'FTDBIST', 'FTDCPC5F', 'FTDINSEX', 'FTDINFYR', 'FTDINFMO', 'FTDINFRE', 'FTDFEEL', 'FTDDIFF', 'FTDSORR', 'FTDSIDE', 'FTDADVAN', 'FTDIMAG', 'FTDMISF', 'FTDWASTE', 'FTDPITY', 'FTDQTOUC', 'FTDSIDES', 'FTDSOFTH', 'FTDUPSET', 'FTDCRITI', 'FTDIRIEC', 'FTDIRIPT', 'FTDCPC6F', 'FTDALTER', 'FTDEMOT', 'FTDACROS', 'FTDCONV', 'FTDINTUI', 'FTDJOKE', 'FTDIMAGP', 'FTDINAPP', 'FTDCHBEH', 'FTDADBEH', 'FTDLYING', 'FTDGOODF', 'FTDREGUL', 'FTDSMSCR', 'FTDSPSCR', 'FTDRSMST', 'FTDSMRI', 'FTDSMDY', 'FTDSMYR', 'FTDSMMO', 'FTDSMDIC', 'FTDSMDIS', 'FTDSMADN', 'FTDSMADV', 'FTDSMMAN', 'FTDSMMAO', 'FTDSMMAM', 'FTDSMFS', 'FTDSMFSO', 'FTDSMQU', 'FTDFDGPT', 'FTDFPYR', 'FTDFPMO', 'FTDFPDY', 'FTDFDDIC', 'FTDFDDID', 'FTDFDADN', 'FTDFDADV', 'FTDFDMAN', 'FTDFDMAO', 'FTDFDMAM', 'FTDFDQU', 'FTDAMYPT', 'FTDAMDY', 'FTDAMYR', 'FTDAMMO', 'FTDAMDIC', 'FTDAMDID', 'FTDAMLIG', 'FTDAMLIO', 'FTDAMADN', 'FTDAMADV', 'FTDAMMAN', 'FTDAMMAO', 'FTDAMMAM', 'FTDAMQU', 'FTDOTHER', 'FTDOTDOP', 'FTDOTSER', 'FTDOTCHO', 'FTDOTANO', 'FTDOTANS', 'FTDIDIAG', 'FTDSMRIO', 'FTDMRIFA', 'FTDMRIRF', 'FTDMRILF', 'FTDMRIRT', 'FTDMRILT', 'FTDMRIRM', 'FTDMRILM', 'FTDMRIRP', 'FTDMRILP', 'FTDMRIRB', 'FTDMRILB', 'FTDMRIOB', 'FTDMRIOS', 'FTDFDGPE', 'FTDFDGFH', 'FTDFDGRF', 'FTDFDGLF', 'FTDFDGRT', 'FTDFDGLT', 'FTDFDGRM', 'FTDFDGLM', 'FTDFDGRP', 'FTDFDGLP', 'FTDFDGRB', 'FTDFDGLB', 'FTDFDGOA', 'FTDFDGOS', 'FTDAMYP', 'FTDAMYVI', 'FTDAMYRF', 'FTDAMYLF', 'FTDAMYRT', 'FTDAMYLT', 'FTDAMYRM', 'FTDAMYLM', 
'FTDAMYRP', 'FTDAMYLP', 'FTDAMYRB', 'FTDAMYLB', 'FTDAMYOA', 'FTDAMYOS', 'FTDCBFSP', 'FTDCBFVI', 'FTDCBFRF', 'FTDCBFLF', 'FTDCBFRT', 'FTDCBFLT', 'FTDCBFRM', 'FTDCBFLM', 'FTDCBFRP', 'FTDCBFLP', 'FTDCBFRB', 'FTDCBFLB', 'FTDCBFOA', 'FTDCBFOS', 'FTDOTHI', 'FTDOTHIS','NGDSWGAC', 'NGDSWEAC', 'NGDSGWAC', 'NGDSEXAC', 'NGDSEXOM','ADGCGWAS','NGDSGWAS','ADGCEXR','ADGCRND','NGDSWGS','NGDSWES','ADGCEXOM','NPSEX','NPFIXX','NPTANX', 'NPABANX', 'NPASANX', 'NPTDPANX','NPHISOX', 'NPPATHOX', 'NPFAUT1', 'NPFAUT2', 'NPFAUT3', 'NPFAUT4','NPNIT', 'NPCERAD', 'NPADRDA', 'NPOCRIT','NPOTH1X',  'NPOTH2X', 'NPOTH3X','LBSSALIV', 'LBSSWALL', 'LBSINSEX', 'LBSPRSEX', 'LBSWEIGH', 'LBSSMELL', 'LBSSWEAT', 'LBSTOLCD', 'LBSTOLHT', 'LBSDBVIS', 'LBSCONST', 'LBSHDSTL', 'LBSLSSTL', 'LBSUBLAD', 'LBSUSTRM', 'LBSUPASS', 'LBSDZSTU', 'LBSDZSTN', 'LBSFAINT', 'LBSPSYM', 'LBPSYAGE', 'LBSSUPSY', 'LBSSUPDI', 'LBSSUPHT', 'LBSSTNSY', 'LBSSTNDI', 'LBSSTNHT', 'LBSAGERM', 'LBSAGESM', 'LBSAGEGT', 'LBSAGEFL', 'LBSAGETR', 'LBSAGEBR', 'LBSSCLAU', 'LBSSCLVR', 'LBSSCLOT', 'LBSSCOR', 'LBUDSPCH', 'LBUDSALV', 'LBUDSWAL', 'LBUWRITE', 'LBUDFOOD', 'LBUDRESS', 'LBUDHYGN', 'LBUDTURN', 'LBUDFALL', 'LBUDFRZ', 'LBUDWALK', 'LBUDTREM', 'LBUDSENS', 'LBUMSPCH', 'LBUMSPCX', 'LBUMFACE', 'LBUMFACX', 'LBUMTRFA', 'LBUTRFAX', 'LBUMTRRH', 'LBUTRRHX', 'LBUMTRLH', 'LBUTRLHX', 'LBUMTRRF', 'LBUTRRFX', 'LBUMTRLF', 'LBUTRLFX', 'LBUMATRH', 'LBUATRHX', 'LBUMATLH', 'LBUATLHX', 'LBUMRGNK', 'LBURGNKX', 'LBUMRGRU', 'LBURGRUX', 'LBUMRGLU', 'LBURGLUX', 'LBUMRGRL', 'LBURGRLX', 'LBUMRGLL', 'LBURGLLX', 'LBUMFTRH', 'LBUFTRHX', 'LBUMFTLH', 'LBUFTLHX', 'LBUMHMRH', 'LBUHMRHX', 'LBUMHMLH', 'LBUHMLHX', 'LBUMPSRH', 'LBUPSRHX', 'LBUMPSLH', 'LBUPSLHX', 'LBUMLGRL', 'LBULGRLX', 'LBUMLGLL', 'LBULGLLX', 'LBUMRISE', 'LBUMRISX', 'LBUMPOST', 'LBUMPOSX', 'LBUMGAIT', 'LBUMGAIX', 'LBUPSTBL', 'LBUPSTBX', 'LBUMBRAD', 'LBUMBRAX', 'LBUMHNYR', 'LBUMHNYX', 'LBDELUS', 'LBDHURT', 'LBDSTEAL', 'LBDAFFR', 'LBDGUEST', 'LBDIMPOS', 'LBDHOME', 'LBDABAND', 'LBDPRES', 'LBDOTHER', 'LBDELFRQ', 
'LBDELSEV', 'LBDELDST', 'LBHALL', 'LBHVOICE', 'LBHPEOPL', 'LBHNOTPR', 'LBHODOR', 'LBHFEEL', 'LBHTASTE', 'LBHOTSEN', 'LBHALFRQ', 'LBHALSEV', 'LBHALDST', 'LBANXIET', 'LBANEVNT', 'LBANRELX', 'LBANBRTH', 'LBANBUTT', 'LBANPLAC', 'LBANSEPR', 'LBANOTHR', 'LBANXFRQ', 'LBANXSEV', 'LBANXDST', 'LBAPATHY', 'LBAPSPNT', 'LBAPCONV', 'LBAPAFF', 'LBAPCHOR', 'LBAPINT', 'LBAPFAML', 'LBAPINTR', 'LBAPOTH', 'LBAPAFRQ', 'LBAPASEV', 'LBAPADST', 'LBDOPAM', 'LBDAGE', 'LBDDRUG1', 'LBDDOSE1', 'LBDAGE2', 'LBDDRUG2', 'LBDDOSE2', 'LBDELAGE', 'LBDELMED', 'LBDELMD1', 'LBDELMD2', 'LBHALAGE', 'LBHALMED', 'LBHALMD1', 'LBHALMD2', 'LBANXAGE', 'LBANXMED', 'LBANXMD1', 'LBANXMD2', 'LBAPAAGE', 'LBAPAMED', 'LBAPAMD1', 'LBAPAMD2', 'LBMLTHRG', 'LBMSLEEP', 'LBMDISRG', 'LBMSTARE', 'LBSPCGIM', 'LBSPDRM', 'LBSPYRS', 'LBSPMOS', 'LBSPINJS', 'LBSPINJP', 'LBSPCHAS', 'LBSPMOVE', 'LBSPLEGS', 'LBSPNERV', 'LBSPURGL', 'LBSPSENS', 'LBSPWORS', 'LBSPWALK', 'LBSPAWAK', 'LBSPBRTH', 'LBSPTRT', 'LBSPCRMP', 'LBSPALRT', 'LBSCLIV', 'LBSCSLP', 'LBSCBEHV', 'LBSCDRM', 'LBSCYRS', 'LBSCMOS', 'LBSCINJS', 'LBSCINJP', 'LBSCCHAS', 'LBSCMOVE', 'LBSCLEGS', 'LBSCNERV', 'LBSCSENS', 'LBSCWORS', 'LBSCWALK', 'LBSCAWAK', 'LBSCBRTH', 'LBSCTRT', 'LBSCCRMP', 'LBSCALRT','PACOGIMP', 'PANSFALL', 'PANSWKOF', 'PANSLYAW', 'PANSWKER', 'PANSLTTL', 'SCPARATE', 'PADSUNEX', 'PADSSITP', 'PADSWATV', 'PADSTALK', 'PADSAWDY', 'PADSFLDY', 'CONSFALL', 'CONSWKOF', 'CONSLYAW', 'CONSWKER', 'CONSLTTL', 'SCCORATE', 'CODSUNEX', 'CODSSITP', 'CODSWATV', 'CODSTALK', 'CODSAWDY', 'CODSFLDY', 'SCCOFRST', 'SCCOAGEN', 'SCCOAGED', 'SCCOCOMP', 'SCCOSCVR', 'SCCOOTH', 'SCCOSCOR', 'LBNSWORD', 'LBNSCOLR', 'LBNSCLWD', 'LBNPFACE', 'LBNPNOIS', 'LBNPTCOR', 'LBNPPARD', 'LBCDSCOG', 'LBCCMEM', 'LBCCLANG', 'LBCCATT', 'LBCCEXDE', 'LBCCVIS', 'LBCDSMOV', 'LBCMBRAD', 'LBCMRIGD', 'LBCMRTRM', 'LBCMPTRM', 'LBCMATRM', 'LBCMMYOC', 'LBCMGAIT', 'LBCMPINS', 'LBCDSBEV', 'LBCBDEP', 'LBCBAPA', 'LBCBANX', 'LBCBHALL', 'LBCBDEL', 'LBCDSAUT', 'LBCAREM', 'LBCAAPN', 'LBCALGSL', 'LBCARSLE', 'LBCADTSL', 'LBCACGFL', 
'LBCAHYPT', 'LBCACONS', 'LBCAHYPS', 'LBCAFALL', 'LBCASYNC', 'LBCASNAP', 'LBCOGST', 'LBCOGDX', 'LBGLRRK2', 'LBGLRKIS', 'LBGPARK2', 'LBGPK2IS', 'LBGPARK7', 'LBGPK7IS', 'LBGPINK1', 'LBGPNKIS', 'LBGSNCA', 'LBGSNCIS', 'LBGGBA', 'LBGGBAIS', 'LBGOTHR', 'LBGOTHIS', 'LBGOTHX', 'LBISMRI', 'LBISMMO', 'LBISMDY', 'LBISMYR', 'LBISMQAV', 'LBISMHIP', 'LBISMAVL', 'LBISMDCM', 'LBISMFMT', 'LBISMADN', 'LBISMVER', 'LBISMMAN', 'LBISMOM', 'LBISMSTR', 'LBISMOS', 'LBIFPET', 'LBIFPYR', 'LBIFPMO', 'LBIFPDY', 'LBIFPQAV', 'LBIFPOCC', 'LBIFPTPP', 'LBIFPISL', 'LBIFPAVL', 'LBIFPDCM', 'LBIFPFMT', 'LBIFPADN', 'LBIFPVER', 'LBIFPMAN', 'LBIFPOM', 'LBIAPET', 'LBIAPYR', 'LBIAPMO', 'LBIAPDY', 'LBIAPQAV', 'LBIAPAVL', 'LBIAPDCM', 'LBIAPFMT', 'LBIAPLIG', 'LBIAPOL', 'LBIAPADN', 'LBIAPVER', 'LBIAPMAN', 'LBIAPOM', 'LBITPET', 'LBITPDY', 'LBITPYR', 'LBITPMO', 'LBITPQAV', 'LBITPAVL', 'LBITPDCM', 'LBITPFMT', 'LBITPLIG', 'LBITPOL', 'LBITPADN', 'LBITPVER', 'LBITPMAN', 'LBITPOM', 'LBIDATS', 'LBIDSYR', 'LBIDSMO', 'LBIDSDY', 'LBIDSQAV', 'LBIDSABN', 'LBOPOLYS', 'LBOPOSYR', 'LBOPOSMO', 'LBOPOSDY', 'LBOPOPOS', 'LBOPOAVL', 'LBOCMIBG', 'LBOCMYR', 'LBOCMMO', 'LBOCMDY', 'LBOCMPOS', 'LBOCMAVL', 'LBOANOS', 'LBOANYR', 'LBOANMO', 'LBOANDY', 'LBOANPOS', 'LBOANAVL', 'LBOANVER', 'LBOANOTH', 'LBOEEG', 'LBOEGYR', 'LBOEGMO', 'LBOEGDY', 'LBOEGPOS', 'LBOEGAVL', 'LBOMSLT', 'LBOMSYR', 'LBOMSMO', 'LBOMSDY', 'LBOMSPOS', 'LBOMSAVL', 'LBOTILT', 'LBOTLYR', 'LBOTLMO', 'LBOTLDY', 'LBOTLPOS', 'LBOTLAVL', 'LBOQSART', 'LBOQSYR', 'LBOQSMO', 'LBOQSDY', 'LBOQSPOS', 'LBOSGAVL', 'LBOTHERM', 'LBOTHDY', 'LBOTHYR', 'LBOTHMO', 'LBOTHPOS', 'LBOTHAVL', 'LBOCGAIT', 'LBOCGYR', 'LBOCGMO', 'LBOCGDY', 'LBOCGPOS', 'LBOCGAVL','NPVOTH', 'NPLEWYCS', 'NPGENE', 'NPFHSPEC', 'NPTAUHAP', 'NPPRNP', 'NPCHROM', 'NPPNORM', 'NPCNORM', 'NPPADP', 'NPCADP', 'NPPAD', 'NPCAD', 'NPPLEWY', 'NPCLEWY', 'NPPVASC', 'NPCVASC', 'NPPFTLD', 'NPCFTLD', 'NPPHIPP', 'NPCHIPP', 'NPPPRION', 'NPCPRION', 'NPPOTH1', 'NPCOTH1', 'NPPOTH2', 'NPCOTH2', 'NPPOTH3', 'NPCOTH3','OTHCOGX','CANCSITE','NACCFDYS', 
'NACCAVST']#CDRGLOB

In [5]:
# Work on a copy so the frame loaded above is left untouched.
data_store=data.copy()
# Remove every column whose label appears in Del_indexes.
data_store_=data_store.drop(columns=Del_indexes)
data_store_.shape

# Labels of the columns that remain, kept as a plain list for the cells below.
data_store_title=data_store_.columns.tolist()
data_store_

Unnamed: 0,NACCREAS,NACCREFR,BIRTHMO,BIRTHYR,SEX,HISPANIC,HISPOR,RACE,RACESEC,RACETER,...,NPBNKB,NACCFORM,NACCPARA,NACCCSFP,NPBNKF,NPFAUT,NACCDAGE,NACCINT,SurvivalTime,Event
0,1,2,4,1926,2,0,88,1,88,88,...,0,1,1,1,0,0,89,43,1787,True
1,1,8,3,1925,2,0,88,1,3,88,...,1,0,1,0,0,0,93,10,3659,False
2,2,2,12,1939,2,0,88,1,88,88,...,1,1,1,0,0,0,75,37,2151,False
3,2,2,4,1954,1,0,88,1,88,88,...,1,1,1,0,0,0,63,32,0,False
4,1,2,3,1963,1,0,88,1,88,88,...,1,1,1,0,0,0,53,10,490,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624,2,2,4,1927,2,0,88,1,88,88,...,0,1,1,0,0,0,88,60,644,True
625,1,2,12,1948,2,0,88,1,88,88,...,1,0,1,0,0,0,67,4,0,False
626,1,1,3,1945,1,0,88,1,88,88,...,1,1,1,1,0,9,72,23,1238,False
627,2,2,7,1940,1,0,88,1,88,88,...,0,1,1,0,0,0,76,9,939,True


In [6]:
# Persist the column-filtered table (header row followed by the data rows).
# The file is written in one call that truncates any existing copy: the previous
# append-mode writes added a second header and a second set of rows whenever a file
# from an earlier run was already on disk, corrupting the input of the next step.
output_path="./data/survival_data_filtered_1.csv"
# `title` holds the column labels as an array; later cells use it to look up column positions.
title=np.array(data_store_title)
data_store_.to_csv(output_path, index=False, sep=',')

In [7]:
# Position of the NPPATHO column among the filtered column labels (first match).
NPPATHO_index=np.flatnonzero(title=='NPPATHO')[0]

## Drug

In [8]:
# Encode the DRUG1..DRUG40 columns as integer codes, written back into data_store_arr.
# Missing entries become -1; every other distinct value is numbered 0, 1, 2, ... in the
# order returned by np.unique, with one code table shared by all forty columns.
str_DRUG='DRUG'
list_DRUG=[str_DRUG+str(i) for i in range(1,41)]

#----------------------------------------------------------------
# Column position of each DRUG field, looked up by label in `title`.
DRUG_index_list=[np.where(title==list_DRUG_i)[0][0] for list_DRUG_i in list_DRUG]

#----------------------------------------------------------------
# Array copy of the filtered table; the following cells keep editing this array.
data_store_arr=np.array(data_store_)

DRUG_data=data_store_arr[:,np.array(DRUG_index_list)]
# Missing values are replaced by the string "-1" so they get their own table entry.
DRUG_data_fill=np.array(pd.DataFrame(DRUG_data).fillna("-1"))

#----------------------------------------------------------------
DRUG_data_fill_unique=np.unique(DRUG_data_fill)
DRUG_data_fill_unique_drop_neg1_index=np.where(DRUG_data_fill_unique!='-1')[0]
DRUG_data_fill_unique_drop_neg1=DRUG_data_fill_unique[DRUG_data_fill_unique_drop_neg1_index]

# Code table: "-1" (missing) maps to -1, every other value to its rank among the unique values.
Dict_DRUG_data_fill={"-1":-1}
for DRUG_code, DRUG_value in enumerate(DRUG_data_fill_unique_drop_neg1):
    Dict_DRUG_data_fill[DRUG_value]=DRUG_code

# Write the codes back to the column each value was read from. Indexing with
# DRUG_index_list[j] instead of DRUG_index_list[0]+j keeps the write-back correct even
# when the DRUG columns are not stored next to each other.
for j, DRUG_col in enumerate(DRUG_index_list):
    data_store_arr[:,DRUG_col]=[Dict_DRUG_data_fill[DRUG_value] for DRUG_value in DRUG_data_fill[:,j]]

## NACCWRI

In [9]:
# Encode the NACCWRI1..NACCWRI3 columns as integer codes, written back into data_store_arr.
# Missing entries become -1; every other distinct value is numbered 0, 1, 2, ... in the
# order returned by np.unique, with one code table shared by the three columns.
list_NACCWRI=['NACCWRI1','NACCWRI2','NACCWRI3']

#----------------------------------------------------------------
# Column position of each NACCWRI field, looked up by label in `title`.
NACCWRI_index_list=[np.where(title==list_NACCWRI_i)[0][0] for list_NACCWRI_i in list_NACCWRI]

#----------------------------------------------------------------
NACCWRI_data=data_store_arr[:,np.array(NACCWRI_index_list)]
# Missing values are replaced by the string "-1" so they get their own table entry.
NACCWRI_data_fill=np.array(pd.DataFrame(NACCWRI_data).fillna("-1"))

#----------------------------------------------------------------
NACCWRI_data_fill_unique=np.unique(NACCWRI_data_fill)
NACCWRI_data_fill_unique_drop_neg1_index=np.where(NACCWRI_data_fill_unique!='-1')[0]
NACCWRI_data_fill_unique_drop_neg1=NACCWRI_data_fill_unique[NACCWRI_data_fill_unique_drop_neg1_index]

# Code table: "-1" (missing) maps to -1, every other value to its rank among the unique values.
Dict_NACCWRI_data_fill={"-1":-1}
for NACCWRI_code, NACCWRI_value in enumerate(NACCWRI_data_fill_unique_drop_neg1):
    Dict_NACCWRI_data_fill[NACCWRI_value]=NACCWRI_code

# Write the codes back to the column each value was read from. Indexing with
# NACCWRI_index_list[j] instead of NACCWRI_index_list[0]+j keeps the write-back correct
# even when the NACCWRI columns are not stored next to each other.
for j, NACCWRI_col in enumerate(NACCWRI_index_list):
    data_store_arr[:,NACCWRI_col]=[Dict_NACCWRI_data_fill[NACCWRI_value] for NACCWRI_value in NACCWRI_data_fill[:,j]]

# Missing NPPATHO entries are replaced by the numeric code -1.
data_store_arr[:,NPPATHO_index]=np.array(pd.DataFrame(data_store_arr[:,NPPATHO_index]).fillna(-1))[:,0]

In [10]:
# Persist the encoded table (header row followed by the data rows).
# The file is written in one call that truncates any existing copy: the previous
# append-mode writes added a second header and a second set of rows whenever a file
# from an earlier run was already on disk, and the next step reads this file back.
output_path="./data/survival_data_filtered_2.csv"
title=np.array(data_store_title)
pd.DataFrame(data_store_arr, columns=data_store_title).to_csv(output_path, index=False, sep=',')

# Step 2: Preprocess - drop columns containing "Not available" or "missing" codes

In [11]:
# Reload the encoded table written above, as a DataFrame and as an array; csv_read_arr
# also prints the column labels. Note that `data` and `column_names` are rebound here.
data,data_arr,column_names=csv_read_arr('./data/survival_data_filtered_2.csv')

['NACCREAS', 'NACCREFR', 'BIRTHMO', 'BIRTHYR', 'SEX', 'HISPANIC', 'HISPOR', 'RACE', 'RACESEC', 'RACETER', 'PRIMLANG', 'EDUC', 'MARISTAT', 'NACCLIVS', 'INDEPEND', 'RESIDENC', 'HANDED', 'INBIRMO', 'INBIRYR', 'INSEX', 'NEWINF', 'INHISP', 'INHISPOR', 'NACCNINR', 'INRACE', 'INRASEC', 'INRATER', 'INEDUC', 'INRELTO', 'INKNOWN', 'INLIVWTH', 'INVISITS', 'INCALLS', 'INRELY', 'NACCFAM', 'NACCMOM', 'NACCDAD', 'NACCAM', 'NACCAMS', 'NACCFM', 'NACCFMS', 'NACCOM', 'NACCOMS', 'NACCFADM', 'NACCFFTD', 'ANYMEDS', 'DRUG1', 'DRUG2', 'DRUG3', 'DRUG4', 'DRUG5', 'DRUG6', 'DRUG7', 'DRUG8', 'DRUG9', 'DRUG10', 'DRUG11', 'DRUG12', 'DRUG13', 'DRUG14', 'DRUG15', 'DRUG16', 'DRUG17', 'DRUG18', 'DRUG19', 'DRUG20', 'DRUG21', 'DRUG22', 'DRUG23', 'DRUG24', 'DRUG25', 'DRUG26', 'DRUG27', 'DRUG28', 'DRUG29', 'DRUG30', 'DRUG31', 'DRUG32', 'DRUG33', 'DRUG34', 'DRUG35', 'DRUG36', 'DRUG37', 'DRUG38', 'DRUG39', 'DRUG40', 'TOBAC30', 'TOBAC100', 'SMOKYRS', 'PACKSPER', 'QUITSMOK', 'ALCOCCAS', 'ALCFREQ', 'CVHATT', 'HATTMULT', 'HATTYE

In [12]:
# Labels of every column holding at least one negative value; such values are treated
# as the "Not available" and "missed" codes and the whole column is removed.
column_names_del=[
    column_names[col_pos]
    for col_pos in range(data_arr.shape[1])
    if np.any(data_arr[:,col_pos]<0)
]

data_preprocess=data.drop(columns=column_names_del)

data_preprocess_arr=np.array(data_preprocess)
data_store_title=data_preprocess.columns.tolist()

# Positions of the two columns that drive the row filter.
remaining_labels=np.array(data_store_title)
CDRGLOB_index=np.flatnonzero(remaining_labels=='CDRGLOB')[0]
Event_index=np.flatnonzero(remaining_labels=='Event')[0]

# Despite its name, del_non_change_status lists the row positions that are KEPT:
# a row stays when its CDRGLOB value differs from 2 or its Event value is not False.
cdrglob_not_2=data_preprocess_arr[:,CDRGLOB_index]!=2
event_not_false=data_preprocess_arr[:,Event_index]!=False
del_non_change_status=np.flatnonzero(cdrglob_not_2 | event_not_false)

# CDRGLOB is only needed for the filter above, so it is removed from the output table.
data_preprocess_drop=data_preprocess.drop(columns='CDRGLOB')
data_store_title_drop=data_preprocess_drop.columns.tolist()

data_preprocess_drop_arr=np.array(data_preprocess_drop)[del_non_change_status]

In [13]:
# Persist the final modelling table (header row followed by the kept rows).
# The file is written in one call that truncates any existing copy: the previous
# append-mode writes added a second header and a second set of rows whenever a file
# from an earlier run was already on disk, and the next step reads this file back.
output_path="./data/survival_data_filtered_3.csv"
title=np.array(data_store_title_drop)
pd.DataFrame(data_preprocess_drop_arr, columns=data_store_title_drop).to_csv(output_path, index=False, sep=',')

In [14]:
# Sanity check: number of column labels in the header of the final table.
title.shape

(245,)

In [15]:
# Sanity check: (rows kept, columns) of the final table.
data_preprocess_drop_arr.shape

(508, 245)

# Step 3: Model training and evaluation

In [16]:
# Load the final modelling table; csv_read also prints its column labels.
data,column_names=csv_read('./data/survival_data_filtered_3.csv')

# Event indicator of every sample; negating it selects the right-censored samples.
Events=np.array(data['Event'])
censored_percent=np.sum(~Events) * 100. / len(Events)
print("%.2f%% samples are right censored in whole data." % censored_percent)

['NACCREAS', 'NACCREFR', 'BIRTHMO', 'BIRTHYR', 'SEX', 'HISPANIC', 'HISPOR', 'RACE', 'RACESEC', 'RACETER', 'PRIMLANG', 'EDUC', 'MARISTAT', 'NACCLIVS', 'INDEPEND', 'RESIDENC', 'HANDED', 'NACCFADM', 'NACCFFTD', 'MEMORY', 'ORIENT', 'JUDGMENT', 'COMMUN', 'HOMEHOBB', 'PERSCARE', 'CDRSUM', 'DECSUB', 'DECIN', 'COGMEM', 'COGJUDG', 'COGLANG', 'COGVIS', 'COGATTN', 'COGOTHR', 'NACCCOGF', 'COGMODE', 'DECAGE', 'BEAPATHY', 'BEDEP', 'BEVHALL', 'BEAHALL', 'BEDEL', 'BEDISIN', 'BEIRRIT', 'BEAGIT', 'BEPERCH', 'BEOTHR', 'NACCBEHF', 'BEMODE', 'MOGAIT', 'MOFALLS', 'MOTREM', 'MOSLOW', 'NACCMOTF', 'MOMODE', 'COURSE', 'FRSTCHG', 'NORMCOG', 'DEMENTED', 'NACCPPA', 'NACCBVFT', 'NACCLBDS', 'NACCTMCI', 'NACCMCIL', 'NACCMCIA', 'NACCMCIE', 'NACCMCIV', 'NACCMCII', 'IMPNOMCI', 'NACCALZD', 'NACCALZP', 'NACCLBDE', 'NACCLBDP', 'PARK', 'PSP', 'PSPIF', 'CORT', 'CORTIF', 'DOWNS', 'DOWNSIF', 'HUNT', 'HUNTIF', 'PRION', 'PRIONIF', 'BRNINJ', 'BRNINJIF', 'HYCEPH', 'HYCEPHIF', 'NEOP', 'NEOPIF', 'DEP', 'DEPIF', 'OTHPSY', 'OTHPSYIF',

In [17]:
# Features: every column from NACCREAS through NACCINT. Targets: SurvivalTime and Event.
X=data.loc[:,'NACCREAS':'NACCINT']
y_=data.loc[:,'SurvivalTime':'Event']
# Structured (Event, SurvivalTime) array so the split keeps each target pair aligned with
# its feature row. The two fields are read by column name rather than by position.
y=np.array(list(zip(y_['Event'], y_['SurvivalTime'])), dtype=[('Event', 'bool'), ('SurvivalTime', '<f8')])

# Fixed random_state so the train/test partition is the same on every run.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2, random_state=0)

time_train=y_train['SurvivalTime']
event_train=y_train['Event']
time_test=y_test['SurvivalTime']
event_test=y_test['Event']

# The seeds set at the top of the notebook cover NumPy and `random` only. The network is
# a PyTorch module (its weights are read through state_dict() further down), so PyTorch's
# generator is seeded here to make weight initialisation repeatable, including when this
# cell is re-run on its own.
torch.manual_seed(seed)

# Building the model: one hidden layer of 150 units with the BentIdentity activation.
structure = [ {'activation': 'BentIdentity', 'num_units': 150},  ]
nonlinear_coxph = NonLinearCoxPHModel(structure=structure)
nonlinear_coxph.fit(X_train, time_train, event_train, num_epochs = 2000,lr=1e-3, init_method='glorot_uniform')

# Model Performances on the held-out split.
# NOTE(review): the original author annotated this line with "0.81" — presumably the
# C-index of an earlier run; confirm.
c_index = concordance_index(nonlinear_coxph, X_test, time_test, event_test) #0.81
print('C-index: {:.2f}'.format(c_index))

ibs = integrated_brier_score(nonlinear_coxph, X_test, time_test, event_test, t_max=None, figure_size=(20, 6.5) )
print('IBS: {:.2f}'.format(ibs))

% Completion:   6%|**                                             |Loss: 727.65

KeyboardInterrupt: 

In [None]:
# pysurvival display helper; presumably plots the loss history recorded during fit() — confirm.
display_loss_values(nonlinear_coxph)

In [None]:
# Custom loss curve with larger fonts and roughly five evenly spaced y ticks.
max_loss=int(np.max(nonlinear_coxph.loss_values))+1
# The tick spacing is at least 1: a zero step (any max_loss below 5) would make
# np.arange raise instead of producing ticks.
step=max(1, max_loss//5)

plt.figure(figsize=(12,6))
plt.plot(nonlinear_coxph.loss_values)
plt.xlabel('Epoch',fontsize=18)
plt.ylabel('Loss',fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(np.arange(0,max_loss,step),fontsize=18)
plt.show()

In [None]:
# pysurvival display helper evaluated on the held-out split with the rmse, mean and median
# metrics; presumably it compares predicted and observed counts over time — confirm.
results = compare_to_actual(nonlinear_coxph, X_test, time_test, event_test,
                            is_at_risk = False,  figure_size=(16, 6),
                            metrics = ['rmse', 'mean', 'median'])

In [None]:
# Compare the predicted survival curve of one test sample with the Kaplan-Meier
# estimate fitted on the whole test split.
kmf = KaplanMeierFitter()
fig, ax = plt.subplots(figsize=(12, 6))

# Draw one test sample at random among those with an observed event and a time of at least 1.
choices = np.argwhere((event_test==1.)&(time_test>=1)).flatten()
k = np.random.choice( choices, 1)[0]

# Observed event time of the selected sample.
t = time_test[k]

# Predicted survival values for that sample over the model's time grid.
predicted = nonlinear_coxph.predict_survival(X_test.values[k, :]).flatten()

# Displaying the predicted curve
plt.plot(nonlinear_coxph.times, predicted, color='blue', label='Predicted', lw=2)

# Mark the observed event time
plt.axvline(x=t, color='black', ls ='--')
ax.annotate('T={:.1f}'.format(t), xy=(t, 0.5), xytext=(t, 0.5), fontsize=18)

kmf.fit(time_test, event_observed=event_test)
ax = kmf.plot_survival_function(ax=ax)

# The caption gets its own name: the preprocessing cells keep the array of column labels
# in `title`, and rebinding that name to a string here would break them if they were
# re-run after this cell.
plot_title = "Comparing Survival functions between KM_estimate and Predicted"
plt.legend(fontsize=18)
plt.title(plot_title, fontsize=18)
plt.ylim(0, 1.05)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.show()

In [None]:
# Colour palette shared by the per-patient plots (one colour per curve).
cs=['blue','darkseagreen','red','green','olive','cyan','tan','silver','purple','fuchsia','chocolate','darkcyan','aqua','pink','orange','bisque','lightsteelblue']

fig, ax = plt.subplots(figsize=(12, 6))

# Test samples with an observed event and a time of at least 1.
choices = np.argwhere((event_test==1.)&(time_test>=1)).flatten()
# Draw up to five distinct samples. Capping the count at len(choices) avoids the error
# np.random.choice raises when asked for more distinct items than are available.
n_patients = min(5, len(choices))
k_list = np.random.choice( choices, n_patients,replace=False)

for i, k in enumerate(k_list):
    # Observed event time of this sample
    t = time_test[k]

    # Predicted survival values over the model's time grid
    predicted = nonlinear_coxph.predict_survival(X_test.values[k, :]).flatten()

    # Predicted curve, labelled with the sample's position in the test split
    plt.plot(nonlinear_coxph.times, predicted, color=cs[i], label='Patient '+str(k), lw=2)

    # Observed time as a dashed line; annotations are staggered vertically so they do not overlap
    plt.axvline(x=t, color=cs[i], ls ='--')
    ax.annotate('T={:.1f}'.format(t), xy=(t, 0.5), xytext=(t, (0.1+i/10)), fontsize=18,color=cs[i])

plt.legend(loc=[1.02,0.58],fontsize=18, ncol=1)
plt.ylim(0, 1.05)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))

# No new sampling happens here: the first three samples drawn for the previous figure
# (k_list) are reused, together with the same colour list (cs).
k_list_ = k_list[0:3]

for i, k in enumerate(k_list_):
    # Observed event time of this sample
    t = time_test[k]

    # Predicted survival values over the model's time grid
    predicted = nonlinear_coxph.predict_survival(X_test.values[k, :]).flatten()

    # Predicted curve, labelled with the sample's position in the test split
    plt.plot(nonlinear_coxph.times, predicted, color=cs[i], label='Patient '+str(k), lw=2)

    # Observed time as a dashed line; annotations are staggered vertically so they do not overlap
    plt.axvline(x=t, color=cs[i], ls ='--')
    ax.annotate('T={:.1f}'.format(t), xy=(t, 0.5), xytext=(t, (0.1+i/10)), fontsize=18,color=cs[i])

plt.legend(loc=[0.1,1.05],fontsize=18, ncol=3)
plt.ylim(0, 1.05)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.show()

In [None]:
# Display the network object held by the fitted model.
nonlinear_coxph.model

In [None]:
# Transposed copy of the 'model.0.weight' entry of the network's state dict.
# NOTE(review): assumes this entry is the first linear layer with shape
# (hidden units, input features), so that after the transpose each row belongs to one
# input feature — confirm.
first_layer_weight=np.array(np.transpose(nonlinear_coxph.model.state_dict()['model.0.weight']))
# L1 norm of each row (sum of absolute weights along axis 1), used below as a per-row score.
feature_importace=np.linalg.norm(first_layer_weight,1, axis=1)
feature_importace

In [None]:
# Column labels of the modelling table, for cross-referencing the indices computed below.
column_names

In [None]:
# Number of highest-scoring rows to report.
k=20

# Positions of the k largest importance scores, in the order heapq.nlargest returns them.
top_k=heapq.nlargest(k, range(feature_importace.shape[0]), key=feature_importace.__getitem__)
top_k

In [None]:
# Importance scores at the selected top-k positions, in the same order as top_k.
feature_importace[top_k]

In [None]:
# Labels at the top-k positions. column_names[:-2] leaves out the last two labels —
# presumably SurvivalTime and Event — so positions line up with the feature columns; confirm.
np.array(column_names[:-2])[top_k]

In [None]:
# Heat map of the raw first-layer weights with the colour scale fixed to [-1, 1].
# The minimum/maximum of the weights are not needed for this figure, so the unused
# computation that used to open this cell has been removed.
fig, ax = plt.subplots(figsize = (8,6))
heatmp=sns.heatmap(first_layer_weight, cmap='PiYG', vmin=-1, vmax=1)
cbar = heatmp.collections[0].colorbar
# Colour-bar tick labels use font size 18, like the axis labels.
cbar.ax.tick_params(labelsize=18)
# The x axis carries no label and no ticks.
plt.xlabel('')
plt.xticks([])
# One y tick every 20 rows, labelled with the row index.
plt.yticks(np.arange(0,np.array(column_names[:-2]).shape[0],20),np.arange(0,np.array(column_names[:-2]).shape[0],20),fontsize=18)
plt.ylabel('Feature index',fontsize=18)
plt.show()

In [None]:
# Min-max rescale the first-layer weights to the [0, 1] range before plotting.
mn = first_layer_weight.min()
mx = first_layer_weight.max()
first_layer_weight_scaled = (first_layer_weight - mn) / (mx - mn)

fig, ax = plt.subplots(figsize = (8,6))
heatmp=sns.heatmap(first_layer_weight_scaled, cmap='PiYG', vmin=0, vmax=1)
# Colour-bar tick labels use font size 18, like the axis labels.
cbar = heatmp.collections[0].colorbar
cbar.ax.tick_params(labelsize=18)
# The x axis carries no label and no ticks.
plt.xlabel('')
plt.xticks([])
# One y tick every 20 rows, labelled with the row index.
feature_ticks=np.arange(0,len(column_names[:-2]),20)
plt.yticks(feature_ticks,feature_ticks,fontsize=18)
plt.ylabel('Feature index',fontsize=18)
plt.show()