In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
from keras.optimizers import SGD,Adam,RMSprop
from numpy.random import seed
from sklearn.model_selection import train_test_split

import nnet_survival
from process import PHOTOMICS

Using TensorFlow backend.


In [2]:
# Run configuration shared by the cells below.
# ALGO prefixes the training-image set names and the results directory,
# OMICS and PH are path components of the checkpoint/results locations,
# and PH also decides which custom objects are needed to reload a checkpoint.
ALGO = 'tsne'
OMICS = 'meth'
PH = 'non-PH'

# Derive the boolean flag from the PH label instead of hard-coding False, so
# the label used in paths and the flag given to PHOTOMICS cannot disagree.
# NOTE(review): assumes the PHOTOMICS `PH` keyword means the same thing as the
# 'PH' / 'non-PH' label — confirm against process.PHOTOMICS.
obj = PHOTOMICS(OMICS, PH=(PH == 'PH'), clinical=False)

In [3]:
# Read the clinical table and run the three PHOTOMICS preprocessing steps.
# The omics inputs are addressed as '<ALGO>_training_data_<omics>' in the
# order mrna, meth, mirna.
clinical = pd.read_csv('data/clinical_data_subsets/clinical_data.csv')

dataset_mrna, dataset_meth, dataset_mirna, training_list = obj.input_process1(
    *(ALGO + suffix for suffix in ('_training_data_mrna', '_training_data_meth', '_training_data_mirna'))
)

# Quick size report, printed as "meth | mirna | mrna".
print(*(len(omics_set) for omics_set in (dataset_meth, dataset_mirna, dataset_mrna)), sep=' | ')

# Survival targets and interval layout derived from the clinical table.
(t, f, sample, age, breaks,
 n_intervals, y_train_array, indices, rand_range) = obj.input_process2(training_list, clinical)

# Clinical covariates for the same training list.
clinical_feat, train_id_clinical = obj.process3(clinical, training_list)

Data processing-I...
100 images to array
200 images to array
300 images to array
400 images to array
  0%|          | 2/458 [00:00<00:25, 17.54it/s]All meth images done!
458 | 458 | 458
Data processing-II...
100%|██████████| 458/458 [00:24<00:00, 18.66it/s]
100%|██████████| 458/458 [00:00<00:00, 1196134.02it/s]Done!

Processing clinical features
Features Processed



In [4]:
# Sanity check: the three omics sets and the clinical features should all
# report the same number of samples.
tuple(map(len, (dataset_mrna, dataset_meth, dataset_mirna, clinical_feat)))

(458, 458, 458, 458)

In [5]:
# Repeated hold-out evaluation.
# Each repeat draws a fresh train/test split (random_state = repeat number),
# trains the network, reloads the best checkpoint, and records five-year
# concordance / Brier / p-value / IPCW figures for both the last-epoch model
# and the best-checkpoint model ("Bm" columns).

# Every row carries all 14 fields, so the header lists 'ipcw' and 'ipcwBm'
# explicitly; this fixes the CSV column order regardless of pandas version.
result_columns = ['Conc', 'Brier', 'p_value', 'ConcVal', 'BrierVal', 'PVAlueVal',
                  'ConcBm', 'BrierBm', 'p_valueBm', 'ConcValBm', 'BrierValBm', 'PVAlueVal_Bm',
                  'ipcw', 'ipcwBm']
# Rows are collected in a plain list; the frame is rebuilt from it rather than
# grown with DataFrame.append (quadratic, and removed in pandas 2.0).
records = []
results = pd.DataFrame(columns=result_columns)

# Fail before any training if the label matches neither reload branch;
# otherwise cox_bm would be undefined (or stale from an earlier repeat).
if PH not in ('PH', 'non-PH'):
    raise ValueError("PH must be 'PH' or 'non-PH', got %r" % (PH,))

# Output locations. Created up front so that checkpoint saving and to_csv
# do not fail on a fresh checkout.
checkpoint_dir = 'checkpoints/' + PH + '/' + ALGO + '_' + OMICS
results_dir = ALGO + '_models/' + PH + '/' + OMICS
for directory in (checkpoint_dir, results_dir):
    os.makedirs(directory, exist_ok=True)

# Loop-invariant settings.
indices = range(len(f))
split_ratio = 0.2
batch_size = 8

for random in range(20):
    # NumPy and TensorFlow are re-seeded with the same value on every repeat,
    # so the split's random_state is the only seed that varies across repeats.
    seed(123)
    tf.random.set_random_seed(123)

    # Optimizer and callbacks (created before the session is started, as in
    # the original cell).
    sgd  = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
    early_stopping = EarlyStopping(monitor='val_loss', patience=30)
    filepath = checkpoint_dir + '/two_dense_weights-improvement-' + str(random) + '.hdf5'
    model_checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    # Initialize and compile model
    obj.start_sess()
    cox=obj.architecture(n_intervals)
    #cox.summary()
    cox.compile(loss=nnet_survival.surv_likelihood(n_intervals), optimizer=sgd)

    # Test-train split.
    # One call splits every array with the same shuffled indices, so features,
    # targets, times and event flags are aligned by construction. For a given
    # random_state and sample count this is the same partition that separate
    # calls produce.
    (X_train_mrna, X_test_mrna,
     X_train_meth, X_test_meth,
     X_train_mirna, X_test_mirna,
     clinical_train, clinical_test,
     y_train, y_test,
     T_train, T_test,
     F_train, F_test,
     ind_train, ind_test) = train_test_split(
        dataset_mrna, dataset_meth, dataset_mirna, clinical_feat,
        y_train_array, t, f, indices,
        test_size=split_ratio, random_state=random)

    # NOTE(review): the model input is hard-wired to the methylation set; it
    # does not follow the OMICS setting — confirm this is intended.
    train_omics_data = [X_train_meth]
    test_omics_data = [X_test_meth]

    # NOTE(review): the held-out split drives early stopping and checkpoint
    # selection AND is the set the "Val" metrics are computed on, so those
    # figures are optimistic, most of all for the best-checkpoint model.
    history=cox.fit(train_omics_data, y_train, batch_size=batch_size, epochs=500, verbose=1, validation_data=(test_omics_data,y_test), callbacks=[early_stopping,model_checkpoint])

    # Load saved best model from the path ModelCheckpoint wrote to. The 'PH'
    # variant additionally needs the PropHazards layer to deserialize.
    custom_objects = {'loss': nnet_survival.surv_likelihood(n_intervals)}
    if PH == "PH":
        custom_objects['PropHazards'] = nnet_survival.PropHazards(n_intervals)
    cox_bm = load_model(filepath, custom_objects=custom_objects)

    # Generate training and testing results for last saved and best model
    y_pred, y_pred_val = obj.train_val_results(cox, train_omics_data, test_omics_data,batch_size)
    y_pred_bm, y_pred_val_bm = obj.train_val_results(cox_bm, train_omics_data, test_omics_data,batch_size)

    # Calculate surv prob and medians for last saved and best model.
    # surv_prob takes: training prediction, validation prediction, breaks, time in years.
    # The one-year values are not used by the active metrics below.
    one_year_survival_prob, one_year_survival_prob_val, one_yr_median, one_yr_median_val = obj.surv_prob(y_pred, y_pred_val,breaks, 1)
    five_year_survival_prob, five_year_survival_prob_val, five_yr_median, five_yr_median_val = obj.surv_prob(y_pred, y_pred_val, breaks, 5)

    one_year_survival_prob_bm, one_year_survival_prob_val_bm, one_yr_median_bm, one_yr_median_val_bm = obj.surv_prob(y_pred_bm, y_pred_val_bm, breaks, 1)
    five_year_survival_prob_bm, five_year_survival_prob_val_bm, five_yr_median_bm, five_yr_median_val_bm = obj.surv_prob(y_pred_bm, y_pred_val_bm, breaks, 5)

    # Calculate concordance index and brier scores for last saved and best model
    five_yr_train_concordance, five_yr_train_brier, five_yr_p_value = obj.metrices(T_train, five_year_survival_prob, F_train, y_train, 5, 'train', five_yr_median, breaks)
    five_yr_val_concordance, five_yr_val_brier, five_yr_p_value_val = obj.metrices(T_test, five_year_survival_prob_val, F_test, y_test, 5, 'test', five_yr_median_val, breaks)
    five_yr_train_concordance_bm, five_yr_train_brier_bm, five_yr_p_value_bm = obj.metrices(T_train, five_year_survival_prob_bm, F_train, y_train, 5, 'train', five_yr_median_bm, breaks)
    five_yr_val_concordance_bm, five_yr_val_brier_bm, five_yr_p_value_val_bm = obj.metrices(T_test, five_year_survival_prob_val_bm, F_test, y_test, 5, 'test', five_yr_median_val_bm, breaks)

    five_yr_ipcw = obj.ipcw(F_train, F_test, T_train, T_test, five_year_survival_prob_val)
    five_yr_ipcw_bm = obj.ipcw(F_train, F_test, T_train, T_test, five_year_survival_prob_val_bm)

    df = {'Conc': five_yr_train_concordance,'Brier':five_yr_train_brier,'p_value':five_yr_p_value, 'ConcVal': five_yr_val_concordance,'BrierVal':five_yr_val_brier, 'PVAlueVal':five_yr_p_value_val, 'ipcw':five_yr_ipcw, 'ConcBm': five_yr_train_concordance_bm,'BrierBm':five_yr_train_brier_bm,'p_valueBm':five_yr_p_value_bm, 'ConcValBm': five_yr_val_concordance_bm,'BrierValBm':five_yr_val_brier_bm, 'PVAlueVal_Bm':five_yr_p_value_val_bm, 'ipcwBm':five_yr_ipcw_bm}

    # Each per-repeat file holds every repeat finished so far (cumulative),
    # so an interrupted run still leaves usable results on disk.
    records.append(df)
    results = pd.DataFrame(records, columns=result_columns)
    results.to_csv(results_dir + '/res_' + str(random) + '.csv')
    obj.reset_keras()
results.to_csv(results_dir + '/res_total.csv')

97 - val_loss: 1.5499

Epoch 00025: val_loss did not improve from 1.40633
Epoch 26/500

Epoch 00026: val_loss did not improve from 1.40633
Epoch 27/500

Epoch 00027: val_loss did not improve from 1.40633
Epoch 28/500

Epoch 00028: val_loss did not improve from 1.40633
Epoch 29/500

Epoch 00029: val_loss did not improve from 1.40633
Epoch 30/500

Epoch 00030: val_loss did not improve from 1.40633
Epoch 31/500

Epoch 00031: val_loss did not improve from 1.40633
Epoch 32/500

Epoch 00032: val_loss did not improve from 1.40633
Epoch 33/500

Epoch 00033: val_loss did not improve from 1.40633
Epoch 34/500

Epoch 00034: val_loss did not improve from 1.40633
Epoch 35/500

Epoch 00035: val_loss did not improve from 1.40633
Epoch 36/500

Epoch 00036: val_loss did not improve from 1.40633
Epoch 37/500

Epoch 00037: val_loss did not improve from 1.40633
Epoch 38/500

Epoch 00038: val_loss did not improve from 1.40633
Epoch 39/500

Epoch 00039: val_loss did not improve from 1.40633
Epoch 40/500

Ep

In [None]:
# Stand-alone IPCW concordance for the five-year validation predictions.
# Relies on F_train / T_train / F_test / T_test and
# five_year_survival_prob_val as left behind by the training-loop cell, i.e.
# the values of whichever repeat ran last.
# NOTE(review): concordance_index_ipcw is not imported anywhere in the visible
# notebook (presumably sksurv.metrics.concordance_index_ipcw — confirm) and
# must be imported in the first cell before this cell can run.


def _survival_struct(events, times, event_field, time_field):
    """Pack event flags and times into a two-field structured array.

    The event field is boolean ('?') and the time field is 32-bit integer
    ('i4'), so fractional times are truncated on assignment.
    """
    packed = np.zeros(len(events), dtype={'names': (event_field, time_field), 'formats': ('?', 'i4')})
    # np.asarray lets the flags be a list as well as an ndarray.
    packed[event_field] = np.asarray(events).astype('bool')
    packed[time_field] = times
    return packed


struct_train = _survival_struct(F_train, T_train, 'F_train', 'T_train')
struct_test = _survival_struct(F_test, T_test, 'F_test', 'T_test')

# The score is reported as 1 - c, formatted to five significant digits.
# NOTE(review): presumably inverted because a survival probability ranks
# opposite to a risk score — confirm.
c_ipcw = '%.5g'%(1-concordance_index_ipcw(struct_train, struct_test, five_year_survival_prob_val)[0])


In [6]:
# Print the layer-by-layer summary of `cox`, the model most recently built by
# the training-loop cell (this cell depends on that cell having run).
cox.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 122, 122, 1)  0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 122, 122, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 120, 120, 256 2560        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 120, 120, 256 2560        input_2[0][0]                    
____________________________________________________________________________________________

In [None]:
    # Disabled one-year metrics, kept for reference only.
    # NOTE(review): these calls pass seven arguments, while the active five-year
    # obj.metrices calls in the training loop pass an extra trailing `breaks`
    # argument — presumably `breaks` must be appended before re-enabling; confirm.
    # #One-year
    # one_yr_train_concordance, one_yr_train_brier, one_yr_p_value = obj.metrices(T_train, one_year_survival_prob, F_train, y_train, 1, 'train', one_yr_median)
    # one_yr_val_concordance, one_yr_val_brier, one_yr_p_value_val = obj.metrices(T_test, one_year_survival_prob_val, F_test, y_test, 1, 'test', one_yr_median_val)
    # one_yr_train_concordance_bm, one_yr_train_brier_bm, one_yr_p_value_bm = obj.metrices(T_train, one_year_survival_prob_bm, F_train, y_train, 1, 'train', one_yr_median_bm) 
    # one_yr_val_concordance_bm, one_yr_val_brier_bm, one_yr_p_value_val_bm = obj.metrices(T_test, one_year_survival_prob_val_bm, F_test, y_test, 1, 'test', one_yr_median_val_bm)
    #Five-year