In [1]:
import os
import random as rn
import sys

import pandas as pd
from numpy.random import seed
from tensorflow import set_random_seed

from TL4HDR.data.preProcess import get_one_race, get_n_years, normalize_dataset, \
    standarize_dataset, get_dataset
from TL4HDR.examples.classify_util import run_mixture_cv, run_one_race_cv, \
    run_unsupervised_transfer_cv, run_supervised_transfer_cv, run_CCSA_transfer

Using TensorFlow backend.


In [2]:
# Reproducibility setup: set the hash-seed / Keras-backend environment
# variables, then seed the three random number generators used by this
# notebook (Python's `random`, NumPy and TensorFlow) with the same value.
# NOTE(review): both environment variables are assigned after the import cell
# has already run (Keras has reported its backend by then), and CPython reads
# PYTHONHASHSEED at interpreter start-up -- so these two assignments may have
# no effect on the running kernel. Confirm, and consider exporting them
# before launching Jupyter instead.
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ['PYTHONHASHSEED'] = '0'
rn.seed(11111)
seed(11111)
set_random_seed(11111)

In [3]:
# Widen pandas' text rendering so printed frames/series are not truncated:
# up to 1000 characters per line, 1000 columns and 1000 rows.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

In [4]:
def run_cv(cancer_type, feature_type, target, years=3, groups=("WHITE", "BLACK")):
    """Train the mixture, single-race and transfer-learning classifiers.

    The dataset selected by ``cancer_type`` / ``feature_type`` / ``target`` /
    ``groups`` is loaded and standardized, then split into a WHITE-only set,
    a BLACK-only set, a normalized copy (used for CCSA) and the full set;
    each of them is passed through ``get_n_years(..., years)``.  Every
    cross-validation routine is then run with seed 0 and the selected result
    columns are printed side by side.

    Args:
        cancer_type: forwarded to ``get_dataset`` (e.g. 'BRCA').
        feature_type: forwarded to ``get_dataset``; when it contains 'mRNA'
            or 'methylation' the ``k`` hyper-parameter is 200, otherwise -1.
        target: forwarded to ``get_dataset`` (e.g. 'OS').
        years: forwarded to ``get_n_years``.
        groups: forwarded to ``get_dataset``.  Note that the per-race splits
            below are hard-coded to 'WHITE' and 'BLACK'.

    Returns:
        ``None`` when the data is too small (fewer than 10 rows overall,
        fewer than 5 rows for either race, fewer than 4 distinct ``RY``
        strata, or any stratum with fewer than 5 members).  Otherwise the
        tuple ``(mixture_classifiers, w_classifiers, b_classifiers,
        supervised_transfer_classifiers, unsupervised_transfer_classifiers,
        ccsa_transfer_models)``.
    """
    print (cancer_type, feature_type, target, years)

    # Alternative loader, currently disabled:
    # get_dataset_integ(cancer_type=cancer_type, feature_type=feature_type, target=target, groups=groups)
    full_data = get_dataset(cancer_type=cancer_type, feature_type=feature_type,
                            target=target, groups=groups)
    if full_data['X'].shape[0] < 10:
        return None
    full_data = standarize_dataset(full_data)

    # Per-race subsets; bail out before any training if either is tiny.
    white_data = get_one_race(full_data, 'WHITE')
    if white_data['X'].shape[0] < 5:
        return None
    white_data = get_n_years(white_data, years)

    black_data = get_one_race(full_data, 'BLACK')
    if black_data['X'].shape[0] < 5:
        return None
    black_data = get_n_years(black_data, years)

    # The CCSA run works on a normalized copy of the standardized data.
    ccsa_data = get_n_years(normalize_dataset(full_data), years)

    full_data = get_n_years(full_data, years)

    if 'mRNA' in feature_type or 'methylation' in feature_type:
        k = 200
    else:
        k = -1

    # After get_n_years the dataset unpacks into five parts; the fourth
    # (y_sub in the original code) is not needed here.
    X, Y, R, _, y_strat = full_data
    strata = pd.DataFrame(y_strat, columns=['RY'])
    strata['R'] = R
    strata['Y'] = Y
    print(X.shape)

    # Require all four RY strata, each with at least five members.
    strata_counts = strata['RY'].value_counts()
    print (strata_counts)
    if len(strata_counts) < 4:
        return None
    strata_counts = dict(strata_counts)
    print (strata_counts)
    for label, count in strata_counts.items():
        print (label, count)
        if count < 5:
            return None

    # Hyper-parameters shared by every non-CCSA run.
    common = {'fold': 3, 'k': k, 'val_size': 0.0, 'L1_reg': 0.001, 'L2_reg': 0.001}

    mixture_params = dict(common, batch_size=20, momentum=0.9,
                          learning_rate=0.01, lr_decay=0.03, dropout=0.5,
                          hidden_layers=[128, 64])
    white_params = dict(common, batch_size=20,
                        learning_rate=0.01, lr_decay=0.0, dropout=0.5,
                        hidden_layers=[128, 64])
    black_params = dict(common, batch_size=4,
                        learning_rate=0.01, lr_decay=0.0, dropout=0.5,
                        hidden_layers=[128, 64])
    supervised_params = dict(common, batch_size=20, tune_epoch=500,
                             learning_rate=0.01, lr_decay=0.03, dropout=0.5,
                             tune_lr=0.002, hidden_layers=[128, 64],
                             tune_batch=10)
    unsupervised_params = dict(common, batch_size=20,
                               learning_rate=0.001, lr_decay=0.03, dropout=0.0,
                               n_epochs=100, hidden_layers=[100])

    # Disabled parameter set kept for reference:
    # parametrs_tl_sa = {'fold': 3, 'k': k, 'val_size':0.0, 'batch_size':20,
    #                  'learning_rate':0.005, 'lr_decay':0.0, 'dropout':0.5,
    #                  'L1_reg': 0.001, 'L2_reg': 0.001, 'hidden_layers': [128, 64]}

    ccsa_params = {'fold': 3, 'n_features': k, 'alpha': 0.3, 'batch_size': 32,
                   'learning_rate': 0.01, 'hidden_layers': [100], 'dr': 0.0,
                   'momentum': 0.0, 'decay': 0.0, 'sample_per_class': 2}

    print("Begin classifier training")

    # Same seed for every run (named cv_seed so it does not shadow the
    # numpy `seed` function imported at the top of the notebook).
    cv_seed = 0
    mixture_scores, mixture_classifiers = run_mixture_cv(cv_seed, full_data, **mixture_params)
    white_scores, w_classifiers = run_one_race_cv(cv_seed, white_data, **white_params)
    white_scores = white_scores.rename(columns={"Auc": "W_ind"})
    black_scores, b_classifiers = run_one_race_cv(cv_seed, black_data, **black_params)
    black_scores = black_scores.rename(columns={"Auc": "B_ind"})

    print("Supervised transfer")
    supervised_scores, supervised_transfer_classifiers = run_supervised_transfer_cv(
        cv_seed, full_data, **supervised_params)
    supervised_scores = supervised_scores.rename(columns={"TL_Auc": "XY_TL"})

    print("Unsupervised transfer")
    unsupervised_scores, unsupervised_transfer_classifiers = run_unsupervised_transfer_cv(
        cv_seed, full_data, **unsupervised_params)
    unsupervised_scores = unsupervised_scores.rename(columns={"TL_Auc": "X_TL"})

    print("CCSA transfer")
    ccsa_scores, ccsa_transfer_models = run_CCSA_transfer(cv_seed, ccsa_data, **ccsa_params)
    ccsa_scores = ccsa_scores.rename(columns={"TL_Auc": "CCSA_TL"})

    # The unsupervised 'X_TL' column is deliberately left out of the summary
    # (it was commented out in the original concat).
    summary_columns = [
        mixture_scores,
        white_scores['W_ind'],
        black_scores['B_ind'],
        ccsa_scores['CCSA_TL'],
        supervised_scores['XY_TL'],
    ]
    summary = pd.concat(summary_columns, sort=False, axis=1)

    print(summary)

    return (mixture_classifiers, w_classifiers, b_classifiers,
            supervised_transfer_classifiers, unsupervised_transfer_classifiers,
            ccsa_transfer_models)


In [5]:
# Run the full comparison for BRCA / Protein / OS with years=4 and keep the
# fitted classifiers so later cells can inspect them.
cv_models = run_cv('BRCA', 'Protein', 'OS', years=4)
if cv_models is None:
    # run_cv returns None on its early exits (too few samples / strata);
    # unpacking None would fail with an opaque "not iterable" TypeError.
    raise ValueError("run_cv returned None for BRCA/Protein/OS: "
                     "not enough samples to train the classifiers")
(mixture_classifiers, w_classifiers, b_classifiers,
 supervised_transfer_classifiers, unsupervised_transfer_classifiers,
 ccsa_transfer_models) = cv_models
# Alias for the original, misspelled variable name, kept in case other cells
# still reference it.
w_clasifiers = w_classifiers

('BRCA', 'Protein', 'OS', 4)
(811, 190)
(810, 192)
(322, 189)
1WHITE    216
0WHITE     57
1BLACK     39
0BLACK     10
Name: RY, dtype: int64
{'1BLACK': 39, '0WHITE': 57, '1WHITE': 216, '0BLACK': 10}
('1BLACK', 39)
('0WHITE', 57)
('1WHITE', 216)
('0BLACK', 10)
Begin classifier training
Supervised transfer
Unsupervised transfer


  theano.Param(corruption_level, default=0.2),
  theano.Param(learning_rate, default=0.1)


CCSA transfer
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Domain Adaptation Task: WHITE_to_BLACK
Creating pairs for repetition: 1 and sample_per_class: 2
('Training the model - Epoch 100', ' total trainings:', (1092, 189), (1092, 189))
Instructions for updating:
Use tf.cast instead.
10->20->30->40->50->60->70->80->90->99
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 189)          0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 100)          19000       input_1[0][0]                    
                                       

In [16]:
def get_weights(classifier):
    """Lazily yield ``layer.W.get_value()`` for each hidden layer.

    Iterates ``classifier.hidden_layers`` in order; nothing is read from the
    classifier until the returned generator is consumed.
    (Presumably ``W`` is the layer's weight variable -- verify against the
    classifier implementation.)
    """
    for hidden in classifier.hidden_layers:
        weight_var = hidden.W
        yield weight_var.get_value()

        
def get_biases(classifier):
    """Lazily yield ``layer.b.get_value()`` for each hidden layer.

    Iterates ``classifier.hidden_layers`` in order; nothing is read from the
    classifier until the returned generator is consumed.
    (Presumably ``b`` is the layer's bias variable -- verify against the
    classifier implementation.)
    """
    for hidden in classifier.hidden_layers:
        bias_var = hidden.b
        yield bias_var.get_value()


# Show the bias vectors of the first supervised-transfer classifier.
# Call-form print (single parenthesised argument) prints the same text on
# Python 2 and stays valid on Python 3, matching the print(...) calls used
# elsewhere in this notebook.
print(list(get_biases(supervised_transfer_classifiers[0])))

[array([ 1.04915988e-02,  8.37883239e-04,  1.91491770e-02,  3.51250904e-03,
        4.81389053e-03,  9.58516550e-02, -5.66567156e-04,  1.31758499e-02,
        4.37043427e-03, -1.97529055e-02,  1.37175652e-02,  1.84210043e-02,
        1.06994965e-02,  2.99562243e-02, -3.97641531e-03, -7.05080735e-03,
       -1.93903544e-05, -6.67663833e-03, -1.16883685e-02,  7.15843895e-03,
       -6.63184337e-03,  3.08919558e-02, -1.44428527e-02, -1.70714761e-03,
        6.53477055e-03,  2.10528239e-02,  1.64386709e-03,  2.12950454e-02,
       -4.00343005e-03,  6.79839384e-03,  4.29851999e-03,  1.14604706e-02,
        4.68669283e-03, -1.49554840e-03, -3.19759575e-03,  1.41054041e-02,
       -6.27269086e-03,  3.97933320e-04,  6.08317610e-03,  3.30308064e-03,
        6.97976752e-03,  2.59847433e-02, -1.02808933e-04,  7.97273316e-03,
       -5.17123350e-04,  1.09123180e-02,  1.86446402e-02,  1.46361518e-02,
        2.78710332e-02, -2.96362668e-03,  5.79078209e-02,  1.42729802e-02,
       -2.52907171e-03, 