In [5]:
import os
import sys 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
import keras
from hyperopt import hp, fmin, tpe, Trials, STATUS_FAIL, STATUS_OK
from hyperopt.mongoexp import MongoTrials
import motif_processor
import cv_engine
from math import log
import common_utils


In [None]:
import theano

In [6]:
%matplotlib inline

In [7]:
HepG2MotifProcObj = motif_processor.MotifProcessor(ENCODE_only=False, 
                                                   cell_line="HepG2", 
                                                   meme_file="/home/alvin/Dropbox/Lab/CNN/data/meme/HepG2_pos_steme/steme.txt")
HepG2_motif_tensor, HepG2_motif_names = HepG2MotifProcObj.generate_custom_CNN_filters(max_length = 18, padding = 0, truncate = False)

K562MotifProcObj = motif_processor.MotifProcessor(ENCODE_only=False, 
                                                   cell_line="K562", 
                                                   meme_file="/home/alvin/Dropbox/Lab/CNN/data/meme/K562_pos_steme/steme.txt")
K562_motif_tensor, K562_motif_names = K562MotifProcObj.generate_custom_CNN_filters(max_length = 18, padding = 0, truncate = False)


LCLMotifProcObj = motif_processor.MotifProcessor(ENCODE_only=False, 
                                                   cell_line="LCL", 
                                                   meme_file="/home/alvin/Dropbox/Lab/CNN/data/meme/LCL_pos_steme/steme.txt")
LCL_motif_tensor, LCL_motif_names = LCLMotifProcObj.generate_custom_CNN_filters(max_length = 18, padding = 0, truncate = False)


CrossValProcObj = common_utils.CrossValProcessor(output_dirs = ["./HEPG2_act_1_25/", "./HEPG2_rep_1_25/", 
                                                                      "./K562_act_1_25/", "./K562_act_1_25/",
                                                                      "./LCL_act_alt_1_25/"])

HepG2_binObj = CrossValProcObj.HepG2_binObj
K562_binObj = CrossValProcObj.K562_binObj
LCL_binObj = CrossValProcObj.LCL_binObj
binObjs = [HepG2_binObj, K562_binObj, LCL_binObj]
CvDicts = [CrossValProcObj.HepG2_dict, CrossValProcObj.K562_dict, CrossValProcObj.LCL_dict]
motif_tensors = [HepG2_motif_tensor, K562_motif_tensor, LCL_motif_tensor]
motif_names = [HepG2_motif_names, K562_motif_names, LCL_motif_names]
names=["HepG2", "K562", "LCL"]
dirs = ["./HepG2_1_25/", "./K562_1_25/", "./LCL_1_25/"]


In [8]:
dirs = ["./HepG2_1_25/", "./K562_1_25/", "./LCL_1_25/"]


In [40]:
idx = 0
if idx == 0:
    other_indices = [1,2]
elif idx == 1:
    other_indices = [0,2]
else:
    other_indices = [0,1]

index = 0

In [203]:
import cv_engine
reload(cv_engine)


<module 'cv_engine' from 'cv_engine.py'>

In [204]:
def func(kwargs):
    try:
        print('starting func')
        print kwargs
        cvObj = cv_engine.CvEngine(binObjs[idx], CvDicts[idx], dirs[idx], names[idx], motif_tensors[idx], 
                           motif_names[idx], debug=True)        
        cvObj.start_CV_NN(kwargs)
        other1 = cvObj.predOther(binObjs[other_indices[0]], name = names[other_indices[0]])
        other2 = cvObj.predOther(binObjs[other_indices[1]], name = names[other_indices[1]])
        other1_name = names[other_indices[0]]
        other2_name = names[other_indices[1]]
        cvObj.summarize(prefix="%0.05f_%0.05f_%0.05f"%(float(kwargs['dropout']),float(kwargs['L1']),float(kwargs['L2'])))
        print cvObj.return_combined_auroc()
        return_dict ={'loss': -1 * cvObj.return_combined_auroc(), 'status': STATUS_OK, 'attachments':{other1_name:other1,other2_name:other2}}
        #return_dict ={'loss': -1 * cvObj.return_combined_auroc(), 'status': STATUS_OK, 'attachments':{other1_name:other1}}
        cvObj.wipe_results()
        print "I'm returning!"
        return return_dict
    except Exception as e:
        print "Exception"
        print e
        return {'status': STATUS_FAIL}


In [205]:
spaces = { 
    'dropout':hp.uniform('dropout', 0.0, 0.25),
    'L1':hp.loguniform('L1', log(1e-7), log(10)),
    'L2':hp.loguniform('L2', log(1e-7), log(10)),
    'L1_W1':hp.loguniform('L1_W1', log(1e-7), log(10)),
    'L2_W2':hp.loguniform('L2_W2', log(1e-7), log(10)),
    'pool_width':hp.quniform('pool_width', 2,25,1),
}


In [206]:
trials=Trials()
best = fmin(func, space=spaces,algo=tpe.suggest,max_evals=250, trials=trials)

starting func
{'L1_W1': 0.0045494082817599, 'L2': 2.5844260256967105e-06, 'L1': 0.01603719592047824, 'dropout': 0.09356556550770209, 'L2_W2': 3.2656790388918897, 'pool_width': 5.0}
{'L1_W1': 0.0045494082817599, 'L2': 2.5844260256967105e-06, 'L1': 0.01603719592047824, 'dropout': 0.09356556550770209, 'L2_W2': 3.2656790388918897, 'pool_width': 5.0}
Starting CV NN
{'trainable': True, 'name': 'shared_motif_layer', 'input_shape': (1, 4, 150)}
Elemwise{mul,no_inplace}.0
Elemwise{mul,no_inplace}.0
0.5128	75.1025	0.8294	0.5116	0.8875	0.0062	7.4074	9.8765	14.8148	162.0000	1057.0000
Completed NN on chromsome chr1 in 116.0367 seconds
0.5407	74.8980	0.8243	0.5093	0.8849	0.0061	7.3620	9.8160	14.7239	163.0000	1057.0000
{'trainable': True, 'name': 'shared_motif_layer', 'input_shape': (1, 4, 150)}
Elemwise{mul,no_inplace}.0
Elemwise{mul,no_inplace}.0
nan	76.8034	0.8530	0.5369	0.9221	0.0006	3.9498	6.7712	19.3103	1595.0000	11398.0000
Completed predicting on K562 in 140.48 seconds
Printing result obj
nan	

In [202]:
import pickle
idx=0
pickle.dump(trials, open('./hyperopt_%i/%0.04f_%0.04f_%0.04f.pickle'%(idx, best['dropout'], best['L1'], best['L2']), 'wb'))


In [176]:
trials1.results

[{'loss': -0.7293764619161767, 'status': 'ok'},
 {'loss': -0.7571376334225236, 'status': 'ok'}]

In [177]:
trials1.best_trial

{'book_time': datetime.datetime(2017, 1, 26, 6, 51, 8, 312000),
 'exp_key': None,
 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'idxs': {'L1': [1],
   'L1_W1': [1],
   'L2': [1],
   'L2_W2': [1],
   'dropout': [1],
   'pool_width': [1]},
  'tid': 1,
  'vals': {'L1': [0.38744979475827807],
   'L1_W1': [1.8326118947104384e-05],
   'L2': [4.05832556457045],
   'L2_W2': [3.0150891764390848e-05],
   'dropout': [0.22937737707000913],
   'pool_width': [13.0]},
  'workdir': None},
 'owner': None,
 'refresh_time': datetime.datetime(2017, 1, 26, 6, 52, 6, 794000),
 'result': {'loss': -0.7571376334225236, 'status': 'ok'},
 'spec': None,
 'state': 2,
 'tid': 1,
 'version': 0}

In [187]:
best

{'L1': 0.38744979475827807,
 'L1_W1': 1.8326118947104384e-05,
 'L2': 4.05832556457045,
 'L2_W2': 3.0150891764390848e-05,
 'dropout': 0.22937737707000913,
 'pool_width': 13.0}

In [189]:
dir(trials1)

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__init__',
 '__iter__',
 '__len__',
 '__long__',
 '__module__',
 '__native__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_dynamic_trials',
 '_exp_key',
 '_ids',
 '_insert_trial_docs',
 '_trials',
 'aname',
 'argmin',
 'assert_valid_trial',
 'async',
 'attachments',
 'average_best_error',
 'best_trial',
 'count_by_state_synced',
 'count_by_state_unsynced',
 'delete_all',
 'fmin',
 'idxs',
 'idxs_vals',
 'insert_trial_doc',
 'insert_trial_docs',
 'losses',
 'miscs',
 'new_trial_docs',
 'new_trial_ids',
 'next',
 'refresh',
 'results',
 'source_trial_docs',
 'specs',
 'statuses',
 'tids',
 'trial_attachments',
 'trials',
 'vals',
 'view']