In [3]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import keras_tuner
from tensorflow import keras
import numpy as np
seq_len=118
border_mode = 'same'

class MyHyperModel(keras_tuner.HyperModel) :
     def build(self, hp) : 
        model = keras.Sequential()
        layers=hp.Int("num_conv", min_value=1, max_value=4, step=1)
        nodes=hp.Choice("neurons", [40,80,100, 200])
        nbr_filters=hp.Choice("filters", [40,80,100, 200])
        filter_len=hp.Choice("filter_len", [4,8, 12])
        drop_rate = hp.Float("drop_rate", min_value=0, max_value=0.35, step=0.10)
        if layers >= 1:
            model.add(keras.layers.Conv1D(activation="relu", input_shape=(seq_len, 4), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
        if layers >= 2:
            model.add(keras.layers.Conv1D(activation="relu", input_shape=(seq_len, 1), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
            model.add(keras.layers.Dropout(drop_rate))
        if layers >= 3:
            model.add(keras.layers.Conv1D(activation="relu", input_shape=(seq_len, 1), padding=border_mode, filters=nbr_filters, kernel_size=filter_len))
            model.add(keras.layers.Dropout(drop_rate))
        model.add(keras.layers.Flatten())

        model.add(keras.layers.Dense(nodes,kernel_initializer='glorot_normal',bias_initializer='zeros'))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.Dropout(drop_rate))

        model.add(keras.layers.Dense(1,kernel_initializer='glorot_normal',bias_initializer='zeros'))
        model.add(keras.layers.Activation('linear'))

        #compile the model
        learning_rate=hp.Choice('lr', [0.01, 0.005,0.001])
        adam = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-06)
        model.compile(loss="mean_squared_error", optimizer=adam)
        return model
     
     
     def fit(self, hp, model,x, *args, **kwargs) :
         
         return model.fit( x, 
                          *args,
                          shuffle=hp.Boolean("shuffle"),
                          **kwargs)

hp = keras_tuner.HyperParameters()
hypermodel = MyHyperModel()
model = hypermodel.build(hp)
# hypermodel.fit(hp, model, np.random.rand(BATCH_SIZE, 400, 400,3), np.random.rand(BATCH_SIZE, classes))



In [4]:
import os
import random
import numpy as np
import pandas as pd
from sklearn import preprocessing


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.config.experimental.enable_op_determinism()

def one_hot_encode(df, col='seq', seq_len=44):
    # Dictionary returning one-hot encoding of nucleotides. 
    nuc_d = {'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1], 'n':[0,0,0,0]}
    
    # Creat empty matrix.
    vectors=np.empty([len(df),seq_len,4])
    
    # Iterate through UTRs and one-hot encode
    for i,seq in enumerate(df[col].str[:seq_len]): 
        seq = seq.lower()
        lst = [nuc_d[x] for x in seq]
        if seq_len>len(seq):
            lst += [nuc_d['n']]*(seq_len-len(seq))
        a = np.array(lst)
        vectors[i] = a
    return vectors


set_seed(42)
name="pl5-2-1"
datadir=f"/Users/john/data/sev/results/{name}/{name}_"
# datadir="./data"
e_train = pd.read_csv(f"{datadir}train.csv")
e_test= pd.read_csv(f"{datadir}test.csv")
print(e_train.shape, e_test.shape)

seq_e_train = one_hot_encode(e_train,seq_len=seq_len)
seq_e_test = one_hot_encode(e_test, seq_len=seq_len)
label = 'score' # abs_score:0.2987 0.6760
e_test.loc[:,'scaled_rl'] = preprocessing.StandardScaler().fit_transform(e_test.loc[:,label].values.reshape(-1,1))
e_train.loc[:,'scaled_rl'] = preprocessing.StandardScaler().fit_transform(e_train.loc[:,label].values.reshape(-1,1))
hp = keras_tuner.HyperParameters()
hypermodel = MyHyperModel()
model = hypermodel.build(hp)
tuner = keras_tuner.BayesianOptimization(
                         hypermodel=MyHyperModel(),
                         objective = "val_loss",
                         max_trials =20, #max candidates to test
                         overwrite=True,
                         directory='./data/keras_tuner',
                         project_name='sev')
tuner.search(seq_e_train, e_train['scaled_rl'], epochs=2, batch_size=32,
              validation_data=(seq_e_test,e_test["scaled_rl"]))
tuner.get_best_hyperparameters(1)[0].values
# best_hps = tuner.get_best_hyperparameters(1)
#  h_model = MyHyperModel()
#  model = h_model.build(best_hps[0])

Trial 20 Complete [00h 00m 06s]
val_loss: 1.002669095993042

Best val_loss So Far: 0.9794211387634277
Total elapsed time: 00h 02m 12s


{'num_conv': 2,
 'neurons': 200,
 'filters': 40,
 'filter_len': 4,
 'drop_rate': 0.30000000000000004,
 'lr': 0.001,
 'shuffle': True}