In [1]:
%pylab notebook

# Run on CPU not GPU
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
from tensorflow.keras import layers

import primo.models
import primo.datasets
import primo.tools.filepath as filepaths
import primo.tools.sequences as seqtools
import pandas as pd


Matplotlib created a temporary config/cache directory at /tmp/matplotlib-i5y905a1 because the default path (/tf/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Populating the interactive namespace from numpy and matplotlib


In [2]:
# Build the encoder from the saved short-encoder model file (container path).
encoder = primo.models.Encoder('/tf/primo/data/models/encoder-model-short.h5')

# TODO: Replace the yield_predictor with the nucleaseq Cas9 predictor and use
# that here instead.
# See https://github.com/uwmisl/cas9-similarity-search/issues/3
# Previous predictor, loaded from a saved model file; kept for reference until
# the TODO above is resolved:
#yield_predictor = primo.models.Predictor('/tf/primo/data/models/yield-model.h5')
yield_predictor = primo.models.PredictorFunction()
# Both wrapped models are compiled with no arguments before being combined.
# NOTE(review): presumably this is only needed so `predict` can be called on
# them later; no optimizer or loss is configured here -- confirm.
encoder.model.compile()
yield_predictor.model.compile()
# Combine the encoder and the predictor into a single trainer object; later
# cells call into its `.model`, `.predictor` and `.calcseq` attributes.
encoder_trainer = primo.models.EncoderTrainer(encoder, yield_predictor)

# Print the layer-by-layer summary of the combined model.
encoder_trainer.model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 2, 4096)]    0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               ((None, 4096), (None 0           input_1[0][0]                    
__________________________________________________________________________________________________
encoder (Sequential)            (None, 20, 4)        8554576     lambda_1[0][0]                   
                                                                 lambda_1[0][1]                   
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 20, 4, 2)     0           encoder[0][0]              

In [3]:
# Load the query feature table and print the sequences the encoder maps it to.

# Path to the query features file; `isDocker=True` requests the in-container
# location (presumably -- verify against `filepaths.get_query_features_path`).
query_features_filepath = filepaths.get_query_features_path(isDocker=True)
# The HDF file is read into pandas; a later cell looks rows up by label with
# `.loc[...]`.
query_features = pd.read_hdf(query_features_filepath)
# Run every query's features through the encoder and show the result.
query_seqs = encoder.encode_feature_seqs(query_features)
print(f"Query Seqs: {query_seqs}")

def seq_str_to_input(seq):
    """Convert ``seq`` to one-hot form and swap the first two axes.

    ``seq`` is handed to ``seqtools.seqs_to_onehots`` and the result is
    transposed with axis order ``(1, 0, 2)``, so that result must have exactly
    three axes.

    NOTE(review): this notebook calls the helper with a single sequence
    string; presumably the swap moves a per-base axis behind a leading axis of
    length 1 -- confirm against ``seqs_to_onehots``.
    """
    onehots = seqtools.seqs_to_onehots(seq)
    return np.transpose(onehots, axes=(1, 0, 2))

Query Seqs: ['TAAAAAAAAAAAAGAAAAAA' 'TAAAAAAAAAAAAGAAAAAA' 'GAAAAAAAAAAAAGAAAAAA']


In [11]:
# Put one query's features in both slots of a pair, then compare what the full
# trainer model, the bare predictor, and the sequence-calculating stage return.
query_row = query_features.loc['callie_janelle']
same_pair = np.array([[query_row, query_row]])

a = encoder_trainer.model.predict(same_pair)
print(f"Full model: {a}")

# Give the predictor two copies of the same literal sequence, stacked along
# the first axis and wrapped in a batch of one.
onehot_input = seq_str_to_input('TAAAAAAAAAAAAGAAAAAA')
predictor_batch = np.array([np.concatenate([onehot_input, onehot_input])])
b = encoder_trainer.predictor.model.predict(predictor_batch)
print(f"Predictor with sequences: {b}")

# Inspect the intermediate output for the same pair, raw and decoded.
c = encoder_trainer.calcseq.predict(same_pair)
print(f"Encoded onehot seqs: {c}")
print(c.shape)
print(f"as strings: {seqtools.onehots_to_seqs(c[0])}")

Full model: [[0.0324022]]
Predictor with sequences: [1.]
Encoded onehot seqs: [[[[1.2642966e-04 7.3050702e-01 1.2006029e-04 2.6924643e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [1.4607755e-06 1.4607755e-06 1.4607755e-06 9.9999559e-01]
   [2.5000000e-01 2.5000000e-01 2.5000000e-01 2.5000000e-01]
   [9.9