In [9]:
%pylab notebook

import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from copy import deepcopy
import secrets
import os

import primo.models
import primo.datasets
import primo.tools.sequences as seqtools
from primo.models.cas9_keras import log_multisite_predictor

# Enable memory growth so that we only use as much GPU memory as needed.
# By default, tensorflow will reserve nearly all of the GPU memory.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    # Listed only so the physical/logical device counts can be reported below.
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized.
    # The error is only printed, so the notebook keeps running with
    # whatever memory policy is already in effect.
    print(e)

Populating the interactive namespace from numpy and matplotlib
1 Physical GPUs, 1 Logical GPUs


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [10]:
# Training and validation feature sets, read from fixed paths.
# NOTE(review): `switch_every` presumably controls how often the loader swaps
# the train_*.h5 files it has open (see the "switching to ..." log lines) --
# confirm against primo.datasets.OpenImagesTrain.
train_dataset = primo.datasets.OpenImagesTrain(
    '/tf/open_images/train/', switch_every=5*10**4
)
validation_dataset = primo.datasets.OpenImagesVal('/tf/open_images/validation/')

# To see how this value was derived, please consult the Materials and Methods
# subsection under the Feature Extraction section in Bee et al. 2021.
similarity_threshold = 75
# Intuitively determined:
batch_size = 20
val_batch_size = 150

def keras_batch_generator(dataset_batch_generator):
    """Adapt a triplet batch generator to the (inputs, targets) pairs Keras expects.

    Each sample is a triplet with known similar and dissimilar images, so the
    target values are never read by the loss function; an all-zero placeholder
    with one entry per triplet is yielded in their place.

    Args:
        dataset_batch_generator: iterator yielding ``(indices, triplets)``
            tuples, where
            indices: positive integers uniquely identifying each image. The
                index is obtained by enumerating all the images in the dataset
                (before splitting them into test/train/validate datasets).
            triplets: a set of three image feature vectors containing the
                anchor, a positive (similar) image, and a negative
                (dissimilar) image.

    Yields:
        ``(triplets, np.zeros(len(triplets)))`` for every upstream batch.
    """
    while True:
        try:
            # The indices are not needed for training, only the triplets.
            _indices, triplets = next(dataset_batch_generator)
        except StopIteration:
            # Under PEP 479 a StopIteration escaping a generator body is turned
            # into RuntimeError; end this generator cleanly instead when the
            # upstream source runs dry.
            return
        yield triplets, np.zeros(len(triplets))

# Endless stream of (triplets, placeholder targets) built from random training features.
train_batch_generator = keras_batch_generator(
    primo.datasets.dataset.triplet_batch_generator(
        train_dataset.random_features(batch_size),
        similarity_threshold
    )
)

# Same pipeline for the validation set, using the larger validation batch size.
val_batch_generator = keras_batch_generator(
    primo.datasets.dataset.triplet_batch_generator(
        validation_dataset.random_features(val_batch_size),
        similarity_threshold
    )
)

# Pull one training batch up front; this consumes a batch from the generator
# and keeps a fixed sample of triplets around for later cells.
train_inputs, train_targets = next(train_batch_generator)

switching to train_f.h5 and train_4.h5


In [11]:
# The default encoder input is a 4096-dimensional vector because we represent
# our images through an embedding that was learned by a computer vision model
# known as VGG [1]. We borrow the output of the 2nd fully-connected layer
# (i.e. FC2), which produces a 4096-by-1 vector.
#
# If you're curious about VGG's inner workings, you can see an example
# tensorflow implementation here [2, 3].
#
# Note for future users: If you ever decide to use a model other than VGG16,
# you'd probably want to change the input dimension here.
#
# [1] - https://neurohive.io/en/popular-networks/vgg16/
# [2] - https://www.cs.toronto.edu/~frossard/post/vgg16/
# [3] - https://github.com/kentsommer/VGG16-Image-Retrieval/blob/master/vgg16_example.py#L237
INPUT_FEATURE_SIZE = 4096

# Each CAS site is 20nt, so the output length should be 20nt for a single
# site, and a multiple of 20nt for multiple sites.
OUTPUT_LEN = 20

# Temperature used for softmax calculation
SOFTMAX_TEMP = 1.0

# Optionally, load a previously saved model to continue training
# (leave as None to train the freshly built encoder).
MODEL_FILE=None

def hardmax(temperature):
    """Build a straight-through "hard" softmax activation.

    Args:
        temperature: divisor applied to the logits before the softmax.

    Returns:
        A function ``hardmax_f(x)`` that takes logits over the last axis and
        returns a tensor of the same shape. In the forward pass the result is
        the one-hot encoding of the arg-max class; in the backward pass the
        gradient is that of the temperature-scaled softmax.
    """
    def hardmax_f(x):
        y = tf.nn.softmax(x / temperature)
        # Take the number of classes from the input rather than assuming 4.
        # Prefer the static size so downstream shape inference is unchanged,
        # and fall back to the dynamic shape when it is unknown.
        depth = y.shape[-1]
        if depth is None:
            depth = tf.shape(y)[-1]
        # Match the softmax dtype so the subtraction below also works for
        # non-float32 inputs.
        y_hard = tf.one_hot(tf.argmax(y, -1), depth, dtype=y.dtype)
        # Straight-through estimator: the value equals y_hard, but the
        # stop_gradient term contributes no gradient, leaving d(y)/d(x).
        y = tf.stop_gradient(y_hard - y) + y
        return y
    return hardmax_f

# Create the keras model for a single encoder.
# It maps one feature vector to an (OUTPUT_LEN x 4) one-hot sequence.
encoder = tf.keras.Sequential([
    # NOTE: the l2 coefficient is 0.0, so the activity regularizers below
    # currently add nothing to the loss.
    layers.Dense(4096, activation = 'relu', activity_regularizer=tf.keras.regularizers.l2(0.0000)),
    layers.Dropout(0.2),
    layers.Dense(4096, activation = 'relu', activity_regularizer=tf.keras.regularizers.l2(0.0000)),
    layers.Dropout(0.2),
    # Four scores (one per nucleotide) for each of the OUTPUT_LEN positions.
    layers.Dense(OUTPUT_LEN * 4, activation='relu'),
    layers.Reshape([OUTPUT_LEN, 4]),
    # One-hot in the forward pass, softmax gradient in the backward pass.
    layers.Lambda(hardmax(SOFTMAX_TEMP)),
], name='encoder')

# Start with previously trained model if a filename is provided
if MODEL_FILE is not None:
    encoder = tf.keras.models.load_model(MODEL_FILE)

# First input is anchor, second is a positive sample, third is a negative sample
X_triplets = layers.Input([3, INPUT_FEATURE_SIZE])

# Essentially, we started with a batch of feature-vector triplets...
# ...And turned them into a triplet of feature-vector batches.
X1, X2, X3 = layers.Lambda(lambda X: (X[:,0,:], X[:,1,:], X[:,2,:]))(X_triplets)

# Layer to compute euclidean distances between the triplet pairs for convenience:
# (anchor vs. positive, anchor vs. negative), measured in feature space.
# NOTE(review): `distances` is not one of the model outputs and is not
# referenced again in the cells visible here.
distances = layers.Lambda(lambda Xs:
                          (
                              tf.sqrt(tf.reduce_sum(tf.square(Xs[0]-Xs[1]), axis=1)),
                              tf.sqrt(tf.reduce_sum(tf.square(Xs[0]-Xs[2]), axis=1))
                          ))([X1,X2,X3])

# Independently transforms the batches of feature vectors into one-hot encoded DNA sequences.
# The same `encoder` instance (and therefore the same weights) is applied to all three.
S1 = encoder(X1)
S2 = encoder(X2)
S3 = encoder(X3)

# Glue them back together! Back into a batch of sequence triplets.
S_triplets = layers.Lambda(
    lambda Ss: tf.stack(Ss, axis=-1)
)([S1,S2,S3])

# Dimensions: (batch_size x OUTPUT_LEN x 4 x 3) (i.e. batch size x DNA length x # of nucleotides x 3)
# Swaps dimensions for the loss function, which wants (batch_size x 3 x DNA length x 4)
S_triplets_T = layers.Lambda(lambda S: tf.transpose(S, [0, 3, 1, 2]))(S_triplets)

# Trainable wrapper: feature-vector triplets in, sequence triplets out.
encoder_trainer = tf.keras.Model(inputs=X_triplets, outputs=S_triplets_T)

In [12]:
# Define the loss function and the metrics used for training

def UniquenessMetric(y_true, y_pred):
    """Fraction of anchor sequences in the batch that are distinct.

    The metric is computed from ``y_pred`` itself, so it describes whichever
    batch Keras is currently scoring (training or validation) and needs no
    extra forward pass or notebook-level state.

    Args:
        y_true: unused placeholder targets.
        y_pred: batch of encoded triplets indexed as
            (sample, triplet member, position, base); member 0 is the anchor.
            Must be convertible with ``np.asarray`` (the model is compiled
            with ``run_eagerly=True`` for this reason).

    Returns:
        ``number of unique anchor sequences / batch size`` as a float, or 0.0
        for an empty batch.
    """
    anchors = np.asarray(y_pred)[:, 0, :, :]
    n_samples = len(anchors)
    if n_samples == 0:
        # Avoid a ZeroDivisionError on an empty batch.
        return 0.0
    # Reduce each position to a single base (e.g. [0.3, 0.1, 0.5, 0.1] -> 2)
    base_calls = np.argmax(anchors, axis=-1)
    unique_seqs = np.unique(base_calls, axis=0)
    # Return ratio of unique sequences to input samples
    return float(len(unique_seqs)) / n_samples

def RecallMetric(y_true, y_pred):
    """Share of (anchor, positive) pairs whose predictor output exceeds -2.0.

    Args:
        y_true: unused placeholder targets.
        y_pred: encoded triplets; members 0 (anchor) and 1 (positive) are
            selected along axis 1.

    Returns:
        Mean, as float32, of the indicator ``log_multisite_predictor(pair) > -2.0``.
    """
    # NOTE(review): the predictor output is presumably a log-scale activity
    # score -- confirm in primo.models.cas9_keras.
    anchor_positive = tf.gather(y_pred, [0, 1], axis=1)
    log_activity = log_multisite_predictor(anchor_positive)
    above_cutoff = tf.cast(log_activity > -2.0, tf.float32)
    return tf.reduce_mean(above_cutoff)

def NegRecallMetric(y_true, y_pred):
    """Share of (anchor, negative) pairs whose predictor output exceeds -2.0.

    Args:
        y_true: unused placeholder targets.
        y_pred: encoded triplets; members 0 (anchor) and 2 (negative) are
            selected along axis 1.

    Returns:
        Mean, as float32, of the indicator ``log_multisite_predictor(pair) > -2.0``.
    """
    # NOTE(review): the predictor output is presumably a log-scale activity
    # score -- confirm in primo.models.cas9_keras.
    anchor_negative = tf.gather(y_pred, [0, 2], axis=1)
    log_activity = log_multisite_predictor(anchor_negative)
    above_cutoff = tf.cast(log_activity > -2.0, tf.float32)
    return tf.reduce_mean(above_cutoff)

class EarlyStopCallback(keras.callbacks.Callback):
    """Stop training when a per-run signal file appears on disk.

    On construction a random four-hex-digit id is generated and the path of
    the matching signal file is printed. At the end of every epoch the
    callback checks whether that file exists and, if so, asks the model to
    stop training.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        uid = secrets.token_hex(2)
        self.__file = f'/tf/primo/signals/{uid}'
        print(f"Touch {self.__file} to terminate training early")

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop if the signal file exists.

        Args:
            epoch: zero-based index of the epoch that just finished.
            logs: metric results for the epoch (unused).
        """
        if os.path.exists(self.__file):
            # Keras passes a zero-based index while its progress output is
            # one-based; report the number the user sees.
            print(f"\nStopping after Epoch {epoch + 1}")
            self.model.stop_training = True

class TripletLoss(object):
    """Triplet loss built on ``log_multisite_predictor``.

    The two clamp thresholds are constructor arguments; the defaults
    reproduce the original fixed values (0.5 and -3.0).
    """

    def __init__(self, pos_floor=0.5, neg_floor=-3.0):
        """
        Args:
            pos_floor: lower clamp applied to the positive-pair term.
            neg_floor: lower clamp applied to the negative-pair term.
        """
        self.pos_floor = pos_floor
        self.neg_floor = neg_floor

    def __call__(self, y_true, y_pred):
        """
        y_pred is triplets of (anchor, positive, negative), with dimensions
        batch_size x 3 x 20 x 4. y_true is unused.

        Returns the sum of the two clamped terms.
        """
        # Positive pairs: negated predictor output, so lower is better.
        pos_distance = -log_multisite_predictor(tf.gather(y_pred, [0,1], axis=1))
        # Negative pairs: raw predictor output, so lower is better.
        neg_distance = log_multisite_predictor(tf.gather(y_pred, [0,2], axis=1))

        # Compute loss function which penalizes low activation rate for positive
        # pairs, and high activation rate for negative pairs. Each term is
        # clamped from below, so once a term falls under its floor it is
        # constant and yields no gradient -- i.e. samples which are already
        # well trained are ignored.
        return (
            tf.maximum(pos_distance, self.pos_floor)
            + tf.maximum(neg_distance, self.neg_floor)
        )


In [5]:
# --------------------------------------------------------------------------
# Fit the triplet encoder on the full training stream
# --------------------------------------------------------------------------
# run_eagerly=True: UniquenessMetric pulls concrete values out of tensors via
# NumPy, which requires eager execution.
encoder_trainer.compile(
    optimizer=tf.keras.optimizers.Adagrad(1e-4),
    loss=TripletLoss(),
    metrics=[UniquenessMetric, RecallMetric, NegRecallMetric],
    run_eagerly=True,
)

# The generators are endless, so the length of an epoch and of each validation
# pass is fixed by steps_per_epoch / validation_steps.
history = encoder_trainer.fit(
    train_batch_generator,
    steps_per_epoch=100,
    epochs=800,
    validation_data=val_batch_generator,
    validation_steps=5,
    callbacks=[EarlyStopCallback()],
)

Touch /tf/primo/signals/05fc to terminate training early
Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800


Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800


Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/800
Epoch 74/800
Epoch 75/800
Epoch 76/800
Epoch 77/800
Epoch 78/800
Epoch 79/800
Epoch 80/800
Epoch 81/800


Epoch 82/800
Epoch 83/800
Epoch 84/800
Epoch 85/800
Epoch 86/800
Epoch 87/800
Epoch 88/800
Epoch 89/800
Epoch 90/800
Epoch 91/800
Epoch 92/800
Epoch 93/800
Epoch 94/800
Epoch 95/800
Epoch 96/800
Epoch 97/800
Epoch 98/800
Epoch 99/800
Epoch 100/800
Epoch 101/800
Epoch 102/800
Epoch 103/800
Epoch 104/800
Epoch 105/800
Epoch 106/800
Epoch 107/800


Epoch 108/800
Epoch 109/800
Epoch 110/800
Epoch 111/800
Epoch 112/800
Epoch 113/800
Epoch 114/800
Epoch 115/800
 23/100 [=====>........................] - ETA: 49s - loss: -0.9740 - UniquenessMetric: 1.0000 - RecallMetric: 0.8674 - NegRecallMetric: 0.4217switching to train_7.h5 and train_5.h5
Epoch 116/800
Epoch 117/800
Epoch 118/800
Epoch 119/800
Epoch 120/800
 20/100 [=====>........................] - ETA: 52s - loss: -1.0014 - UniquenessMetric: 1.0000 - RecallMetric: 0.8600 - NegRecallMetric: 0.4025switching to train_4.h5 and train_5.h5
Epoch 121/800
Epoch 122/800
Epoch 123/800
Epoch 124/800
Epoch 125/800
 16/100 [===>..........................] - ETA: 54s - loss: -0.8949 - UniquenessMetric: 1.0000 - RecallMetric: 0.7969 - NegRecallMetric: 0.4187switching to train_5.h5 and train_e.h5
Epoch 126/800
Epoch 127/800
Epoch 128/800
Epoch 129/800
Epoch 130/800
 14/100 [===>..........................] - ETA: 56s - loss: -0.8882 - UniquenessMetric: 1.0000 - RecallMetric: 0.8250 - NegRecallMet

Epoch 135/800
 13/100 [==>...........................] - ETA: 56s - loss: -1.0735 - UniquenessMetric: 1.0000 - RecallMetric: 0.8385 - NegRecallMetric: 0.3577switching to train_3.h5 and train_e.h5
Epoch 136/800
Epoch 137/800
Epoch 138/800
Epoch 139/800
Epoch 140/800
 11/100 [==>...........................] - ETA: 57s - loss: -0.8869 - UniquenessMetric: 1.0000 - RecallMetric: 0.8364 - NegRecallMetric: 0.4273switching to train_e.h5 and train_6.h5
Epoch 141/800
Epoch 142/800
Epoch 143/800
Epoch 144/800
Epoch 145/800
  8/100 [=>............................] - ETA: 59s - loss: -0.8972 - UniquenessMetric: 1.0000 - RecallMetric: 0.8063 - NegRecallMetric: 0.4438 switching to train_0.h5 and train_b.h5
Epoch 146/800
Epoch 147/800
Epoch 148/800
Epoch 149/800
Epoch 150/800
  4/100 [>.............................] - ETA: 1:01 - loss: -0.8165 - UniquenessMetric: 1.0000 - RecallMetric: 0.8250 - NegRecallMetric: 0.4625switching to train_5.h5 and train_c.h5
Epoch 151/800
Epoch 152/800
Epoch 153/800
Epoc

Epoch 162/800
Epoch 163/800
Epoch 164/800
Epoch 165/800
Epoch 166/800
Epoch 167/800
Epoch 168/800
Epoch 169/800
Epoch 170/800
Epoch 171/800
Epoch 172/800
Epoch 173/800
Epoch 174/800
Epoch 175/800
Epoch 176/800
Epoch 177/800
Epoch 178/800
Epoch 179/800
Epoch 180/800
Epoch 181/800
Epoch 182/800
Epoch 183/800
Epoch 184/800
Epoch 185/800
Epoch 186/800
Epoch 187/800


Epoch 188/800
Epoch 189/800
Epoch 190/800
Epoch 191/800
Epoch 192/800
Epoch 193/800
Epoch 194/800
Epoch 195/800
Epoch 196/800
Epoch 197/800
Epoch 198/800
Epoch 199/800
Epoch 200/800
Epoch 201/800
Epoch 202/800
Epoch 203/800
Epoch 204/800
Epoch 205/800
Epoch 206/800
Epoch 207/800
Epoch 208/800
Epoch 209/800
Epoch 210/800
Epoch 211/800
Epoch 212/800
Epoch 213/800
Epoch 214/800


Epoch 215/800
Epoch 216/800
Epoch 217/800
Epoch 218/800
Epoch 219/800
Epoch 220/800
Epoch 221/800
Epoch 222/800
Epoch 223/800
Epoch 224/800
Epoch 225/800
Epoch 226/800
Epoch 227/800
Epoch 228/800
Epoch 229/800
Epoch 230/800
Epoch 231/800
Epoch 232/800
Epoch 233/800
Epoch 234/800
Epoch 235/800
Epoch 236/800
Epoch 237/800
Epoch 238/800
Epoch 239/800
Epoch 240/800


Epoch 241/800
Epoch 242/800
Epoch 243/800
Epoch 244/800
Epoch 245/800
Epoch 246/800
Epoch 247/800
Epoch 248/800
Epoch 249/800
Epoch 250/800
Epoch 251/800
Epoch 252/800
Epoch 253/800
Epoch 254/800
Epoch 255/800
Epoch 256/800
Epoch 257/800
Epoch 258/800
Epoch 259/800
Epoch 260/800
Epoch 261/800
Epoch 262/800
Epoch 263/800
Epoch 264/800
Epoch 265/800
Epoch 266/800
Epoch 267/800


Epoch 268/800
Epoch 269/800
Epoch 270/800
Epoch 271/800
Epoch 272/800
Epoch 273/800
Epoch 274/800
 22/100 [=====>........................] - ETA: 49s - loss: -1.0211 - UniquenessMetric: 1.0000 - RecallMetric: 0.8705 - NegRecallMetric: 0.3818switching to train_0.h5 and train_8.h5
Epoch 275/800
Epoch 276/800
Epoch 277/800
Epoch 278/800
Epoch 279/800
 18/100 [====>.........................] - ETA: 52s - loss: -1.0657 - UniquenessMetric: 1.0000 - RecallMetric: 0.8833 - NegRecallMetric: 0.3806switching to train_1.h5 and train_9.h5
Epoch 280/800
Epoch 281/800
Epoch 282/800
Epoch 283/800
Epoch 284/800
 14/100 [===>..........................] - ETA: 55s - loss: -1.0177 - UniquenessMetric: 1.0000 - RecallMetric: 0.8321 - NegRecallMetric: 0.3929switching to train_1.h5 and train_4.h5
Epoch 285/800
Epoch 286/800
Epoch 287/800
Epoch 288/800
Epoch 289/800
 10/100 [==>...........................] - ETA: 58s - loss: -1.0256 - UniquenessMetric: 1.0000 - RecallMetric: 0.8800 - NegRecallMetric: 0.4100swi

Epoch 295/800
Epoch 296/800
Epoch 297/800
Epoch 298/800
Epoch 299/800
  2/100 [..............................] - ETA: 1:03 - loss: -0.9790 - UniquenessMetric: 1.0000 - RecallMetric: 0.9000 - NegRecallMetric: 0.5250switching to train_f.h5 and train_c.h5
Epoch 300/800
Epoch 301/800
Epoch 302/800
Epoch 303/800
Epoch 304/800
Epoch 305/800
Epoch 306/800
Epoch 307/800
Epoch 308/800
Epoch 309/800
Epoch 310/800
Epoch 311/800
Epoch 312/800
Epoch 313/800
Epoch 314/800
Epoch 315/800
Epoch 316/800
Epoch 317/800
Epoch 318/800
Epoch 319/800
Epoch 320/800


Epoch 321/800
Epoch 322/800
Epoch 323/800
Epoch 324/800
Epoch 325/800
Epoch 326/800
Epoch 327/800
Epoch 328/800
Epoch 329/800
Epoch 330/800
Epoch 331/800
Epoch 332/800
Epoch 333/800
Epoch 334/800
Epoch 335/800
Epoch 336/800
Epoch 337/800
Epoch 338/800
Epoch 339/800
Epoch 340/800
Epoch 341/800
Epoch 342/800
Epoch 343/800
Epoch 344/800
Epoch 345/800
Epoch 346/800
Epoch 347/800


Epoch 348/800
Epoch 349/800
Epoch 350/800
Epoch 351/800
Epoch 352/800
Epoch 353/800
Epoch 354/800
Epoch 355/800
Epoch 356/800
Epoch 357/800
Epoch 358/800
Epoch 359/800
Epoch 360/800
Epoch 361/800
Epoch 362/800
Epoch 363/800
Epoch 364/800
Epoch 365/800
Epoch 366/800
Epoch 367/800
Epoch 368/800
Epoch 369/800
Epoch 370/800
Epoch 371/800
Epoch 372/800
Epoch 373/800
Epoch 374/800


Epoch 375/800
Epoch 376/800
Epoch 377/800
Epoch 378/800
Epoch 379/800
Epoch 380/800
Epoch 381/800
Epoch 382/800
Epoch 383/800
Epoch 384/800
Epoch 385/800
Epoch 386/800
Epoch 387/800
Epoch 388/800
Epoch 389/800
Epoch 390/800
Epoch 391/800
Epoch 392/800
Epoch 393/800
Epoch 394/800
Epoch 395/800
Epoch 396/800
Epoch 397/800
Epoch 398/800
Epoch 399/800
Epoch 400/800


Epoch 401/800
Epoch 402/800
Epoch 403/800
Epoch 404/800
Epoch 405/800
Epoch 406/800
Epoch 407/800
Epoch 408/800
 23/100 [=====>........................] - ETA: 50s - loss: -1.2050 - UniquenessMetric: 1.0000 - RecallMetric: 0.8761 - NegRecallMetric: 0.3217switching to train_f.h5 and train_a.h5
Epoch 409/800
Epoch 410/800
Epoch 411/800
Epoch 412/800
Epoch 413/800
 19/100 [====>.........................] - ETA: 52s - loss: -1.0180 - UniquenessMetric: 1.0000 - RecallMetric: 0.8184 - NegRecallMetric: 0.3500switching to train_0.h5 and train_1.h5
Epoch 414/800
Epoch 415/800
Epoch 416/800
Epoch 417/800
Epoch 418/800
 18/100 [====>.........................] - ETA: 51s - loss: -1.1182 - UniquenessMetric: 1.0000 - RecallMetric: 0.8833 - NegRecallMetric: 0.3972switching to train_e.h5 and train_1.h5
Epoch 419/800
Epoch 420/800
Epoch 421/800
Epoch 422/800
Epoch 423/800
 15/100 [===>..........................] - ETA: 54s - loss: -0.9577 - UniquenessMetric: 1.0000 - RecallMetric: 0.8500 - NegRecallMet

Epoch 428/800
 11/100 [==>...........................] - ETA: 56s - loss: -1.1371 - UniquenessMetric: 1.0000 - RecallMetric: 0.8682 - NegRecallMetric: 0.3273switching to train_c.h5 and train_5.h5
Epoch 429/800
Epoch 430/800
Epoch 431/800
Epoch 432/800
Epoch 433/800
  8/100 [=>............................] - ETA: 59s - loss: -1.0008 - UniquenessMetric: 1.0000 - RecallMetric: 0.8812 - NegRecallMetric: 0.4375 switching to train_2.h5 and train_1.h5
Epoch 434/800
Epoch 435/800
Epoch 436/800
Epoch 437/800
Epoch 438/800
  4/100 [>.............................] - ETA: 1:04 - loss: -1.1868 - UniquenessMetric: 1.0000 - RecallMetric: 0.8625 - NegRecallMetric: 0.3125switching to train_1.h5 and train_a.h5
Epoch 439/800
Epoch 440/800
Epoch 441/800
Epoch 442/800
Epoch 443/800
switching to train_d.h5 and train_7.h5
Epoch 444/800
Epoch 445/800
Epoch 446/800
Epoch 447/800
Epoch 448/800
Epoch 449/800
Epoch 450/800
Epoch 451/800
Epoch 452/800
Epoch 453/800
Epoch 454/800


Epoch 455/800
Epoch 456/800
Epoch 457/800
Epoch 458/800
Epoch 459/800
Epoch 460/800
Epoch 461/800
Epoch 462/800
Epoch 463/800
Epoch 464/800
Epoch 465/800
Epoch 466/800
Epoch 467/800
Epoch 468/800
Epoch 469/800
Epoch 470/800
Epoch 471/800
Epoch 472/800
Epoch 473/800
Epoch 474/800
Epoch 475/800
Epoch 476/800
Epoch 477/800
Epoch 478/800
Epoch 479/800
Epoch 480/800
Epoch 481/800


Epoch 482/800
Epoch 483/800
Epoch 484/800
Epoch 485/800
Epoch 486/800
Epoch 487/800
Epoch 488/800
Epoch 489/800
Epoch 490/800
Epoch 491/800
Epoch 492/800
Epoch 493/800
Epoch 494/800
Epoch 495/800
Epoch 496/800
Epoch 497/800
Epoch 498/800
Epoch 499/800
Epoch 500/800
Epoch 501/800
Epoch 502/800
Epoch 503/800
Epoch 504/800
Epoch 505/800
Epoch 506/800
Epoch 507/800
Epoch 508/800


Epoch 509/800
Epoch 510/800
Epoch 511/800
Epoch 512/800
Epoch 513/800
Epoch 514/800
Epoch 515/800
Epoch 516/800
Epoch 517/800
Epoch 518/800
Epoch 519/800
Epoch 520/800
Epoch 521/800
Epoch 522/800
Epoch 523/800
Epoch 524/800
Epoch 525/800
Epoch 526/800
Epoch 527/800
Epoch 528/800
Epoch 529/800
Epoch 530/800
Epoch 531/800
Epoch 532/800
Epoch 533/800
Epoch 534/800


Epoch 535/800
Epoch 536/800
Epoch 537/800
Epoch 538/800
Epoch 539/800
Epoch 540/800
Epoch 541/800
Epoch 542/800
Epoch 543/800
Epoch 544/800
Epoch 545/800
Epoch 546/800
Epoch 547/800
Epoch 548/800
Epoch 549/800
Epoch 550/800
Epoch 551/800
Epoch 552/800
Epoch 553/800
Epoch 554/800
Epoch 555/800
Epoch 556/800
Epoch 557/800
Epoch 558/800
Epoch 559/800
Epoch 560/800
Epoch 561/800


Epoch 562/800
Epoch 563/800
Epoch 564/800
Epoch 565/800
Epoch 566/800
Epoch 567/800
 19/100 [====>.........................] - ETA: 52s - loss: -1.0264 - UniquenessMetric: 1.0000 - RecallMetric: 0.8553 - NegRecallMetric: 0.3684switching to train_4.h5 and train_b.h5
Epoch 568/800
Epoch 569/800
Epoch 570/800
Epoch 571/800
Epoch 572/800
 16/100 [===>..........................] - ETA: 54s - loss: -1.1414 - UniquenessMetric: 1.0000 - RecallMetric: 0.8625 - NegRecallMetric: 0.3406switching to train_7.h5 and train_0.h5
Epoch 573/800
Epoch 574/800
Epoch 575/800
Epoch 576/800
Epoch 577/800
 13/100 [==>...........................] - ETA: 56s - loss: -1.1696 - UniquenessMetric: 1.0000 - RecallMetric: 0.8654 - NegRecallMetric: 0.3385switching to train_b.h5 and train_8.h5
Epoch 578/800
Epoch 579/800
Epoch 580/800
Epoch 581/800
Epoch 582/800
 10/100 [==>...........................] - ETA: 59s - loss: -0.9255 - UniquenessMetric: 1.0000 - RecallMetric: 0.8800 - NegRecallMetric: 0.4150switching to trai

Epoch 589/800
Epoch 590/800
Epoch 591/800
Epoch 592/800
  4/100 [>.............................] - ETA: 1:02 - loss: -1.0110 - UniquenessMetric: 1.0000 - RecallMetric: 0.8875 - NegRecallMetric: 0.4500switching to train_c.h5 and train_e.h5
Epoch 593/800
Epoch 594/800
Epoch 595/800
Epoch 596/800
Epoch 597/800
Epoch 598/800
Epoch 599/800
Epoch 600/800
Epoch 601/800
Epoch 602/800
Epoch 603/800
Epoch 604/800
Epoch 605/800
Epoch 606/800
Epoch 607/800
Epoch 608/800
Epoch 609/800
Epoch 610/800
Epoch 611/800
Epoch 612/800
Epoch 613/800
Epoch 614/800


Epoch 615/800
Epoch 616/800
Epoch 617/800
Epoch 618/800
Epoch 619/800
Epoch 620/800
Epoch 621/800
Epoch 622/800
Epoch 623/800
Epoch 624/800
Epoch 625/800
Epoch 626/800
Epoch 627/800
Epoch 628/800
Epoch 629/800
Epoch 630/800
Epoch 631/800
Epoch 632/800
Epoch 633/800
Epoch 634/800
Epoch 635/800
Epoch 636/800
Epoch 637/800
Epoch 638/800
Epoch 639/800
Epoch 640/800
Epoch 641/800


Epoch 642/800
Epoch 643/800
Epoch 644/800
Epoch 645/800
Epoch 646/800
Epoch 647/800
Epoch 648/800
Epoch 649/800
Epoch 650/800
Epoch 651/800
Epoch 652/800
Epoch 653/800
Epoch 654/800
Epoch 655/800
Epoch 656/800
Epoch 657/800
Epoch 658/800
Epoch 659/800
Epoch 660/800
Epoch 661/800
Epoch 662/800
Epoch 663/800
Epoch 664/800
Epoch 665/800
Epoch 666/800
Epoch 667/800


Epoch 668/800
Epoch 669/800
Epoch 670/800
Epoch 671/800
Epoch 672/800
Epoch 673/800
Epoch 674/800
Epoch 675/800
Epoch 676/800
Epoch 677/800
Epoch 678/800
Epoch 679/800
Epoch 680/800
Epoch 681/800
Epoch 682/800
Epoch 683/800
Epoch 684/800
Epoch 685/800
Epoch 686/800
Epoch 687/800
Epoch 688/800
Epoch 689/800
Epoch 690/800
Epoch 691/800
Epoch 692/800
Epoch 693/800
Epoch 694/800


Epoch 695/800
Epoch 696/800
 23/100 [=====>........................] - ETA: 50s - loss: -1.0702 - UniquenessMetric: 1.0000 - RecallMetric: 0.8848 - NegRecallMetric: 0.3891switching to train_1.h5 and train_5.h5
Epoch 697/800
Epoch 698/800
Epoch 699/800
Epoch 700/800
Epoch 701/800
 18/100 [====>.........................] - ETA: 53s - loss: -1.0822 - UniquenessMetric: 1.0000 - RecallMetric: 0.8556 - NegRecallMetric: 0.3750switching to train_e.h5 and train_a.h5
Epoch 702/800
Epoch 703/800
Epoch 704/800
Epoch 705/800
Epoch 706/800
 16/100 [===>..........................] - ETA: 54s - loss: -0.9772 - UniquenessMetric: 1.0000 - RecallMetric: 0.8406 - NegRecallMetric: 0.4000switching to train_8.h5 and train_d.h5
Epoch 707/800
Epoch 708/800
Epoch 709/800
Epoch 710/800
Epoch 711/800
 12/100 [==>...........................] - ETA: 56s - loss: -1.0437 - UniquenessMetric: 1.0000 - RecallMetric: 0.8792 - NegRecallMetric: 0.3958switching to train_e.h5 and train_3.h5
Epoch 712/800
Epoch 713/800
Epoch 

Epoch 722/800
Epoch 723/800
Epoch 724/800
Epoch 725/800
Epoch 726/800
Epoch 727/800
Epoch 728/800
Epoch 729/800
Epoch 730/800
Epoch 731/800
Epoch 732/800
Epoch 733/800
Epoch 734/800
Epoch 735/800
Epoch 736/800
Epoch 737/800
Epoch 738/800
Epoch 739/800
Epoch 740/800
Epoch 741/800
Epoch 742/800
Epoch 743/800
Epoch 744/800
Epoch 745/800
Epoch 746/800
Epoch 747/800


Epoch 748/800
Epoch 749/800
Epoch 750/800
Epoch 751/800
Epoch 752/800
Epoch 753/800
Epoch 754/800
Epoch 755/800
Epoch 756/800
Epoch 757/800
Epoch 758/800
Epoch 759/800
Epoch 760/800
Epoch 761/800
Epoch 762/800
Epoch 763/800
Epoch 764/800
Epoch 765/800
Epoch 766/800
Epoch 767/800
Epoch 768/800
Epoch 769/800
Epoch 770/800
Epoch 771/800
Epoch 772/800
Epoch 773/800
Epoch 774/800


Epoch 775/800
Epoch 776/800
Epoch 777/800
Epoch 778/800
Epoch 779/800
Epoch 780/800
Epoch 781/800
Epoch 782/800
Epoch 783/800
Epoch 784/800
Epoch 785/800
Epoch 786/800
Epoch 787/800
Epoch 788/800
Epoch 789/800
Epoch 790/800
Epoch 791/800
Epoch 792/800
Epoch 793/800
Epoch 794/800
Epoch 795/800
Epoch 796/800
Epoch 797/800
Epoch 798/800
Epoch 799/800
Epoch 800/800


In [13]:

# Persist the single-site encoder (not the triplet wrapper) to HDF5.
# NOTE(review): the execution counts suggest the model-building cell was re-run
# after the training cell, so `encoder` here may hold freshly initialised
# weights -- consistent with "untrained" in the filename, but confirm this is
# the intended snapshot.
encoder.save('/tf/primo/data/models/encoder_untrained_1site_model_20220702.h5')



In [14]:
# Plot training history: one panel per training metric, with the matching
# validation curve (dotted) overlaid when one was recorded.
fig = plt.figure()
lines = [k for k in history.history.keys() if not k.startswith('val_')]
# Round the row count UP so an odd number of metrics still gets a panel each
# (rounding down dropped the last one and gave zero rows for a single metric).
n_rows = max(1, (len(lines) + 1) // 2)
# squeeze=False keeps `axes` two-dimensional for every grid size.
axes = fig.subplots(n_rows, 2, squeeze=False)
flat_axes = axes.flatten()
for i in range(len(lines)):
    label = lines[i]
    ax = flat_axes[i]
    val_label = 'val_' + label
    ax.plot(history.history[label], label=label)
    if val_label in history.history:
        ax.plot(history.history[val_label], linestyle=':', label=val_label)
    ax.set_xlabel('epoch')
    # Draw the grid on every panel, not only those with validation data.
    ax.grid()
    ax.legend()
# Hide any surplus panel left over when the metric count is odd.
for spare_ax in flat_axes[len(lines):]:
    spare_ax.set_visible(False)

<IPython.core.display.Javascript object>