In [1]:
import tensorflow as tf
# from vit_keras import vit, utils
from TFMaT import *
import tensorflow_addons as tfa
import numpy as np
from tensorflow.data import Dataset
from tensorflow.keras.utils import to_categorical

In [5]:
# Instantiate the TFMaT network; keyword arguments are grouped by role.
model = build_TFMaT(
    name='TFMat_test',
    include_top=True,
    # input sequence and motif front end
    sequence_length=1000,
    motif_size=6000,
    motif_length_max=35,
    # transformer encoder
    num_layers=12,
    num_heads=12,
    hidden_size=768,
    mlp_dim=3072,
    # output head
    representation_size=768,
    classes=1000,
    activation='sigmoid',
)

# Compile with Adam (default settings) and binary cross-entropy computed on
# the model outputs directly (from_logits=False), matching the 'sigmoid'
# activation requested above.
optimizer = tf.keras.optimizers.Adam()
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    optimizer=optimizer,
)

In [None]:
# Hyperparameter dictionary consumed by the estimator-style model_fn.
# Key order is kept identical to the original item-by-item assignments.
params = {
    'sequence_length': 1000,
    'motif_size': 6000,
    'motif_length_max': 35,
    'num_layers': 12,
    'num_heads': 12,
    'name': 'TFMaT_test',
    'hidden_size': 768,
    'mlp_dim': 3072,
    'classes': 1000,
    'dropout': 0.1,
    'activation': 'sigmoid',
    'representation_size': 768,
    'motif_embedding_trainable': False,
    "lr": 0.001,
}

In [None]:
# model_fn
def model_fn(features, labels, mode=tf.estimator.ModeKeys.TRAIN, params=None):
    """Estimator ``model_fn`` that assembles the TFMaT graph, loss and train op.

    Graph: motif ``Conv1D`` -> ``Dense`` projection to ``hidden_size`` ->
    class token -> position embeddings -> ``num_layers`` transformer blocks ->
    layer norm -> first-token extraction -> tanh ``pre_logits`` Dense ->
    linear ``head`` Dense producing raw logits.

    Args:
        features: Input tensor fed to the motif ``Conv1D`` layer.
            NOTE(review): presumably shaped (batch, sequence_length, 4) --
            confirm against the input_fn in use.
        labels: Multi-label targets with the same shape as the logits,
            i.e. (batch, ``classes``). Only read in TRAIN/EVAL modes.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Hyperparameter dict. Keys read: ``motif_size``,
            ``motif_length_max``, ``motif_embedding_trainable``,
            ``hidden_size``, ``num_layers``, ``num_heads``, ``mlp_dim``,
            ``dropout``, ``representation_size``, ``classes`` and, in TRAIN
            mode, ``lr``. ``activation`` is deliberately NOT applied to the
            head: the loss below consumes raw logits and applies the sigmoid
            itself, so a sigmoid head would squash the outputs twice.

    Returns:
        A ``CSEstimatorSpec`` carrying ``mode``, the loss (``None`` in
        PREDICT mode) and the train op (``None`` outside TRAIN mode).

    NOTE(review): ``layers`` and ``CSEstimatorSpec`` are not defined in the
    visible cells; they must already be in scope (``layers`` presumably via
    ``from TFMaT import *``) -- confirm.
    """
    # x = tf.keras.layers.Input(shape=(params['sequence_length'],4))
    motif_embedding = tf.keras.layers.Conv1D(
        filters=params['motif_size'],
        kernel_size=params['motif_length_max'],
        strides=1,
        padding="same",
        name="motif_embedding",
    )
    # Optionally freeze the motif filters so they are not updated in training.
    motif_embedding.trainable = params['motif_embedding_trainable']
    y = motif_embedding(features)
    y = tf.keras.layers.Dense(
        units=params['hidden_size'],
        name="motif_to_hidden_embedding"
    )(y)
    y = layers.ClassToken(name="class_token")(y)
    y = layers.AddPositionEmbs(name="Transformer/posembed_input")(y)
    for n in range(params['num_layers']):
        # Each block returns a pair; only the first element is propagated.
        y, _ = layers.TransformerBlock(
            num_heads=params['num_heads'],
            mlp_dim=params['mlp_dim'],
            dropout=params['dropout'],
            name=f"Transformer/encoderblock_{n}",
        )(y)
    y = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, name="Transformer/encoder_norm"
    )(y)
    # Keep only position 0 along the sequence axis (the prepended class token).
    y = tf.keras.layers.Lambda(lambda v: v[:, 0],
                               name="ExtractToken")(y)

    y = tf.keras.layers.Dense(
        params['representation_size'],
        name="pre_logits",
        activation="tanh"
    )(y)

    # Linear head: sigmoid_cross_entropy_with_logits expects un-activated
    # logits, so no activation is applied here.
    logits = tf.keras.layers.Dense(
        params['classes'],
        name="head",
    )(y)

    loss_op = None
    train_op = None

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        loss_op = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                # The op requires labels and logits to share a dtype.
                labels=tf.cast(labels, logits.dtype),
                logits=logits,
            )
        )

    # Only build the optimizer when training: in PREDICT mode there is no loss
    # to minimise. The v1 optimizer is used because Keras optimizers'
    # minimize() does not accept a `global_step` argument.
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.constant(params["lr"])
        train_op = tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate
        ).minimize(loss_op, global_step=tf.compat.v1.train.get_global_step())

    spec = CSEstimatorSpec(mode=mode, loss=loss_op, train_op=train_op)
    return spec


In [6]:
# Print the layer-by-layer summary (output shapes and parameter counts) of `model`.
model.summary()

Model: "TFMat_test"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 1000, 4)]         0         
                                                                 
 motif_embedding (Conv1D)    (None, 1000, 6000)        846000    
                                                                 
 motif_to_hidden_embedding (  (None, 1000, 768)        4608768   
 Dense)                                                          
                                                                 
 class_token (ClassToken)    (None, 1001, 768)         768       
                                                                 
 Transformer/posembed_input   (None, 1001, 768)        768768    
 (AddPositionEmbs)                                               
                                                                 
 Transformer/encoderblock_0   ((None, 1001, 768),      7

In [7]:
# Random stand-in inputs: 128 samples of shape (1000, 4) with values in [0, 1).
dna_seq = np.random.random((128, 1000, 4))
# Same data with a trailing singleton axis appended.
dna_seq_exp = dna_seq[..., np.newaxis]
# One random class id in [0, 1000) per sample, then one-hot encoded.
labels = np.random.randint(0, 1000, size=(128, 1))
labels_cat = to_categorical(labels, num_classes=1000)

In [8]:
# Run inference on the random inputs, 32 samples per batch.
output=model.predict(dna_seq,batch_size=32)

In [9]:
# Inspect the shape of the prediction array.
output.shape

(128, 1000)

In [10]:
# Smoke-test training: a single epoch over the random data with batch_size=1.
model.fit(dna_seq,labels_cat,batch_size=1,epochs=1)



<keras.callbacks.History at 0x26eec21f5f8>

In [11]:
# Fresh settings dict holding only the batch size.
# Note: this rebinds `params`, replacing any dict bound to that name earlier.
params = {'batch_size': 64}
# Wrap the compiled Keras model as a tf.estimator Estimator.
est = tf.keras.estimator.model_to_estimator(keras_model=model)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.




INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\YYNST\\AppData\\Local\\Temp\\tmpmlkmeajy', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [35]:
# calculate potential training data size 
# Random array of 128 samples, each shaped (1, 1000, 4); note this rebinds `dna_seq`.
dna_seq=np.random.random(size=(128,1,1000,4))

# Array footprint in MiB: element count * bytes per element / 1024 / 1024.
# Not the last expression of the cell, so its value is not displayed.
dna_seq.size * dna_seq.itemsize/1024/1024 

# Scales 3.90625 by (3101788170 / 1000) and 0.1, then divides by 1024 twice.
# NOTE(review): 3.90625 is presumably the MiB figure from the expression above
# and 3101788170 a genome length in bases (split into 1000-long windows, 10%
# kept), which would make the result a size in TiB -- confirm the intent.
3.90625* 3101788170/1000 *0.1 /1024/1024

In [15]:
# Insert a singleton axis at position 1: (N, classes) -> (N, 1, classes).
# Guarded on ndim so that re-running this cell does not keep stacking axes.
if labels_cat.ndim == 2:
    labels_cat = np.expand_dims(labels_cat, 1)

In [16]:
# Check the label array's shape after the axis insertion.
labels_cat.shape

(128, 1, 1000)

In [18]:
def input_fn():
    """Build a shuffled, repeated, batched dataset of random stand-in data.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches with
        shapes ``(64, 1000, 4)`` and ``(64, 1000)``.

    Notes:
        ``Dataset`` transformations return new datasets rather than modifying
        the receiver, so the shuffled/batched pipeline has to be assigned
        before it is returned. Because batching now supplies the leading batch
        axis, the arrays are created without the extra singleton axis that
        previously made every un-batched element look like a batch of one.
        ``repeat()`` keeps the 128 random examples from running out before
        the requested number of training steps is reached.
    """
    batch_size = 64
    num_examples = 128
    num_classes = 1000

    dna_seq = np.random.random(size=(num_examples, 1000, 4))
    labels = np.random.randint(low=0, high=num_classes, size=(num_examples, 1))
    # One-hot targets of shape (num_examples, num_classes).
    labels_cat = to_categorical(labels, num_classes=num_classes)

    dataset = Dataset.from_tensor_slices((dna_seq, labels_cat))
    dataset = (
        dataset
        .shuffle(num_examples, reshuffle_each_iteration=True)
        .repeat()
        .batch(batch_size, drop_remainder=True)
    )
    return dataset

In [19]:
# Train the estimator on data from `input_fn`, with max_steps=10.
est.train(
input_fn=input_fn,
max_steps=10,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='C:\\Users\\YYNST\\AppData\\Local\\Temp\\tmpmlkmeajy\\keras\\keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: C:\Users\YYNST\AppData\Local\Temp\tmpmlkmeajy\keras\keras_model.ckpt
INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES.
INFO:tensorflow:Warm-started 204 variables.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\YYNST\AppData\Local\Temp\tmpmlkmeajy\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.009471378

<tensorflow_estimator.python.estimator.estimator.EstimatorV2 at 0x26f13217438>

In [20]:
# Display the trained estimator. `esttrain` is never assigned in this
# notebook; the recorded outputs show `est.train(...)` returning the same
# estimator object, so `est` is referenced directly.
est

<tensorflow_estimator.python.estimator.estimator.EstimatorV2 at 0x26f13217438>

In [None]:
# TensorFlow Estimator training example

In [None]:
# model_fn
def model_fn(features, labels, mode=tf.estimator.ModeKeys.TRAIN, params=None):
    """Minimal Estimator ``model_fn``: a dense classifier with two hidden layers.

    Args:
        features: Input tensor fed to the first ``Dense`` layer.
        labels: Targets compared against the logits with softmax
            cross-entropy; only read in TRAIN/EVAL modes.
            NOTE(review): presumably one-hot / per-class probabilities
            shaped like the logits -- confirm against the input_fn.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict providing ``num_classes`` and, in TRAIN mode, ``lr``.

    Returns:
        A ``CSEstimatorSpec`` with the loss (``None`` in PREDICT mode) and
        the train op (``None`` outside TRAIN mode).

    NOTE(review): ``CSEstimatorSpec`` is not imported in the visible cells;
    it must already be in scope -- confirm.
    """
    net = tf.keras.layers.Dense(256, activation=tf.nn.relu)(features)
    net = tf.keras.layers.Dense(128, activation=tf.nn.relu)(net)
    logits = tf.keras.layers.Dense(params["num_classes"])(net)

    loss_op = None
    train_op = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # compat.v1 alias: the bare tf.nn.*_v2 name only exists in TF 1.x.
        loss_op = tf.reduce_mean(
            tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels, logits=logits
            )
        )

    # Build the optimizer only when training; in PREDICT mode there is no
    # loss, and minimize(None) would fail.
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.constant(params["lr"])
        train_op = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate
        ).minimize(loss_op, global_step=tf.compat.v1.train.get_global_step())

    spec = CSEstimatorSpec(mode=mode, loss=loss_op, train_op=train_op)
    return spec
    

In [None]:
# Reference only: the keyword arguments accepted by tf.estimator.EstimatorSpec.
# NOTE(review): `mode` is not defined at notebook level in the visible cells,
# so this cell looks like a pasted signature rather than runnable code -- confirm.
tf.estimator.EstimatorSpec(
    mode, predictions=None, loss=None, train_op=None, eval_metric_ops=None,
    export_outputs=None, training_chief_hooks=None, training_hooks=None,
    scaffold=None, evaluation_hooks=None, prediction_hooks=None
)

In [None]:
# estimator
# Build a plain tf.estimator.Estimator around `model_fn`, handing it `params`.
# NOTE(review): `model_dir` is not defined in the visible cells -- confirm it
# is set before this cell runs.
est = tf.estimator.Estimator(model_dir=model_dir,
                                    model_fn=model_fn,
                                    params=params)

In [None]:
# Cerebras example

In [None]:
# model_fn
def model_fn(features, labels, mode=tf.estimator.ModeKeys.TRAIN, params=None):
    """Cerebras-style ``model_fn``: two ReLU hidden layers plus a linear head.

    Args:
        features: Input tensor fed to the first ``Dense`` layer.
        labels: Targets for softmax cross-entropy; only read in TRAIN/EVAL.
            NOTE(review): presumably shaped like the logits
            (batch, num_classes) -- confirm against the input_fn.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Dict providing ``num_classes`` and, in TRAIN mode, ``lr``.

    Returns:
        A ``CSEstimatorSpec`` holding ``mode``, the loss (``None`` in PREDICT
        mode) and the train op (``None`` unless ``mode`` is TRAIN).

    NOTE(review): ``CSEstimatorSpec`` is not imported in the visible cells;
    it must already be in scope -- confirm.
    """
    hidden = tf.keras.layers.Dense(256, activation=tf.nn.relu)(features)
    hidden = tf.keras.layers.Dense(128, activation=tf.nn.relu)(hidden)
    logits = tf.keras.layers.Dense(params["num_classes"])(hidden)

    loss_op = None
    train_op = None

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # Use the compat.v1 alias so the symbol resolves on both TF 1.x and 2.x.
        per_example_loss = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits
        )
        loss_op = tf.reduce_mean(per_example_loss)

    # A train op is only meaningful (and only constructible) when a loss
    # exists and the estimator is actually training.
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.constant(params["lr"])
        optimizer = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate
        )
        train_op = optimizer.minimize(
            loss_op, global_step=tf.compat.v1.train.get_global_step()
        )

    spec = CSEstimatorSpec(mode=mode, loss=loss_op, train_op=train_op)
    return spec


In [None]:
# input_fn
def train_input_fn(params):
    """Build the training dataset from ``./data/iris_training.csv``.

    Each CSV row is parsed into a structured record with a 4-element float32
    ``img`` field and a scalar int32 ``lbl`` field.

    Args:
        params: Dict providing ``batch_size``.

    Returns:
        A ``tf.data.Dataset`` of ``(img, lbl)`` batches, shuffled with a
        buffer of 1000, repeated indefinitely, with incomplete batches
        dropped.

    NOTE(review): no header row is skipped when reading the CSV -- confirm
    the file has none, or pass ``skip_header=1`` to ``np.genfromtxt``.
    """
    batch_size = params["batch_size"]
    # Omitting the shape on 'lbl' declares a scalar field.
    iris_dtype = np.dtype([
        ('img', "float32", (4,)),
        ('lbl', "int32"),
    ])
    data = np.genfromtxt(
        "./data/iris_training.csv",
        dtype=iris_dtype,
        delimiter=","
    )
    dataset = tf.data.Dataset.from_tensor_slices(
        (data["img"][:], data["lbl"][:])
    )
    dataset = dataset.shuffle(1000).repeat().batch(
        batch_size, drop_remainder=True
    )
    return dataset

In [None]:
# config
from common_zoo.estimator.tf.run_config import CSRunConfig

# Run configuration for the Cerebras estimator: checkpoint every 1000 steps,
# log the step count every 10000 steps.
# NOTE(review): `ip` is not defined in the visible cells -- presumably the
# address of the Cerebras system; confirm it is set before this cell runs.
config = CSRunConfig(
    cs_ip=ip,
    save_checkpoints_steps=1000,
    log_step_count_steps=10000
)

In [None]:
# estimator
# Build the Cerebras estimator from `model_fn`, `config` and `params`,
# writing its outputs under ./out.
# NOTE(review): `CerebrasEstimator` is not imported in the visible cells -- confirm.
est = CerebrasEstimator(
    model_fn=model_fn,
    config=config,
    params=params,
    model_dir='./out',
)

# train
# NOTE(review): this passes `input_fn`, not the `train_input_fn` defined in
# the Cerebras example cells -- confirm which one is intended.
est.train(input_fn=input_fn, steps=100000, use_cs=True)