In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import backend 
from keras import activations
from keras import Sequential
import pydot

#### Vector, Matrix, Tensors

A tf.Tensor has the following properties:

    a single data type (float32, int32, or string, for example)
    a shape
    
    
A number of specialized tensors are available, including:
    
    tf.Variable
    tf.constant
    tf.placeholder
    tf.sparse.SparseTensor
    tf.RaggedTensor

#### tf.Variable

    A TensorFlow variable is the recommended way to represent shared, persistent state your program manipulates.

In [None]:
# Template for the rank/shape demos; filled in with str.format() using the
# keyword fields variable_type, rank and shape.
print_str = (
    "VariableType={variable_type}"
    " ; Rank={rank}"
    " ; Shape={shape}"
)

In [None]:
# NOTE: the element type must be passed as ``dtype=``. The second positional
# parameter of ``tf.Variable`` is ``trainable``, so ``tf.Variable(451, tf.int16)``
# would set the trainable flag and leave the dtype to be inferred.

# Scalar: Rank0
value = tf.Variable(451, dtype=tf.int16)
print(print_str.format(variable_type='Scalar', rank=tf.rank(value), shape=value.shape))


# Vector: Rank1
value = tf.Variable([1, 2], dtype=tf.int16)
print(print_str.format(variable_type='Vector', rank=tf.rank(value), shape=value.shape))


# Matrix: Rank2
value = tf.Variable([[1, 2, 3], [3, 4, 5]], dtype=tf.int16)
print(print_str.format(variable_type='Matrix', rank=tf.rank(value), shape=value.shape))


# Tensor: Rank3 (2 blocks of 3 rows x 3 columns)
value = tf.Variable([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=tf.int16)
print(print_str.format(variable_type='Tensor', rank=tf.rank(value), shape=value.shape))


# Tensor: Rank3 again, with a different innermost size (2 blocks of 3 rows x 2 columns)
value = tf.Variable([[[1, 2], [4, 5], [7, 8]], [[1, 2], [4, 5], [7, 8]]], dtype=tf.int16)
print(print_str.format(variable_type='Tensor', rank=tf.rank(value), shape=value.shape))

#### tf.constant

    Creates a constant tensor from a tensor-like object.

In [None]:
# Rank-1 constant tensor built from a plain Python list.
tf.constant(value=[1, 2, 3, 4, 5, 6])

#### tf.placeholder [Useful in v1]

A placeholder is simply a variable that we will assign data to at a later time. It allows us to create our operations and build our computation graph without needing the data. In TensorFlow terminology, we then feed data into the graph through these placeholders.

#### tf.sparse.SparseTensor

    TensorFlow represents a sparse tensor as three separate dense tensors: 
    indices
    values
    dense_shape
 
In Python, the three tensors are collected into a SparseTensor class for ease of use. If you have separate indices, values, and dense_shape tensors, wrap them in a SparseTensor object before passing to the ops below.
    

Concretely, the sparse tensor SparseTensor(indices, values, dense_shape) comprises the following components, where N and ndims are the number of values and number of dimensions in the SparseTensor, respectively:


**indices:** A 2-D int64 tensor of shape [N, ndims], which specifies the indices of the elements in the sparse tensor that contain nonzero values (elements are zero-indexed).

**values:** A 1-D tensor of any type and shape [N], which supplies the values for each element in indices. 

**dense_shape:** A 1-D int64 tensor of shape [ndims], which specifies the dense_shape of the sparse tensor. Takes a list indicating the number of elements in each dimension. 


Example:  Represent the following using SparseTensor.

    [[1, 0, 0, 0]
     [0, 0, 2, 0]
     [0, 0, 0, 0]]

In [None]:
# Sparse encoding of the 3x4 example matrix: the value 1 sits at row 0,
# column 0 and the value 2 at row 1, column 2; every other entry is zero.
sparseTensor = tf.sparse.SparseTensor(
    indices=[[0, 0], [1, 2]],  # one [row, column] pair per stored value
    values=[1, 2],             # values at those positions, in the same order
    dense_shape=[3, 4],        # shape of the equivalent dense matrix
)
print(sparseTensor.shape)

#### tf.RaggedTensor

A RaggedTensor is a tensor with one or more ragged dimensions, which are dimensions whose slices may have different lengths.

Dimensions whose slices all have the same length are called uniform dimensions.

The total number of dimensions in a RaggedTensor is called its rank, and the number of ragged dimensions in a RaggedTensor is called its ragged-rank. A RaggedTensor's ragged-rank is fixed at graph creation time: it can't depend on the runtime values of Tensors, and can't vary dynamically for different session runs.

In [None]:
# Flat values shared by every constructor below. Each constructor describes the
# same five rows: [3, 1, 4, 1], [], [5, 9, 2], [6], [].
values = [3, 1, 4, 1, 5, 9, 2, 6]

# Row boundaries as split offsets: row i covers values[splits[i]:splits[i + 1]].
rt1 = tf.RaggedTensor.from_row_splits(values=values, row_splits=[0, 4, 4, 7, 8, 8])
print(rt1)

# Number of values in each row.
rt2 = tf.RaggedTensor.from_row_lengths(values=values, row_lengths=[4, 0, 3, 1, 0])
print(rt2)

# Row index of every value; nrows is needed because the trailing row is empty.
rt3 = tf.RaggedTensor.from_value_rowids(
    values=values,
    value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    nrows=5,
)
print(rt3)

# Start offset of each row.
rt4 = tf.RaggedTensor.from_row_starts(values=values, row_starts=[0, 4, 4, 7, 8])
print(rt4)

# Stop offset of each row.
rt5 = tf.RaggedTensor.from_row_limits(values=values, row_limits=[4, 4, 7, 8, 8])
print(rt5)

print("Ragged Rank: ", rt5.ragged_rank)

#### TensorFlow Dataset objects

    The tf.data.Dataset API supports writing descriptive and efficient input pipelines. Dataset usage follows a common pattern:
    
    1. Create a source dataset from your input data.
    2. Apply dataset transformations to preprocess the data.
    3. Iterate over the dataset and process the elements.
    
    
There are two distinct ways to create a dataset:

    A data source constructs a Dataset from data stored in memory or in one or more files.
    A data transformation constructs a dataset from one or more tf.data.Dataset objects.
    
To construct a Dataset from data in memory:

    Use tf.data.Dataset.from_tensors() or tf.data.Dataset.from_tensor_slices()

If input data is stored in a file in the recommended TFRecord format:

    Use tf.data.TFRecordDataset(). For CSV files, use tf.data.experimental.make_csv_dataset() or tf.data.experimental.CsvDataset.

In [None]:
# Slice a random 4x10 integer matrix along its first axis, so the dataset
# yields four elements of ten int32 values each.
random_matrix = tf.random.uniform([4, 10], minval=1, maxval=10, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices(random_matrix)
print(dataset.element_spec)

# Iterating the dataset yields tensors; print them as NumPy arrays.
for z in dataset:
    print(z.numpy())

# Dataset containing a sparse tensor.
sparse_source = tf.SparseTensor(
    indices=[[0, 0], [1, 2]],
    values=[1, 2],
    dense_shape=[3, 4],
)
sparse_dataset = tf.data.Dataset.from_tensors(sparse_source)

In [None]:
# Convert NumPy arrays to a Dataset.
# load_data() returns a (train, test) pair; each side is an (images, labels) pair.
train, test = tf.keras.datasets.fashion_mnist.load_data()
images = train[0]
labels = train[1]
for array in (images, labels):
    print(array.shape)

# Pair each image with its label, one dataset element per example.
dataset = tf.data.Dataset.from_tensor_slices((images, labels))

#### Keras Architecture

*Model* => Handles top level functionality. (compile, fit, evaluate, predict, save)

*Layer* => Consists of business logic. 

Losses, Metrics

Callbacks

Optimizers

Regularizers, Constraints

#### Layers

    Define a simple Dense Layer with 2 parameters => [W, b]
    
    Also we set the default value for units/neuron per layer to 32. 
 
*build() =>  This method can be used to create weights that depend on the shape(s) of the input(s)*

*call()  =>  Performs the logic*

In [None]:
class SimpleDense(keras.layers.Layer):
    """Minimal dense layer computing ``matmul(inputs, w) + b``.

    Args:
        units: Number of output units, i.e. the number of columns of ``w``.
            Defaults to 32.
        **kwargs: Standard layer options (for example ``name``) forwarded
            unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the state of the layer (weights).

        The kernel shape depends on the size of the last input axis, which is
        why the weights are created here rather than in ``__init__``.
        """
        # add_weight() registers the variables with the layer, so they are
        # reported by `weights` / `trainable_weights`.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
        )

    def call(self, inputs):
        """Define the computation from inputs to outputs."""
        return tf.matmul(inputs, self.w) + self.b

In [None]:
# Instantiate the layer with 6 units/neurons.
linear_layer = SimpleDense(units=6)

# Calling the layer on a (2, 4) tensor of ones also triggers
# `build(input_shape)`, which creates the weights.
y = linear_layer(tf.ones(shape=(2, 4)))

# The layer tracks two variables (w and b) ...
assert len(linear_layer.weights) == 2

# ... and both are trainable, so they are listed in `trainable_weights` too.
assert len(linear_layer.trainable_weights) == 2

# Shape of the kernel `w`: (size of the last input axis, units).
linear_layer.w.shape

#### Keep track of variables
Layers are recursively composable. If you assign a Layer instance as attribute of another Layer, the outer layer will start tracking the weights of the inner layer.
We recommend creating such sublayers in the __init__() method (since the sublayers will typically have a build method, they will be built when the outer layer gets built).



In [None]:
# Assume we have 2 different layers with different parameters. How do we keep track of variables. 
class MyLayer(tf.keras.layers.Layer):
    """Layer holding one variable directly and ten more inside a list attribute."""

    def __init__(self):
        super().__init__()
        # Variables assigned as attributes, including those nested in a list.
        self.my_var = tf.Variable(1.0)
        self.my_var_list = [tf.Variable(index) for index in range(10)]

class MyOtherLayer(tf.keras.layers.Layer):
    """Outer layer that owns a ``MyLayer`` sublayer plus one variable of its own."""

    def __init__(self):
        super().__init__()
        # Sublayer assigned as an attribute, followed by this layer's own variable.
        self.sublayer = MyLayer()
        self.my_other_var = tf.Variable(10.0)

m = MyOtherLayer()
# Expected: 12 (11 from MyLayer, plus 1 from MyOtherLayer)
variable_count = len(m.variables)
print(variable_count)

##### add_loss() &  add_metric() method

Similarly to add_loss(), layers also have an add_metric() method for tracking the moving average of a quantity during training.

In [None]:
# A layer that creates an activity regularization loss
class ActivityRegularizationLayer(keras.layers.Layer):
    """Pass-through layer that records ``rate * sum(inputs)`` as a loss.

    Args:
        rate: Multiplier applied to the sum of the inputs. Defaults to 1e-2.
    """

    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    def call(self, inputs):
        """Register the activity penalty, then return ``inputs`` unchanged."""
        penalty = self.rate * tf.reduce_sum(inputs)
        self.add_loss(penalty)
        return inputs
    
# Outer Layer
class OuterLayer(keras.layers.Layer):
    """Wrapper layer that delegates to an ``ActivityRegularizationLayer``."""

    def __init__(self):
        super().__init__()
        self.activity_reg = ActivityRegularizationLayer(rate=1e-2)

    def call(self, inputs):
        """Forward ``inputs`` through the nested regularization layer."""
        regularized = self.activity_reg(inputs)
        return regularized


layer = OuterLayer()
assert len(layer.losses) == 0  # No losses yet since the layer has never been called

# The shape is passed as a tuple: in `tf.zeros(1, 1)` the second positional
# argument would be interpreted as the dtype, not as a second dimension.
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1  # We created one loss value

# `layer.losses` gets reset at the start of each __call__
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1  # This is the loss created during the call above

#### Gradient computation & Weights update.

In [None]:
# First derivative of y = x^2 at x = 3.
x = tf.constant(3.0)
with tf.GradientTape() as tape:
    # x is a constant, so the tape is told explicitly to watch it.
    tape.watch(x)
    y = tf.multiply(x, x)
dy_dx = tape.gradient(y, x)  # Will compute to 6.0
print(dy_dx)

In [None]:
# Second derivative of y = x^2 at x = 3 using nested tapes.
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    with tf.GradientTape() as gg:
        gg.watch(x)
        y = x * x
    # Taken after the inner tape has stopped recording but while the outer
    # tape is still active, so the outer tape records this computation.
    dy_dx = gg.gradient(y, x)     # Will compute to 6.0
d2y_dx2 = g.gradient(dy_dx, x)  # Will compute to 2.0
print(d2y_dx2)

Compute the gradient for the equations below using the chain rule.

At $x = 3$:

$y = x^{2}$

$z = y + 3$

$\frac{\partial z}{\partial x} = \frac{\partial z}{\partial y} \cdot \frac{\partial y}{\partial x}$

$\frac{\partial z}{\partial x} = 1 \cdot 2x = 6$


In [None]:
# x is a tf.Variable, so no explicit watch() call is made here.
x = tf.Variable(3.0)
with tf.GradientTape() as g:
    y = x * x
    z = y + 3

# Only the forward pass needs to be recorded; the gradients are requested once
# the tape has stopped recording.
# `dz_dx` is a list with one gradient per source: [dz/dy, dz/dx].
# At x = 3 the expected values are 1.0 and 6.0.
dz_dx = g.gradient(z, [y, x])
print(dz_dx)

**Let's define neural network architecture**

    1. Input layer Shape (2, 2)
    2. One Hidden Layer (Dense) with summation and ReLU activation function
    3. Output Layer (Dense) with summation and Sigmoid activation function

In [None]:
class DenseLayer(tf.keras.layers.Layer):
    """Fully connected layer computing ``activation(dot(inputs, w) + b)``.

    Args:
        units: Number of output units (size of the last output axis).
        activation: Activation identifier resolved with ``activations.get``.
        use_bias: Whether a bias vector is created and added to the output.
        **kwargs: Standard layer options (for example ``name``) forwarded
            unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, use_bias=True, **kwargs):
        # Forward kwargs so that options such as `name` are not silently dropped.
        super().__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias

    def build(self, input_shape):
        """Create the kernel and, if ``use_bias`` is set, the bias."""
        assert len(input_shape) >= 2, "DenseLayer expects inputs of rank >= 2"
        # Params => W, b
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
        )

        if self.use_bias:
            self.b = self.add_weight(
                shape=(self.units,),
                initializer='random_normal',
                trainable=True,
            )
        else:
            self.b = None

    def call(self, inputs):
        """Apply the linear map, then the optional bias and the activation."""
        output = backend.dot(inputs, self.w)

        if self.use_bias:
            output = backend.bias_add(output, self.b, data_format='channels_last')

        if self.activation is not None:
            output = self.activation(output)

        return output

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last entry replaced by ``units``."""
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def get_config(self):
        """Return the base layer config extended with this layer's arguments.

        Only the options this class actually stores are serialized: units,
        activation and use_bias.
        """
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': activations.serialize(self.activation),
            'use_bias': self.use_bias,
        })
        return config

In [None]:
# Training data: four 2-feature samples, each with a binary label.
features = tf.Variable(
    [[1, 1], [2, 1], [2, 2], [1, 2]],
    dtype=tf.float32,
)
labels = tf.Variable([0, 1, 0, 1])
print(features.shape)

In [None]:
loss_function = tf.keras.losses.BinaryCrossentropy(from_logits=False)
hidden_layer = DenseLayer(6, activation='relu')
output_layer = DenseLayer(1, activation='sigmoid')
optimizer = tf.keras.optimizers.Adam()

# Manual training loop: one pass over the data, one sample per update step.
for epoch in range(1):
    for x, y in zip(features.numpy(), labels.numpy()):
        # A single tape records the whole forward pass, so the loss can be
        # differentiated with respect to the weights of BOTH layers.
        with tf.GradientTape() as tape:
            hidden_output = hidden_layer(x.reshape(1, -1))
            output = output_layer(hidden_output)
            # Give the label the same (1, 1) shape as the prediction.
            loss_value = loss_function(y.reshape(1, 1), output)

        # The weight lists are read after the forward pass because the layers
        # create their weights on the first call.
        trainable_weights = hidden_layer.trainable_weights + output_layer.trainable_weights
        gradients = tape.gradient(loss_value, trainable_weights)

        # One update covering every weight, so the optimizer sees a consistent
        # set of variables on every step.
        optimizer.apply_gradients(zip(gradients, trainable_weights))

#### Model

     Model => Handles top level functionality. (compile, fit, evaluate, predict, save)
     Layer => Consists of business logic
     
In general, you will use the Layer class to define inner computation blocks, and will use the Model class to define the outer model -- the object you will train.

The Model class has the same API as Layer, with the following differences:

    It exposes built-in training, evaluation, and prediction loops (model.fit(), model.evaluate(), model.predict())
    It exposes the list of its inner layers, via the model.layers property.
    It exposes saving and serialization APIs (save(), save_weights()...)

In [21]:
class MyModel(tf.keras.Model):
    """Two-layer model: a 6-unit ReLU ``DenseLayer`` followed by a 1-unit sigmoid ``DenseLayer``."""

    def __init__(self):
        super().__init__()
        self.layer1 = DenseLayer(units=6, activation='relu')
        self.layer2 = DenseLayer(units=1, activation='sigmoid')

    def call(self, inputs):
        """Run ``inputs`` through layer1, then layer2."""
        return self.layer2(self.layer1(inputs))

myModel = MyModel()

# compile() sets the optimizer and the loss used for training.
myModel.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
)

# Train on the toy data for 10 epochs.
myModel.fit(x=features.numpy(), y=labels.numpy(), epochs=10)

In [None]:
# Render a diagram of the model (pydot is imported at the top of the notebook).
keras.utils.plot_model(model=myModel)

#### Functional Vs Sequential

**Sequential:**

    A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor.
    
**Functional:**

    The Keras functional API is a way to create models that is more flexible than the tf.keras.Sequential API. The functional API can handle models with non-linear topology, models with shared layers, and models with multiple inputs or outputs.
    
    The main idea is that a deep learning model is usually a directed acyclic graph (DAG) of layers. So the functional API is a way to build graphs of layers.
    
    Will look into this on Machine Translation, Object Recognition tasks.

#### callbacks 

    A callback is a powerful tool to customize the behavior of a Keras model during training, evaluation, or inference.

    Examples include tf.keras.callbacks.TensorBoard to visualize training progress and results with TensorBoard, or tf.keras.callbacks.ModelCheckpoint to periodically save your model during training.

In [None]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Callback that prints a message, with the available log keys, at every hook.

    Each hook prints the keys of the ``logs`` dict it receives; a missing
    (``None``) or empty ``logs`` is reported as an empty key list.
    """

    @staticmethod
    def _log_keys(logs):
        """Return the keys of ``logs`` as a list, treating ``None`` as empty."""
        return list((logs or {}).keys())

    # --- whole-run hooks -------------------------------------------------
    def on_train_begin(self, logs=None):
        print("Starting training; got log keys: {}".format(self._log_keys(logs)))

    def on_train_end(self, logs=None):
        print("Stop training; got log keys: {}".format(self._log_keys(logs)))

    # --- epoch hooks -----------------------------------------------------
    def on_epoch_begin(self, epoch, logs=None):
        print("Start epoch {} of training; got log keys: {}".format(epoch, self._log_keys(logs)))

    def on_epoch_end(self, epoch, logs=None):
        print("End epoch {} of training; got log keys: {}".format(epoch, self._log_keys(logs)))

    # --- evaluation / prediction run hooks -------------------------------
    def on_test_begin(self, logs=None):
        print("Start testing; got log keys: {}".format(self._log_keys(logs)))

    def on_test_end(self, logs=None):
        print("Stop testing; got log keys: {}".format(self._log_keys(logs)))

    def on_predict_begin(self, logs=None):
        print("Start predicting; got log keys: {}".format(self._log_keys(logs)))

    def on_predict_end(self, logs=None):
        print("Stop predicting; got log keys: {}".format(self._log_keys(logs)))

    # --- batch hooks -----------------------------------------------------
    def on_train_batch_begin(self, batch, logs=None):
        print("...Training: start of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

    def on_train_batch_end(self, batch, logs=None):
        print("...Training: end of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

    def on_test_batch_begin(self, batch, logs=None):
        print("...Evaluating: start of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

    def on_test_batch_end(self, batch, logs=None):
        print("...Evaluating: end of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

    def on_predict_batch_begin(self, batch, logs=None):
        print("...Predicting: start of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

    def on_predict_batch_end(self, batch, logs=None):
        print("...Predicting: end of batch {}; got log keys: {}".format(batch, self._log_keys(logs)))

In [None]:
# Train for one epoch with Keras' own progress output disabled (verbose=0);
# the callback prints a line at every hook instead.
myModel.fit(
    x=features.numpy(),
    y=labels.numpy(),
    epochs=1,
    batch_size=128,
    callbacks=[CustomCallback()],
    verbose=0,
)

#### Types Of Applications:
    1. Feed Forward Neural Networks [Classification/Imbalanced Classification, Regression]
    2. Word2Vec Semantic Analysis 
    3. Image Classifications [CNN]
    4. RNN(LSTM/GRU) Sentiment Analysis, Machine Translation, Image Captioning
    5. RNN (LSTM/GRU) Forecast Analysis
    6. Transformer Machine Translation
    7. Transfer Learning (BERT)
    8. Generative Models
    9. AutoEncoders/Variational AutoEncoders