In [1]:
import tensorflow as tf

# Operaciones con tensores

In [2]:
# Build a rank-1 tensor from a Python list; the bare name on the last line displays it.
a = tf.convert_to_tensor([1, 2, 3])
a

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>

In [3]:
# Shape of the tensor: a single axis of length 3.
a.shape

TensorShape([3])

In [4]:
# Multiplying by a Python scalar scales every element of the tensor.
2 * a

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 4, 6], dtype=int32)>

In [5]:
# Adding two tensors of the same shape sums them element by element.
b = tf.convert_to_tensor([5, 6, 7])
a + b

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 6,  8, 10], dtype=int32)>

# Layers de keras

## Dense

In [6]:
# Fully connected layer: 3 input features -> 20 units, with a tanh activation.
layer_fully_connected = tf.keras.layers.Dense(
    units=20,
    input_shape=(3,),
    activation="tanh",
    name="fully_connected_network",
)

In [8]:
# Batch of two samples with three features each, matching the layer's input_shape=(3,).
input_data = tf.convert_to_tensor(
    [
        [1, 2, 3],
        [5, 6, 7],
    ]
)

# Calling the layer applies it to the batch; the result has one row of 20 activations per sample.
layer_fully_connected(input_data)

<tf.Tensor: shape=(2, 20), dtype=float32, numpy=
array([[ 0.98796105,  0.68167347, -0.8317487 ,  0.6822494 , -0.94891936,
         0.60169   , -0.87907505,  0.02645102, -0.8311211 , -0.9437486 ,
         0.85994685,  0.6153214 , -0.32393873, -0.7024941 , -0.6812314 ,
         0.95068973, -0.00235253, -0.94090223,  0.8106788 , -0.87052256],
       [ 0.9999997 ,  0.99511987, -0.9193917 ,  0.96714664, -0.99927205,
         0.99629724, -0.9976895 , -0.50894654, -0.9532794 , -0.9996165 ,
         0.9989381 ,  0.96232474,  0.19237956, -0.9941049 , -0.99594206,
         0.999439  ,  0.31012866, -0.9996526 ,  0.9464856 , -0.9986753 ]],
      dtype=float32)>

In [10]:
# Inspect the variables owned by the layer (the output below starts with the (3, 20) kernel).
layer_fully_connected.weights

[<tf.Variable 'fully_connected_network/kernel:0' shape=(3, 20) dtype=float32, numpy=
 array([[ 0.48736626,  0.16643673,  0.4884714 , -0.04306218,  0.31304026,
          0.4904948 ,  0.0348767 , -0.03808188,  0.4693576 ,  0.18796355,
          0.23890507,  0.1898207 ,  0.24695343, -0.26914525, -0.3593489 ,
         -0.31046838,  0.35537618,  0.08025074, -0.2921353 , -0.28827327],
        [ 0.4090948 ,  0.46570247, -0.07663238,  0.1624394 , -0.4088944 ,
          0.16011816, -0.20517328, -0.39111662, -0.2530573 , -0.4858535 ,
          0.08680445, -0.15283197,  0.24027085, -0.11928651, -0.15066245,
          0.46990436, -0.46611965, -0.35208535, -0.04380378,  0.16856205],
        [ 0.4159351 , -0.08853608, -0.50966346,  0.18383116, -0.43868804,
         -0.03831303, -0.33207083,  0.28225744, -0.38499784, -0.32917604,
          0.29354215,  0.27776122, -0.35451218, -0.12149644, -0.05691144,
          0.40318775,  0.19150352, -0.3739791 ,  0.5029163 , -0.46136138]],
       dtype=float32)>,

## Embedding

In [11]:
# Embedding layer acting as a lookup table: input_dim=10 distinct integer ids,
# each mapped to a vector of output_dim=4 values.
embedding = tf.keras.layers.Embedding(input_dim=10, output_dim=4, name="embedding")

In [12]:
# Look up the vectors for ids 1, 3 and 1. The repeated id returns the same row twice
# (compare the first and third rows of the output).
input_data = tf.convert_to_tensor([1, 3, 1])
embedding(input_data)

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[-0.04214694, -0.03316452,  0.04542165,  0.04758637],
       [-0.03672953,  0.00341352, -0.00357755,  0.0393678 ],
       [-0.04214694, -0.03316452,  0.04542165,  0.04758637]],
      dtype=float32)>

## Merge layers

In [13]:
# Two (1, 3) tensors used as inputs for the merge-layer examples below.
# NOTE: this rebinds `a` and `b`, which held rank-1 tensors in the earlier cells.
a = tf.convert_to_tensor([[1, 2, 3]])
b = tf.convert_to_tensor([[5, 6, 7]])

In [14]:
# Concatenate joins the inputs along the last axis: (1, 3) and (1, 3) -> (1, 6).
tf.keras.layers.Concatenate(name="concat")([a, b])

<tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[1, 2, 3, 5, 6, 7]], dtype=int32)>

In [15]:
# Add sums the inputs element-wise; the output keeps the (1, 3) shape.
tf.keras.layers.Add(name="add")([a, b])

<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[ 6,  8, 10]], dtype=int32)>

In [16]:
# Subtract computes first input minus second input, element-wise (here a - b).
tf.keras.layers.Subtract(name="subtract")([a, b])

<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[-4, -4, -4]], dtype=int32)>

In [17]:
# Maximum keeps the larger value at each position across the inputs.
tf.keras.layers.Maximum(name="max")([a, b])

<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[5, 6, 7]], dtype=int32)>

# Modelo

## Secuencial

In [18]:
# ? - Activation choices:
# ? - ReLU in the hidden layers adds non-linearity, is cheap to compute,
# ? - and mitigates the vanishing gradient problem.
# * - The sigmoid on the final layer squashes the output into (0, 1), so it can be read as a
# * - probability, which makes it well suited for binary classification. Its gradient is also smooth.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(10, input_shape=(50,), activation="relu", name="hidden_layer_1"),
        tf.keras.layers.Dense(5, activation="relu", name="hidden_layer_2"),
        tf.keras.layers.Dense(1, activation="sigmoid", name="final_layer"),
    ],
    name="sequential_model",
)

In [19]:
# Print the layer-by-layer table with output shapes and parameter counts.
model.summary()

Model: "sequential_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hidden_layer_1 (Dense)      (None, 10)                510       
                                                                 
 hidden_layer_2 (Dense)      (None, 5)                 55        
                                                                 
 final_layer (Dense)         (None, 1)                 6         
                                                                 
Total params: 571 (2.23 KB)
Trainable params: 571 (2.23 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Functional

- The Sequential model in Keras is a linear stack of layers. It's straightforward and well-suited for most simple neural network architectures, where each layer has exactly one input and output tensor.
- Analogy:
  Think of a straight water pipe, where water flows from one end to the other through various sections, each performing a specific transformation on the flow.

- The **Functional** model is a complex network of pipes: water can enter through multiple inlets, pass through a network of interconnected pipes, and exit through multiple outlets.

In [23]:
 # ? - Define the input layer of the model. shape=(50, ) means each sample is a 1-D vector of 50 features;
# ? - the batch dimension is left unspecified (it shows up as None in the output below).
input_layer = tf.keras.layers.Input(shape=(50, ), name="input")
input_layer

<KerasTensor: shape=(None, 50) dtype=float32 (created by layer 'input')>

In [28]:
 # ? - Stack three Dense layers of 10 units each; every layer is called on the previous layer's output.
# ! - No activation is passed, so each Dense layer uses Keras' default linear activation, f(x) = x.
x_1 = tf.keras.layers.Dense(units=10, name="hidden_layer_1")(input_layer)  # * Hidden layer 1
x_2 = tf.keras.layers.Dense(units=10, name="hidden_layer_2")(x_1)          # * Hidden layer 2
x_3 = tf.keras.layers.Dense(units=10, name="final_layer")(x_2)             # * Output layer (no activation function specified)
# ? - The model is defined by the tensors it starts from and ends at.
model = tf.keras.models.Model(inputs=input_layer, outputs=x_3, name="functional_model")

In [29]:
# Print the layer-by-layer table with output shapes and parameter counts.
model.summary()

Model: "functional_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 50)]              0         
                                                                 
 hidden_layer_1 (Dense)      (None, 10)                510       
                                                                 
 hidden_layer_2 (Dense)      (None, 10)                110       
                                                                 
 final_layer (Dense)         (None, 10)                110       
                                                                 
Total params: 730 (2.85 KB)
Trainable params: 730 (2.85 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


We are now going to see a use case for Functional Neural networks: 
* Useful for multiple inputs and outputs. Different data sources. 
* Complex architectures for non-linear topology. Shared layers (?), residual connections (?), self-connecting layers (?)
* Multi-task learning (?), where the model learns from related tasks simultaneously to improve performance. 

In [30]:
# ? - The functional API lets a single model take several inputs of different kinds.
# ? - Each input here is one value per sample: a numeric feature and a categorical id.
input_numerical = tf.keras.layers.Input(shape=(1, ), name="numerical_input")
input_categorical = tf.keras.layers.Input(shape=(1, ), name="categorical_input")

# ? - Numeric branch: a 10-neuron dense layer with the tanh activation function.
x_numeric = tf.keras.layers.Dense(10, activation="tanh", name="encoding_numerical")(input_numerical)

# ? - Categorical branch: an embedding layer maps each of the 5 possible ids to a 3-value vector,
# ? - giving a (batch, 1, 3) tensor; the reshape layer flattens it to (batch, 3) so it can be concatenated.
x_categorical = tf.keras.layers.Embedding(input_dim=5, output_dim=3, name="embedding_categorical")(input_categorical)
x_categorical = tf.keras.layers.Reshape(target_shape=(3, ), name="flat_vector")(x_categorical)

# ? - Merge both branches and apply 2 final dense layers that produce a single output.
x = tf.keras.layers.Concatenate()([x_numeric, x_categorical])
x = tf.keras.layers.Dense(10, activation="tanh")(x)
# ! - The output uses a sigmoid so the model emits a probability in (0, 1). The "binary_crossentropy"
# ! - loss and the accuracy / AUC metrics used when compiling this model expect probabilities, not raw logits.
x = tf.keras.layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.models.Model([input_numerical, input_categorical], x, name="model_with_two_inputs")

![Alt text](image-1.png) 

### Numeric variables: 
+ Dense layer, 10 neurons, tanh activation function. 
  
### Categoric variables: 
+ **Embedding Layer:** A type of layer for handling categorical data, especially with a large vocabulary. It maps each category or word to a dense vector of fixed size, whose values are learned during training. This makes the input more informative and suitable for the rest of the network. It is also more efficient in terms of parameters than *one-hot encoding.*
+ **Reshape Layer:** A reshape layer changes the shape of the input tensor to a specified shape without altering its data. It is used when we need to alter the dimensions to fit the expected input shape of subsequent layers, and it is useful in CNNs. Imagine we have a tensor of shape (32, 32, 3) - (width, height, color channels) - and we want to feed it into a dense layer, which expects a 1D vector. A reshape layer can be used to flatten this tensor into shape (3072, ), making it compatible with the dense layer.

In [34]:
model.summary(line_length=150) # * line_length sets the total width of the printed table so that wide rows (e.g. the "Connected to" column) fit on one line.

Model: "model_with_two_inputs"
______________________________________________________________________________________________________________________________________________________
 Layer (type)                                Output Shape                                 Param #        Connected to                                 
 categorical_input (InputLayer)              [(None, 1)]                                  0              []                                           
                                                                                                                                                      
 numerical_input (InputLayer)                [(None, 1)]                                  0              []                                           
                                                                                                                                                      
 embedding_categorical (Embedding)           (None, 1, 3)      

# Compile

In [35]:
# Configure training: Adam optimizer, binary cross-entropy loss, and two metrics to track.
# With its default settings, "binary_crossentropy" expects the model to output probabilities in [0, 1].
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(name="area_under_curve"),
    ],
)

# Fit

In [None]:
 # NOTE: the calls below are alternative ways of calling fit(), shown side by side for illustration;
# each one assumes a model whose inputs match the data that is passed in.
# * 1. Probable use case for a sequential API model: one input array and one target array.
model.fit(X_train, y_train)
# * 2. Use case for a functional API model with 2 types of input and a single output.
model.fit((X_train_numerical, X_train_categorical), y_train)
# * 3. Same as before, but the inputs are passed as a dictionary keyed by Input-layer name,
# *    which makes explicit which array feeds which input.
model.fit({"numerical_input": X_train_numerical, "categorical_input": X_train_categorical}, y_train)
# * 4. Introduction to the concept of a validation split => Proxy to the test dataset.
# *    validation_split holds out a fraction of the training data for validation ...
model.fit(X_train, y_train, validation_split=0.2)
# *    ... while validation_data passes an explicit validation set instead.
model.fit(X_train, y_train, validation_data=(X_val, y_val))

### Validation Split in Neural Network Training:
+ The validation set is used as a proxy for the test set to tune the hyperparameters of the model (like learning rate, number of layers, neurons per layer, etc.).
+ It is also used to make decisions about the architecture of the model itself without touching the test set. This helps avoid information leakage from the test set during the model development phase.
+ The validation set allows for the ongoing monitoring of the model's generalization ability during training. 

**Test set is used strictly for the final evaluation of the model - After all tuning, training and validation has been completed.**
_It's like trying your food before you serve it, rather than just eating all of it._