# P2 - permutation-equivariant functions

In [6]:
# data preprocessing modules
from sklearn import metrics

# ML modules
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Input, Activation, MaxPooling1D, Conv1D, Flatten
from keras.optimizers import Adam
from keras.utils import plot_model

# general stuff
import pandas as pd
import io
import os
import requests
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### P2.1

In [2]:
def _load_csv(path):
  """Read a CSV file and drop its 'Unnamed: 0' column.

  NOTE(review): 'Unnamed: 0' is presumably a row index saved alongside the
  data -- confirm against how the files were written.
  """
  return pd.read_csv(path).drop(['Unnamed: 0'], axis=1)


X_train = _load_csv('./data/xtrain-2.csv')
y_train = _load_csv('./data/ytrain-2.csv')
X_test = _load_csv('./data/xtest-2.csv')
y_test = _load_csv('./data/ytest-2.csv')

print(f'X_train {X_train.shape}')

# Train and test must expose the same columns; fail with a readable message
# instead of a reshape error further down.
assert X_test.shape[1] == X_train.shape[1], (
    f'feature count mismatch: train has {X_train.shape[1]}, test has {X_test.shape[1]}'
)

# Conv1D consumes (samples, steps, channels) tensors, so append a channel axis
# of size 1. Each array is sized from its own frame.
X_train_3d = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_3d = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)
print(f'X_train_3d {X_train_3d.shape}')

X_train (200000, 15)
X_train_3d (200000, 15, 1)


In [16]:
# Per-sample input shape: everything after the leading sample axis of the
# training tensor.
n_features = tuple(X_train_3d.shape[1:])
n_features

(15, 1)

In [17]:
# Construct permutation equivariant neural network
def create_p_e_model(L, w, input_shape=None, filters=5, n_outputs=5):
  """Build and compile the convolutional model used in P2.

  Architecture: (L - 1) blocks of [Conv1D (affine) -> ReLU], then a max-pool
  over the whole sequence axis, a flatten, and a linear Dense output layer.

  NOTE(review): a Conv1D with kernel size w > 1 shares weights across
  positions, which gives translation equivariance; it is permutation
  equivariant only for w == 1. Confirm this matches the assignment's intent.

  Args:
    L: total number of layers; L - 1 convolutional blocks are created
      (none when L <= 1).
    w: kernel size passed to every Conv1D layer.
    input_shape: per-sample input shape (steps, channels). When None, the
      shape is taken from the notebook-level `X_train_3d`, as before.
    filters: number of filters in each Conv1D layer.
    n_outputs: number of units in the final Dense layer.

  Returns:
    A compiled `keras.Model` (Adam optimizer, MSE loss).
  """
  if input_shape is None:
    # Backward-compatible default: read the shape from the training tensor.
    input_shape = X_train_3d.shape[1:]

  inputs = Input(shape=tuple(input_shape))
  print(inputs.shape)
  x = inputs
  for i in range(L-1):
    # The convolution is purely affine here; the ReLU is applied exactly once
    # by the explicit Activation layer below (the original applied it twice).
    x = Conv1D(filters, w, padding='same', name='conv_layer_{}'.format(i))(x)
    x = Activation('relu')(x)

  # Pool size equals the sequence length, so each channel is reduced to its
  # maximum over all positions.
  x = MaxPooling1D(pool_size=x.shape[1])(x)
  flatten = Flatten()(x)
  outputs = Dense(n_outputs)(flatten)

  p_e_model = keras.Model(inputs = inputs, outputs=outputs)

  optimizer = Adam(learning_rate=1e-4, epsilon=1e-3)
  # 'accuracy' is kept because the training cell reads history['accuracy'];
  # for an MSE-trained model it is of limited diagnostic value.
  p_e_model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])

  return p_e_model

In [18]:
# Instantiate one reference network (kernel size 5, L = 10) and list its layers.
p_e_model = create_p_e_model(w=5, L=10)
p_e_model.summary()

(None, 15, 1)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 15, 1)]           0         
                                                                 
 conv_layer_0 (Conv1D)       (None, 15, 5)             30        
                                                                 
 activation_27 (Activation)  (None, 15, 5)             0         
                                                                 
 conv_layer_1 (Conv1D)       (None, 15, 5)             130       
                                                                 
 activation_28 (Activation)  (None, 15, 5)             0         
                                                                 
 conv_layer_2 (Conv1D)       (None, 15, 5)             130       
                                                                 
 activation_29 (Activation)  (None, 15, 5)   

In [19]:
# code for generating attached image
# plot_model(p_e_model, 'p_e_model_model.png', show_shapes=True)

Explanation of implementation:
- This model uses the Keras Functional API rather than `Sequential`, because it provides more flexibility.
- The function accepts two parameters (L, w). L is the number of layers and w is passed to every Conv1D layer as its `kernel_size`. With `filters` set to 5 (m), each convolution kernel has shape (w, input channels, m).
- Each of the first L-1 layers consists of an equivariant affine transformation (Conv1D) followed by a ReLU activation layer.
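- Conv1D makes the layers equivariant because the same filter weights are applied at every position of the input sequence, so the learned features move along with the input. Note that for kernel size w > 1 this is equivariance to translations (up to padding effects at the borders); the layer is equivariant to arbitrary permutations only for w = 1.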
- Then there is a max-pooling layer that takes the maximum value over all positions for each feature channel of the last convolutional layer's output. Since the maximum does not depend on the order of the positions, this step is invariant to the ordering of the feature map it receives, turning the equivariant stack into an invariant network.
- It is then flattened and passed to the output layer with 5 (m) dimensions.

### P2.2

In [20]:
# Hyper-parameter grid: the i-th run uses depth L[i] with kernel size w[i].
L = [2,2,2,2,3,3,3,3]
w = [5,10,100,200,5,10,100,200]

# Materialised as a list so the pairs can be iterated more than once
# (a bare zip object is exhausted after a single pass).
zipped = list(zip(L, w))

# Per-run training curves, in the same order as `zipped`.
hist_acc_p_e = []
hist_val_acc_p_e = []
hist_loss_p_e = []
hist_val_loss_p_e = []
# Per-run RMSE on the held-out test set, in the same order as `zipped`.
test_rmse_p_e = []

val_split = 0.1
n_epochs = 100
batch_size = 300

for L_val, w_val in zipped:

  print("Values: (%s, %s)" %(int(L_val), int(w_val)))
  print("-----------------------------\n")

  model = create_p_e_model(L_val, w_val)

  history_p_e = model.fit(X_train_3d,
                          y_train,
                          epochs=n_epochs,
                          batch_size=batch_size,
                          validation_split=val_split)

  pred = model.predict(X_test_3d)
  # scikit-learn's signature is (y_true, y_pred); MSE is symmetric, so the
  # value is unchanged, but the conventional order avoids surprises if the
  # metric is ever swapped for an asymmetric one.
  score = np.sqrt(metrics.mean_squared_error(y_test, pred))
  print(f"--- Final score (RMSE) : {score}")
  test_rmse_p_e.append(score)

  # Store the performance. Keras prefixes validation metrics with 'val_',
  # so the validation counterpart of 'accuracy' is 'val_accuracy'.
  hist_acc_p_e.append(history_p_e.history['accuracy'])
  hist_val_acc_p_e.append(history_p_e.history['val_accuracy'])
  hist_loss_p_e.append(history_p_e.history['loss'])
  hist_val_loss_p_e.append(history_p_e.history['val_loss'])

  print("-----------------------------\n")

Values: (2, 5)
-----------------------------

(None, 15, 1)
Epoch 1/100


2023-02-24 15:57:11.891043: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-24 15:57:12.365149: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 15:57:17.885014: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2023-02-24 16:04:17.359792: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


--- Final score (RMSE) : 0.40004468907941915
-----------------------------

Values: (2, 10)
-----------------------------

(None, 15, 1)
Epoch 1/100
  9/600 [..............................] - ETA: 4s - loss: 1.4207 - accuracy: 0.2067  

2023-02-24 16:04:31.107941: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 16:04:35.345348: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2023-02-24 16:11:37.699554: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


--- Final score (RMSE) : 0.4000198583672204
-----------------------------

Values: (2, 100)
-----------------------------

(None, 15, 1)
Epoch 1/100
  8/600 [..............................] - ETA: 4s - loss: 0.2521 - accuracy: 0.1917  

2023-02-24 16:11:48.549907: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 16:11:52.945349: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2023-02-24 16:19:19.663998: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


--- Final score (RMSE) : 0.4000501363437163
-----------------------------

Values: (2, 200)
-----------------------------

(None, 15, 1)
Epoch 1/100
  7/600 [..............................] - ETA: 5s - loss: 0.2670 - accuracy: 0.1986  

2023-02-24 16:19:30.833872: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 16:19:35.516292: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2023-02-24 16:27:18.949855: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


--- Final score (RMSE) : 0.4000289125262223
-----------------------------

Values: (3, 5)
-----------------------------

(None, 15, 1)
Epoch 1/100


2023-02-24 16:27:30.451476: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 16:27:35.108124: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

2023-02-24 16:35:00.300561: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


--- Final score (RMSE) : 0.3999953687227674
-----------------------------

Values: (3, 10)
-----------------------------

(None, 15, 1)
Epoch 1/100
  1/600 [..............................] - ETA: 3:10 - loss: 0.3216 - accuracy: 0.2467

2023-02-24 16:35:12.737978: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-24 16:35:17.264089: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [None]:
# One colour per (L, w) configuration, taken from matplotlib's default
# "C0".."C9" cycle so that all eight runs get a distinct colour.
colors = [f'C{i}' for i in range(10)]

# Create the figure BEFORE plotting so that figsize applies to the figure that
# actually holds the curves.
fig, ax = plt.subplots(figsize=(15, 15))

# Zipping the histories together with the grid plots only the runs that have
# finished, so an interrupted sweep does not raise an IndexError.
finished_runs = zip(L, w, hist_loss_p_e, hist_val_loss_p_e)
for i, (L_val, w_val, train_loss, val_loss) in enumerate(finished_runs):
    label = f"({L_val}, {w_val})"
    color = colors[i % len(colors)] # select a color based on the index
    # Same colour for both curves of a run; marker and legend suffix tell
    # training ('|') and validation ('x') apart.
    ax.plot(train_loss, '-|', label=f"{label} train", color=color)
    ax.plot(val_loss, '-x', label=f"{label} val", color=color)

ax.set_title('Model loss (training and validation)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
plt.show()

Comments:
- It is hard to accurately distinguish between the train/val curves for each configuration, but the figure is useful for comparing the configurations with each other. The best-performing configuration, (3,200), is shown in dark blue; its validation loss (x) starts higher than its training loss (|) and then the two converge. This indicates that the more complex combination of L and w gives the best performance with the current architecture. However, after 100 epochs all configurations have converged.