In [1]:
# CONNECTING GOOGLE DRIVE FOR DATASET
from google.colab import drive
drive.mount("/content/drive/")
DATASET_PATH = "/content/drive/My Drive/Colab Notebooks/Datasets/Feed Forward Neural Network/"

Mounted at /content/drive/


In [777]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

np.random.seed(42)
NUM_FEATS = 29  # Number of features

In [778]:
def minmaxscaler(data):
  data=(data-data.min(axis=0))/(data.max(axis=0)-data.min(axis=0))
  return pd.DataFrame(data)

In [779]:
def read_data(file_name):
  data = pd.read_csv(f"{DATASET_PATH}{file_name}", header=0)
  train_Y = data['1'] # First column is the labels column
  data = data.drop(['1'], axis=1)

  train_X = minmaxscaler(data)
  return train_X, pd.DataFrame(train_Y)

In [780]:
def apply_pca(train_X):
  current_features = train_X.T
  cov_matrix = np.cov(current_features)
  values, vectors = np.linalg.eig(cov_matrix)
  explained_variances = []
  for i in range(len(values)):
      explained_variances.append(values[i] / np.sum(values))
  
  current_features = train_X.T
  cov_matrix = np.cov(current_features)
  values, vectors = np.linalg.eig(cov_matrix)
  explained_variances = []
  for i in range(len(values)):
      explained_variances.append(values[i] / np.sum(values))

  baseline_coverage = 0.92
  current_coverage = 0
  reduced_feature_count = 0
  for ev in range(len(explained_variances)):
    current_coverage += explained_variances[ev]
    if current_coverage > baseline_coverage:
      reduced_feature_count = ev
      break
  
  print(reduced_feature_count)
  NUM_FEATS = reduced_feature_count
  
  return vectors

In [828]:
class Net(object):
  def __init__(self, num_layers, num_units):
    self.num_layers = num_layers
    self.num_units = num_units

    self.biases = []
    self.weights = []

    for i in range(num_layers):
      if i == 0:  # Input Layer
        self.weights.append(np.random.uniform(-1, 1, size=(NUM_FEATS, num_units)))
      else: # Hidden Layer
        self.weights.append(np.random.uniform(-1, 1, size=(num_units, num_units)))
      self.biases.append(np.random.uniform(-1, 1, size=(num_units, 1)))

    # Output Layer
    self.weights.append(np.random.uniform(-1, 1, size=(num_units, 1)))
    self.biases.append(np.random.uniform(-1, 1, size=(1, 1)))

  def __call__(self, train_X):
    self.aggregates = list()
    self.activations = list()
    
    layer_input = train_X
    for layer in range(self.num_layers):
      aggregate = np.dot(layer_input, self.weights[layer]) + self.biases[layer].T
      activation = self.relu(aggregate)
      self.aggregates.append(aggregate)
      self.activations.append(activation)
      layer_input = activation
    # Output layer
    aggregate = np.dot(layer_input, self.weights[layer+1]) + self.biases[layer+1].T
    self.aggregates.append(aggregate)
    self.activations.append(aggregate)
    
    return aggregate

  def relu(self, input_matrix):
    return np.maximum(input_matrix, 0)

  def relu_grad(self, input_matrix):
    return input_matrix > 0

  def backward(self, x, y, y_hat, lamda):
    weight_gradients = [None]*(self.num_layers + 1)
    node_gradients = [None]*(self.num_layers + 1)

    regularized_component = 0
    weight_sum = 0
    for w in self.weights:
      regularized_component += np.sqrt(np.sum(np.square(w)))
      weight_sum += np.sum(np.abs(w))

    # Calculating node gradients
    for layer in range(self.num_layers, -1, -1):
      if layer == self.num_layers:
        node_gradients[layer] = 2*np.subtract(y_hat, y) 
      else:
        temp = np.dot(node_gradients[layer+1], self.weights[layer+1].T)
        node_gradients[layer] = np.multiply(temp, self.relu_grad(self.aggregates[layer])) 
      
    for layer in range(self.num_layers, 0, -1):
      weight_gradients[layer] = np.einsum('ij,ik->ijk', self.activations[layer-1], node_gradients[layer]) + 2 * lamda * self.weights[layer]

    layer = layer - 1
    weight_gradients[layer] = np.einsum('ij,ik->ijk', x, node_gradients[layer]) + 2 * lamda * self.weights[layer]
    
    for layer in range(self.num_layers, -1, -1):
      weight_gradients[layer] = np.mean(weight_gradients[layer], axis = 0)

    for layer in range(self.num_layers, -1, -1):
      node_gradients[layer] = np.sum(node_gradients[layer], axis = 0)
      if type(node_gradients[layer]) != np.ndarray:
        node_gradients[layer] = node_gradients[layer].to_numpy()

    for l in range(len(node_gradients)):
      node_gradients[l] = node_gradients[l].reshape(-1,1)
    return weight_gradients, node_gradients

In [829]:
class Optimizer(object):
  def __init__(self, learning_rate):
    self.learning_rate = learning_rate

  def step(self, weights, delta_weights, biases, delta_biases):
    for layer in range(len(weights)):
      delta_wt = self.learning_rate*delta_weights[layer]
      weights[layer] = np.subtract(weights[layer], delta_wt)

    for layer in range(len(biases)):
      delta_bias = self.learning_rate*delta_biases[layer]
      biases[layer] = np.subtract(biases[layer], delta_bias)
    return weights, biases

In [830]:
class AdamOptimizer(object):

    def __init__(self, learning_rate):
        self.m = []
        self.v = []
        self.t = 1
        self.learning_rate = learning_rate
        
    def step(self, weights, delta_weights , biases , delta_biases, beta1 = 0.9, beta2 = 0.999):
        """ Adam optimizer, bias correction is implemented. """
        if(self.t==1):
          for layer in range(len(weights)):
            self.m.append(np.random.uniform(0.0, 0.0, size=delta_weights[layer].shape))
            self.v.append(np.random.uniform(0.0, 0.0, size=delta_weights[layer].shape))
        # updated_params = []
        
        for  layer in range(len(weights)):
          
          self.m[layer] = beta1 * self.m[layer] + (1-beta1) * delta_weights[layer]       
          self.v[layer] = beta2 * self.v[layer] + (1-beta2) * delta_weights[layer] **2
          m_corrected = self.m[layer] / (1-beta1**self.t)
          v_corrected = self.v[layer] / (1-beta2**self.t)
          weights[layer] += -self.learning_rate * m_corrected / (np.sqrt(v_corrected) + 1e-8)
        
        self.t +=1
        
        for layer in range(len(weights)):
          delta_bias = self.learning_rate*delta_biases[layer]
     
          biases[layer] = np.subtract(biases[layer], delta_bias)
        return weights,biases

In [831]:
def loss_mse(y, y_hat):
  return (((np.subtract(y, y_hat)**2).sum())/y.shape[0])**(0.5)

In [832]:
raw_test_X = np.array(pd.read_csv(DATASET_PATH + "test.csv"))
raw_test_X = minmaxscaler(raw_test_X)

raw_train_X, train_Y = read_data('train.csv')
raw_dev_X, dev_Y = read_data("dev.csv")

columns = [x for x in range(1, 91)]
columns = columns
raw_test_X.columns.values[:] = columns
raw_train_X.columns.values[:] = columns
raw_dev_X.columns.values[:] = columns

db = pd.concat([raw_train_X, raw_dev_X, raw_test_X], axis=0)

In [833]:
db.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,81,82,83,84,85,86,87,88,89,90
0,0.625066,0.518548,0.59179,0.512467,0.315919,0.606895,0.524523,0.414843,0.528907,0.544377,...,0.262914,0.350085,0.334135,0.431003,0.387217,0.669793,0.507545,0.49978,0.765121,0.284739
1,0.391954,0.377018,0.418961,0.345425,0.429244,0.359135,0.533448,0.450449,0.351191,0.505538,...,0.355628,0.370124,0.304988,0.489212,0.427288,0.707801,0.559043,0.473614,0.754003,0.284978
2,0.552253,0.633584,0.437503,0.525843,0.452884,0.443956,0.594289,0.525178,0.388741,0.630555,...,0.246077,0.463615,0.325095,0.50398,0.552231,0.672552,0.582417,0.521172,0.724217,0.279617
3,0.761643,0.565835,0.554013,0.451897,0.417237,0.301431,0.595046,0.436198,0.395769,0.500972,...,0.243847,0.406358,0.29033,0.483724,0.464408,0.698157,0.585269,0.487985,0.740129,0.267115
4,0.606655,0.425655,0.583708,0.442717,0.478696,0.407195,0.52912,0.434133,0.358751,0.417719,...,0.302174,0.368794,0.299064,0.498369,0.49377,0.687865,0.580101,0.458438,0.734952,0.262689


In [834]:
vectors = apply_pca(db)

29


In [835]:
train_X = pd.DataFrame()
for i in range(NUM_FEATS):
    train_X[f'Feature{i}'] = raw_train_X.dot(vectors.T[i])

dev_X = pd.DataFrame()
for i in range(NUM_FEATS):
    dev_X[f'Feature{i}'] = raw_dev_X.dot(vectors.T[i])

test_X = pd.DataFrame()
for i in range(NUM_FEATS):
    test_X[f'Feature{i}'] = raw_test_X.dot(vectors.T[i])

In [848]:
FFNN = Net(1, 256) # 2 hidden layers, 64 nodes each
learning_rate = 10**-3
lamda = 0.0005

optimizer = AdamOptimizer(learning_rate)
no_of_epochs = 500

best_weights = []
best_biases = []
best_error = 10**9
batch_size = 16
flag = 0

no_of_samples = train_X.shape[0]

dev_loss = []
train_loss = []

for i in range(no_of_epochs):
  no_of_batches  = no_of_samples // batch_size
  batch_weight_gradients = []
  batch_bias_gradients = []
  for bch in range(no_of_batches):
    batch_X = train_X.iloc[bch*batch_size:(bch+1)*batch_size].copy()
    batch_Y = train_Y.iloc[bch*batch_size:(bch+1)*batch_size].copy()

    pred_Y = pd.DataFrame(FFNN(batch_X))
    del_w, del_b = FFNN.backward(batch_X, batch_Y, pred_Y, lamda)
    if batch_weight_gradients == []:
      batch_weight_gradients = del_w
      batch_bias_gradients = del_b
    else:
      for layer in range(len(batch_weight_gradients)):
        batch_weight_gradients[layer] = np.add(batch_weight_gradients[layer], del_w[layer])
      for layer in range(len(batch_bias_gradients)):
        batch_bias_gradients[layer] = np.add(batch_bias_gradients[layer], del_b[layer])

  for layer in range(len(batch_weight_gradients)):
    batch_weight_gradients[layer] /= no_of_batches
  for layer in range(len(batch_bias_gradients)):
    batch_bias_gradients[layer] /= no_of_batches

  pred_Y = pd.DataFrame(FFNN(train_X))
  mse_error = loss_mse(train_Y, pred_Y).iloc[0]
  train_loss.append(mse_error)

  predictions = FFNN(dev_X)
  dev_mse = loss_mse(dev_Y,predictions).iloc[0]
  dev_loss.append(dev_mse)

  new_w, new_b = optimizer.step(FFNN.weights, batch_weight_gradients, FFNN.biases, batch_bias_gradients)
  print(f"EPOCH {i} : RMSE ERROR : {mse_error}")

  


EPOCH 0 : RMSE ERROR : 2000.6039029367316
EPOCH 1 : RMSE ERROR : 631.9638242030176
EPOCH 2 : RMSE ERROR : 86.88972658803443
EPOCH 3 : RMSE ERROR : 13.382441525843968
EPOCH 4 : RMSE ERROR : 11.373063954304289
EPOCH 5 : RMSE ERROR : 11.43941427942765
EPOCH 6 : RMSE ERROR : 11.40340313414922
EPOCH 7 : RMSE ERROR : 11.371704288274579
EPOCH 8 : RMSE ERROR : 11.34887887381432
EPOCH 9 : RMSE ERROR : 11.332268753754628
EPOCH 10 : RMSE ERROR : 11.319887107381408
EPOCH 11 : RMSE ERROR : 11.310466195488555
EPOCH 12 : RMSE ERROR : 11.303176760469741
EPOCH 13 : RMSE ERROR : 11.297426873148536
EPOCH 14 : RMSE ERROR : 11.292805893276082
EPOCH 15 : RMSE ERROR : 11.289015028786787
EPOCH 16 : RMSE ERROR : 11.28583801358962
EPOCH 17 : RMSE ERROR : 11.28309767078712
EPOCH 18 : RMSE ERROR : 11.280660945843414
EPOCH 19 : RMSE ERROR : 11.27840858120855
EPOCH 20 : RMSE ERROR : 11.276246745617398
EPOCH 21 : RMSE ERROR : 11.27412294926763
EPOCH 22 : RMSE ERROR : 11.272022885944862
EPOCH 23 : RMSE ERROR : 11.269

In [849]:
predictions = FFNN(dev_X)
dev_mse = loss_mse(dev_Y,predictions).iloc[0]
print(f"TRAIN SET : {mse_error} | DEV SET : {dev_mse}")
print(f"TRAIN SET * DEV SET : {mse_error*dev_mse}")

TRAIN SET : 10.607373005608794 | DEV SET : 10.875862816613882
TRAIN SET * DEV SET : 10875862816.613882


In [853]:
tst = FFNN(test_X)
df = pd.DataFrame(tst)
df.index = df.index+1
df.to_csv('pred.csv', header=['Predictions'], index=True, index_label='Id')
print(tst)

[[2000.10721681]
 [2000.96907962]
 [2001.89851139]
 ...
 [1999.47686756]
 [2000.52366503]
 [1998.0668595 ]]


In [854]:
tst.min()

1987.7346258288103

In [855]:
tst.max()

2011.5280119934973