# ASSIGNMENT-02

# 1) Implementing neural networks from scratch



We will use sigmoid as the activation function in this example. We first define the sigmoid function and its derivative function:


In [None]:
import numpy as np
def sigmoid(z):
  """Logistic sigmoid, applied element-wise.

  Args:
    z: A scalar or NumPy array of pre-activation values.

  Returns:
    1 / (1 + exp(-z)), with the same shape as ``z``.
  """
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator
def sigmoid_derivative(z):
  """Derivative of the sigmoid with respect to its input ``z``.

  Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).

  Args:
    z: A scalar or NumPy array of pre-activation values.

  Returns:
    The element-wise derivative, with the same shape as ``z``.
  """
  activation = sigmoid(z)
  return activation * (1.0 - activation)

We then define the training function, which takes in the training dataset, the number of units in the hidden layer (we will only use one hidden layer as an example), the learning rate, and the number of iterations:

In [None]:
def train(X, y, n_hidden, learning_rate, n_iter):
  """Fit a one-hidden-layer regression network with batch gradient descent.

  The hidden layer uses a sigmoid activation and the single output unit is
  linear. Every iteration runs one forward and one backward pass over the
  whole training set; the mean squared error is printed every 100 iterations.

  Args:
    X: Training inputs of shape (m, n_input).
    y: Training targets, of shape (m,) or (m, 1).
    n_hidden: Number of units in the hidden layer.
    learning_rate: Step size applied to each gradient-descent update.
    n_iter: Number of full-batch iterations to run.

  Returns:
    dict holding the learned parameters under the keys 'W1', 'b1', 'W2'
    and 'b2'.
  """
  m, n_input = X.shape
  # Force the targets into a column. A 1-D y of shape (m,) would otherwise
  # broadcast against the (m, 1) network output into an (m, m) matrix and
  # silently corrupt both the gradients and the reported loss.
  y = np.asarray(y).reshape(m, 1)
  W1 = np.random.randn(n_input, n_hidden)
  b1 = np.zeros((1, n_hidden))
  W2 = np.random.randn(n_hidden, 1)
  b2 = np.zeros((1, 1))
  for i in range(1, n_iter+1):
    # Forward pass: sigmoid hidden layer, linear output.
    Z2 = np.matmul(X, W1) + b1
    A2 = sigmoid(Z2)
    Z3 = np.matmul(A2, W2) + b2
    A3 = Z3
    # Backward pass: gradients are summed over the batch here and divided
    # by m in the update step below.
    dZ3 = A3 - y
    dW2 = np.matmul(A2.T, dZ3)
    db2 = np.sum(dZ3, axis=0, keepdims=True)
    dZ2 = np.matmul(dZ3, W2.T) * sigmoid_derivative(Z2)
    dW1 = np.matmul(X.T, dZ2)
    # keepdims keeps db1 at shape (1, n_hidden), matching b1 and db2.
    db1 = np.sum(dZ2, axis=0, keepdims=True)
    W2 = W2 - learning_rate * dW2 / m
    b2 = b2 - learning_rate * db2 / m
    W1 = W1 - learning_rate * dW1 / m
    b1 = b1 - learning_rate * db1 / m
    if i % 100 == 0:
      # A3 comes from the forward pass made before this iteration's update.
      cost = np.mean((y - A3) ** 2)
      print('Iteration %i, training loss: %f' % (i, cost))
  model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
  return model

To test the model, we will use Boston house prices as the toy dataset (link). As a reminder, data normalization is usually recommended whenever gradient descent is used. Hence, we will standardize the input data by removing the mean and scaling to unit variance:

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
# Source of the raw Boston house-price file.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string for the separator: "\s" is not a valid Python string escape and
# triggers an invalid-escape warning in a normal string literal.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Each record spans two consecutive rows of the file: all columns of the even
# row plus the first two columns of the odd row form the inputs, and the
# third column of the odd row is the target.
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
# Hold out the last num_test records for testing.
num_test = 10
scaler = preprocessing.StandardScaler()
X_train = data[:-num_test, :]
# Fit the scaler on the training rows only, then reuse it for the test rows.
X_train = scaler.fit_transform(X_train)
y_train = target[:-num_test].reshape(-1, 1)
X_test = data[-num_test:, :]
X_test = scaler.transform(X_test)
y_test = target[-num_test:]

With the scaled dataset, we can now train a neural network with one hidden layer of 20 units, a 0.1 learning rate, and 2000 iterations:

In [None]:
# Hyperparameters for the from-scratch network.
n_hidden = 20
learning_rate = 0.1
n_iter = 2000

# Train on the standardized Boston training split.
model = train(
    X_train,
    y_train,
    n_hidden=n_hidden,
    learning_rate=learning_rate,
    n_iter=n_iter,
)

Iteration 100, training loss: 13.145712
Iteration 200, training loss: 9.196022
Iteration 300, training loss: 7.674628
Iteration 400, training loss: 6.625280
Iteration 500, training loss: 5.854716
Iteration 600, training loss: 5.259060
Iteration 700, training loss: 4.795017
Iteration 800, training loss: 4.393364
Iteration 900, training loss: 4.071269
Iteration 1000, training loss: 3.816083
Iteration 1100, training loss: 3.613006
Iteration 1200, training loss: 3.446368
Iteration 1300, training loss: 3.303903
Iteration 1400, training loss: 3.178591
Iteration 1500, training loss: 3.066113
Iteration 1600, training loss: 2.963451
Iteration 1700, training loss: 2.868342
Iteration 1800, training loss: 2.779626
Iteration 1900, training loss: 2.696849
Iteration 2000, training loss: 2.619673


Then, we define a prediction function, which will take in a model and produce the regression results:

In [None]:
def predict(x, model):
  """Run a forward pass of the trained network.

  Args:
    x: Input samples, one row per sample.
    model: dict with the keys 'W1', 'b1', 'W2' and 'b2', as returned by
      ``train``.

  Returns:
    The linear output layer's values for every row of ``x``.
  """
  hidden = sigmoid(np.matmul(x, model['W1']) + model['b1'])
  return np.matmul(hidden, model['W2']) + model['b2']

Finally, we apply the trained model on the testing set:


In [None]:
# Predict on the held-out rows, then print the predictions followed by the
# true targets. predict() returns one row per sample (a column), while y_test
# is a flat 1-D array, hence the different layouts of the two printouts.
predictions = predict(X_test, model)
print(predictions)
print(y_test)

[[22.34174478]
 [22.30836728]
 [23.08593865]
 [22.27047294]
 [23.23868622]
 [25.24146198]
 [21.52085674]
 [24.86970502]
 [24.03509811]
 [21.97159856]]
[19.7 18.3 21.2 17.5 16.8 22.4 20.6 23.9 22.  11.9]


# 2) Implementing neural networks with scikit-learn

We will utilize the MLPRegressor class (MLP stands for multi-layer perceptron, a nickname for neural networks):

In [None]:
from sklearn.neural_network import MLPRegressor
# Two hidden layers (16 and 8 units) with ReLU, trained with Adam; the fixed
# random_state makes the run repeatable.
nn_scikit = MLPRegressor(hidden_layer_sizes=(16, 8),
                          activation='relu',
                          solver='adam',
                          learning_rate_init=0.001,
                          random_state=42, max_iter=2000)
# y_train is stored as a column vector; flatten it so that fit() receives the
# 1-D target it expects and does not emit a DataConversionWarning.
nn_scikit.fit(X_train, y_train.ravel())
predictions = nn_scikit.predict(X_test)
print(predictions)
# Mean squared error on the held-out rows.
print(np.mean((y_test - predictions) ** 2))

  y = column_or_1d(y, warn=True)


[16.79582331 18.55538023 21.07961496 19.21362606 18.50955771 23.5608387
 22.27916529 27.11909153 24.70251262 22.05522035]
13.933482332708795




# 3) Implementing neural networks with TensorFlow

The TensorFlow ecosystem helps you implement best practices for data automation, model tracking, performance monitoring, and model retraining.

First, we import the necessary modules and set a random seed, which is recommended for reproducible modeling.
Next, we create a Keras Sequential model by passing a list of layer instances to the constructor, including two fully connected hidden layers with 20 nodes and 8 nodes, respectively. Again, ReLU activation is used.
We then compile the model, using Adam as the optimizer with a learning rate of 0.02 and MSE as the loss function.
After defining the model, we train it against the training set for 300 epochs.

In [None]:
!pip install tensorflow
import tensorflow as tf
from tensorflow import keras
# Fix TensorFlow's global seed before any layer is created.
tf.random.set_seed(42)

# Two ReLU hidden layers (20 and 8 units) feeding a single linear output.
model = keras.Sequential(
    [
        keras.layers.Dense(20, activation='relu'),
        keras.layers.Dense(8, activation='relu'),
        keras.layers.Dense(1),
    ]
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.02),
    loss="mean_squared_error",
)
model.fit(X_train, y_train, epochs=300)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.src.callbacks.History at 0x7e61922d86d0>

Finally, we use the trained model to predict the testing cases and print out the predictions and their MSE

In [None]:
# model.predict returns a 2-D array; [:, 0] keeps its first column so that
# predictions is a flat 1-D array like y_test.
predictions = model.predict(X_test)[:, 0]
print(predictions)

[22.567854 20.502943 21.905354 22.762884 20.486935 27.698744 25.194757
 30.073343 28.37874  24.216604]


In [None]:
# Mean squared error between the true test targets and the Keras predictions.
print(np.mean((y_test- predictions) ** 2))

33.45522216760204


# Predicting stock prices with neural networks

We will build the stock predictor with TensorFlow in this section. We will start with feature generation and data preparation, followed by network building and training. After that, we will fine-tune the network and incorporate early stopping to boost the stock predictor.

We can load the data we just downloaded as follows

In [None]:
import pandas as pd

from  google.colab import drive
# Mount Google Drive so that files stored there can be read from this runtime.
drive.mount('/content/drive')

# Load the price file, using its "Date" column as the row index.
mydata = pd.read_csv("/content/drive/My Drive/Colab Notebooks/20051201_20051210.csv",index_col="Date")
# Bare expression on the last line: the notebook renders it as the cell output.
mydata

Mounted at /content/drive


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-12-01,10806.0,10934.9,10806.0,10912.6,256932865
2005-12-02,10912.0,10921.4,10861.7,10877.5,214888854
2005-12-05,10877.0,10877.0,10810.7,10835.0,237430947
2005-12-06,10835.4,10936.2,10835.4,10856.9,264721465
2005-12-07,10856.9,10868.1,10764.0,10810.9,243543206
2005-12-08,10808.4,10847.2,10729.7,10755.1,253313750
2005-12-09,10751.8,10806.0,10729.9,10778.6,238907145


Next we define some functions to transform our data

In [None]:
def add_original_feature(df, df_new):
  """Copy the raw price/volume columns into ``df_new`` (mutated in place).

  Adds the same-day open price plus the previous row's open, close, high,
  low and volume.

  Args:
    df: Source frame with the columns 'Open', 'Close', 'High', 'Low' and
      'Volume'.
    df_new: Frame that receives the new feature columns.
  """
  df_new['open'] = df['Open']
  # Everything else is lagged by one row.
  for source in ('Open', 'Close', 'High', 'Low', 'Volume'):
    df_new[source.lower() + '_1'] = df[source].shift(1)

def add_avg_price(df, df_new):
  """Add lagged rolling means of the close price and their pairwise ratios.

  The column suffixes 5 / 30 / 365 correspond to rolling windows of
  5 / 21 / 252 rows; every average is shifted by one row.

  Args:
    df: Source frame with a 'Close' column.
    df_new: Frame that receives the new feature columns (mutated in place).
  """
  close = df['Close']
  windows = {'5': 5, '30': 21, '365': 252}
  for label, window in windows.items():
    df_new['avg_price_' + label] = close.rolling(window).mean().shift(1)
  # Ratio of each shorter-window average to each longer-window one.
  for fast, slow in (('5', '30'), ('5', '365'), ('30', '365')):
    df_new['ratio_avg_price_%s_%s' % (fast, slow)] = (
        df_new['avg_price_' + fast] / df_new['avg_price_' + slow])

def add_avg_volume(df, df_new):
  """Add lagged rolling means of the volume and their pairwise ratios.

  The column suffixes 5 / 30 / 365 correspond to rolling windows of
  5 / 21 / 252 rows; every average is shifted by one row.

  Args:
    df: Source frame with a 'Volume' column.
    df_new: Frame that receives the new feature columns (mutated in place).
  """
  def _lagged_mean(window):
    # Rolling mean over `window` rows, delayed by one row.
    return df['Volume'].rolling(window).mean().shift(1)

  df_new['avg_volume_5'] = _lagged_mean(5)
  df_new['avg_volume_30'] = _lagged_mean(21)
  df_new['avg_volume_365'] = _lagged_mean(252)

  short, medium, longest = (
      df_new['avg_volume_' + tag] for tag in ('5', '30', '365'))
  df_new['ratio_avg_volume_5_30'] = short / medium
  df_new['ratio_avg_volume_5_365'] = short / longest
  df_new['ratio_avg_volume_30_365'] = medium / longest

def add_std_price(df, df_new):
  """Add lagged rolling standard deviations of the close price and ratios.

  The column suffixes 5 / 30 / 365 correspond to rolling windows of
  5 / 21 / 252 rows; every series is shifted by one row.

  Args:
    df: Source frame with a 'Close' column.
    df_new: Frame that receives the new feature columns (mutated in place).
  """
  labels = ('5', '30', '365')
  spans = (5, 21, 252)
  for label, span in zip(labels, spans):
    df_new[f'std_price_{label}'] = df['Close'].rolling(span).std().shift(1)
  # Pair every label with each longer-window label that follows it.
  for position, numerator in enumerate(labels):
    for denominator in labels[position + 1:]:
      df_new[f'ratio_std_price_{numerator}_{denominator}'] = (
          df_new[f'std_price_{numerator}'] / df_new[f'std_price_{denominator}'])

def add_std_volume(df, df_new):
  """Add lagged rolling standard deviations of the volume and their ratios.

  The column suffixes 5 / 30 / 365 correspond to rolling windows of
  5 / 21 / 252 rows; every series is shifted by one row.

  Args:
    df: Source frame with a 'Volume' column.
    df_new: Frame that receives the new feature columns (mutated in place).
  """
  volume = df['Volume']
  column_windows = (
      ('std_volume_5', 5),
      ('std_volume_30', 21),
      ('std_volume_365', 252),
  )
  for column, window in column_windows:
    df_new[column] = volume.rolling(window).std().shift(1)
  for numerator, denominator in ((5, 30), (5, 365), (30, 365)):
    df_new['ratio_std_volume_%d_%d' % (numerator, denominator)] = (
        df_new['std_volume_%d' % numerator]
        / df_new['std_volume_%d' % denominator])

def add_return_feature(df, df_new):
  """Add lagged close-price returns and moving averages of the daily return.

  Returns are computed over 1 / 5 / 21 / 252 rows (suffixes 1 / 5 / 30 / 365)
  and shifted by one row. The moving averages are rolling means of the
  'return_1' column over 5 / 21 / 252 rows, shifted by one more row.

  Args:
    df: Source frame with a 'Close' column.
    df_new: Frame that receives the new feature columns (mutated in place).
  """
  close = df['Close']
  for label, lag in (('1', 1), ('5', 5), ('30', 21), ('365', 252)):
    earlier = close.shift(lag)
    df_new['return_' + label] = ((close - earlier) / earlier).shift(1)
  # NOTE(review): 'return_1' is already lagged by one row, so the extra
  # shift below lags these averages by two rows in total - confirm intended.
  daily_return = df_new['return_1']
  for label, window in (('5', 5), ('30', 21), ('365', 252)):
    df_new['moving_avg_' + label] = (
        daily_return.rolling(window).mean().shift(1))

def generate_features(df):
  """
  Build the model-ready feature table for a stock/index from its price history.
  @param df: dataframe with columns "Open", "Close", "High", "Low", "Volume"
  @return: dataframe with the generated feature columns plus the target
           column "close"; rows containing any missing value are dropped
  """
  features = pd.DataFrame()
  builders = (
      add_original_feature,  # 6 original features
      # 31 generated features
      add_avg_price,
      add_avg_volume,
      add_std_price,
      add_std_volume,
      add_return_feature,
  )
  for build in builders:
    build(df, features)
  # the target
  features['close'] = df['Close']
  return features.dropna(axis=0)

# Training a simple neural network

We construct the training set using data from 1988 to 2018 and the testing set using data from 2019

In [None]:
# Load the full price history, indexed by its "Date" column.
data_raw = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/19880101_20191231.csv",
    index_col="Date",
)
data = generate_features(data_raw)
# Preview the first five feature rows, rounded for readability.
print(data.head(5).round(decimals=3))

              open  open_1  close_1  high_1   low_1    volume_1  avg_price_5  \
Date                                                                           
1989-01-04  2146.6  2168.4   2144.6  2168.4  2127.1  17302883.0      2165.00   
1989-01-05  2177.7  2146.6   2177.7  2183.4  2146.6  15714720.0      2168.00   
1989-01-06  2190.5  2177.7   2190.5  2205.2  2173.0  20303094.0      2172.82   
1989-01-09  2194.3  2190.5   2194.3  2213.8  2182.3  16494441.0      2175.14   
1989-01-10  2199.5  2194.3   2199.5  2209.1  2185.0  18410324.0      2181.32   

            avg_price_30  avg_price_365  ratio_avg_price_5_30  ...  \
Date                                                           ...   
1989-01-04      2150.624       2062.113                 1.007  ...   
1989-01-05      2154.690       2062.668                 1.006  ...   
1989-01-06      2157.867       2063.218                 1.007  ...   
1989-01-09      2160.005       2064.341                 1.007  ...   
1989-01-10      216

In [None]:
# Date boundaries of the training and testing periods.
start_train, end_train = '1988-01-01', '2018-12-31'
start_test, end_test = '2019-01-01', '2019-12-31'

# Slice the feature table by date label.
data_train = data.loc[start_train:end_train]
data_test = data.loc[start_test:end_test]

# Inputs are every column except the target column 'close'.
X_train = data_train.drop('close', axis=1).values
X_test = data_test.drop('close', axis=1).values
y_train = data_train['close'].values
y_test = data_test['close'].values

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the mean and variance from the training data only ...
X_scaled_train = scaler.fit_transform(X_train)
# ... and reuse those statistics for the test data. Re-fitting on X_test
# would standardize the two sets differently, so the network would be
# evaluated on inputs scaled unlike the ones it was trained on.
X_scaled_test = scaler.transform(X_test)


We now build a neural network model using the Keras Sequential API

After defining the model, we train it against the training set

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# One ReLU hidden layer of 32 units followed by a single linear output.
model = Sequential(
    [
        Dense(32, activation='relu'),
        Dense(1),
    ]
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss="mean_squared_error",
)
model.fit(X_scaled_train, y_train, epochs=100, verbose=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7e619215a200>

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Score the network on the scaled test set and report three regression
# metrics, each printed to three decimals.
predictions = model.predict(X_scaled_test)
print(f'MSE: {mean_squared_error(y_test,predictions):.3f}')
print(f'MAE: {mean_absolute_error(y_test, predictions):.3f}')
print(f'R^2: {r2_score(y_test, predictions):.3f}')


MSE: 259058856.272
MAE: 15610.282
R^2: -222.188


# Fine-tuning the neural network

We perform model fine-tuning in TensorFlow

After initializing the hyperparameters to optimize, we create a function to train and validate the model that will take the hyperparameters as arguments, and output the performance

In [None]:
from tensorboard.plugins.hparams import api as hp
# Hyperparameter search space: hidden-layer size, number of training epochs,
# and the interval from which learning rates are sampled.
# NOTE(review): 161 looks like a typo for 16 - the final "optimal" model in
# this notebook is built with 16 hidden units, a size that is not in this
# list. Confirm before re-running the search.
HP_HIDDEN = hp.HParam('hidden_size', hp.Discrete([64, 32, 161]))
HP_EPOCHS = hp.HParam('epochs', hp.Discrete([300, 1000]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.RealInterval (0.01, 0.4))

def train_test_model(hparams, logdir):
    """Train one network for the given hyperparameters and score it.

    The network has a single ReLU hidden layer and one linear output. It is
    fitted on the scaled training set, with the scaled test set used as
    validation data for early stopping, and progress is logged to ``logdir``.

    Args:
        hparams: Mapping from HP_HIDDEN, HP_EPOCHS and HP_LEARNING_RATE to
            the values to use for this trial.
        logdir: Directory that receives the TensorBoard and hparams logs.

    Returns:
        Tuple ``(mse, r2)`` measured on the test set.
    """
    network = Sequential(
        [
            Dense(units=hparams[HP_HIDDEN], activation='relu'),
            Dense(units=1),
        ]
    )
    network.compile(
        loss='mean_squared_error',
        optimizer=tf.keras.optimizers.Adam(hparams[HP_LEARNING_RATE]),
        metrics=['mean_squared_error'],
    )
    training_callbacks = [
        tf.keras.callbacks.TensorBoard(logdir),
        hp.KerasCallback(logdir, hparams),
        # Stop once val_loss has not improved for 200 consecutive epochs.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=200,
            verbose=0,
            mode='auto',
        ),
    ]
    network.fit(
        X_scaled_train,
        y_train,
        validation_data=(X_scaled_test, y_test),
        epochs=hparams[HP_EPOCHS],
        verbose=False,
        callbacks=training_callbacks,
    )
    # evaluate() returns [loss, metric]; the loss itself is the MSE.
    mse = network.evaluate(X_scaled_test, y_test)[0]
    r2 = r2_score(y_test, network.predict(X_scaled_test))
    return mse, r2


def run(hparams, logdir):
    """Run one tuning trial and record its results as TensorBoard summaries.

    Args:
        hparams: Hyperparameter values for this trial, keyed by HP_HIDDEN,
            HP_EPOCHS and HP_LEARNING_RATE.
        logdir: Directory that receives the summaries for this trial.
    """
    tracked_metrics = [
        hp.Metric('mean_squared_error', display_name='mse'),
        hp.Metric('r2', display_name='r2'),
    ]
    writer = tf.summary.create_file_writer(logdir)
    with writer.as_default():
        hp.hparams_config(
            hparams=[HP_HIDDEN, HP_EPOCHS, HP_LEARNING_RATE],
            metrics=tracked_metrics,
        )
        mse, r2 = train_test_model(hparams, logdir)
        for tag, value in (('mean_squared_error', mse), ('r2', r2)):
            tf.summary.scalar(tag, value, step=1)

In [None]:

# Five evenly spaced learning rates across the HP_LEARNING_RATE interval,
# each rounded to two decimals.
learning_rates = [
    float("%.2f" % float(rate))
    for rate in tf.linspace(HP_LEARNING_RATE.domain.min_value,
                            HP_LEARNING_RATE.domain.max_value, num=5)
]

# Full grid, ordered by hidden size, then epochs, then learning rate.
trials = [
    {HP_HIDDEN: hidden, HP_EPOCHS: epochs, HP_LEARNING_RATE: learning_rate}
    for hidden in HP_HIDDEN.domain.values
    for epochs in HP_EPOCHS.domain.values
    for learning_rate in learning_rates
]

session_num = 0
for hparams in trials:
    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print({h.name: hparams[h] for h in hparams})
    run(hparams, 'logs/hparam_tuning/' + run_name)
    session_num += 1

--- Starting trial: run-0
{'hidden_size': 32, 'epochs': 300, 'learning_rate': 0.01}
--- Starting trial: run-1
{'hidden_size': 32, 'epochs': 300, 'learning_rate': 0.11}
--- Starting trial: run-2
{'hidden_size': 32, 'epochs': 300, 'learning_rate': 0.21}
--- Starting trial: run-3
{'hidden_size': 32, 'epochs': 300, 'learning_rate': 0.3}
--- Starting trial: run-4
{'hidden_size': 32, 'epochs': 300, 'learning_rate': 0.4}
--- Starting trial: run-5
{'hidden_size': 32, 'epochs': 1000, 'learning_rate': 0.01}
--- Starting trial: run-6
{'hidden_size': 32, 'epochs': 1000, 'learning_rate': 0.11}
--- Starting trial: run-7
{'hidden_size': 32, 'epochs': 1000, 'learning_rate': 0.21}
--- Starting trial: run-8
{'hidden_size': 32, 'epochs': 1000, 'learning_rate': 0.3}
--- Starting trial: run-9
{'hidden_size': 32, 'epochs': 1000, 'learning_rate': 0.4}
--- Starting trial: run-10
{'hidden_size': 64, 'epochs': 300, 'learning_rate': 0.01}
--- Starting trial: run-11
{'hidden_size': 64, 'epochs': 300, 'learning_ra

Finally, we use the optimal model to make predictions

In [None]:
# Rebuild the network with the selected hyperparameters: 16 hidden units,
# a learning rate of 0.21, and 1000 epochs.
model = Sequential(
    [
        Dense(16, activation='relu'),
        Dense(1),
    ]
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.21),
    loss='mean_squared_error',
)
model.fit(X_scaled_train, y_train, epochs=1000, verbose=False)
# Keep the first column of the 2-D prediction array as a flat vector.
predictions = model.predict(X_scaled_test)[:, 0]

Plot the prediction along with the ground truth

In [None]:
import matplotlib.pyplot as plt
# Ground truth in black, network prediction in green. The prediction is drawn
# once: it used to be plotted three times (blue, red, green) on top of itself,
# so the visible line was green while the legend swatch pointed at the hidden
# blue one. Explicit labels keep the legend tied to the lines actually shown.
plt.plot(data_test.index, y_test, c='k', label='Truth')
plt.plot(data_test.index, predictions, c='g', label='Neural network prediction')
# One tick every 10 rows, sized from the data rather than a hard-coded 252.
plt.xticks(range(0, len(data_test.index), 10), rotation=60)
plt.xlabel('Date')
plt.ylabel('Close price')
plt.legend()
plt.show()