## *Question 1*

In [88]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as split_data
from sklearn.metrics import mean_squared_error

In [89]:
def ReLU(vector):
    return np.maximum(0, vector)

In [90]:
def initializeParams(sizes):
    params = {}
    for i in range(1, len(sizes)):
        params['W' + str(i)] = np.random.rand(sizes[i], sizes[i-1])*0.01
        params['B' + str(i)] = np.random.rand(sizes[i], 1)*0.01
    return params

In [91]:
def forward_propagation(x, params):
    layers = len(params)//2
    weights = {}
    for i in range(1, layers + 1):
        weights['Z'+str(i)] = np.dot(params['W'+str(i)], x) + params['B'+str(i)] if i == 1 else np.dot(params['W'+str(i)], weights['A'+str(i-1)]) + params['B'+str(i)]
        weights['A'+str(i)] = weights['Z'+str(i)] if i == layers else ReLU(weights['Z'+str(i)])
    return weights

In [92]:
def compute_cost(weights, y):
    layers = len(weights)//2
    predicted_y = weights['A' + str(layers)].T
    cost = (1/(2*len(y)))*np.sum(np.square(predicted_y - y))
    return cost

In [93]:
def backward_propagation(x, y, weights, params):
    layers = len(params)//2
    m = len(y)
    gradients = {}
    for i in range(layers, 0, -1):
        dA = np.dot(params['W'+str(i+1)].T, dZ) if (i!=layers) else (1/m)*(weights['A'+str(i)]-y)
        dZ = np.multiply(dA, np.where(weights['A'+str(i)]>=0, 1, 0)) if (i!=layers) else dA
        gradients['W'+str(i)] = (1/m)*np.dot(dZ, weights['A'+str(i-1)].T) if (i!=1) else (1/m)*np.dot(dZ, x.T)
        gradients['B'+str(i)] = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    return gradients

In [94]:
def updating_params(params, gradients, alpha):
    layers = len(params)//2
    updated_params = {}
    for i in range(1, layers+1):
        updated_params['W'+str(i)] = params['W'+str(i)] - alpha*(gradients['W'+str(i)])
        updated_params['B'+str(i)] = params['B'+str(i)] - alpha*(gradients['B'+str(i)])
    return updated_params

In [95]:
def train_model(x, y, layerSize, epochs, alpha):
    params = initializeParams(layerSize)
    for i in range(epochs):
        forward_predict = forward_propagation(x.T, params)
        cost = compute_cost(forward_predict, y.T)
        gradients = backward_propagation(x.T, y.T, forward_predict, params)
        params = updating_params(params, gradients, alpha)
    return params

In [96]:
def compute_accuracy(train_x, test_x, train_y, test_y, params, layerSizes):
    values_train = forward_propagation(train_x.T, params)
    values_test = forward_propagation(test_x.T, params)
    train_acc = np.sqrt(mean_squared_error(train_y, values_train['A'+str(len(layer_sizes)-1)].T))
    test_acc = np.sqrt(mean_squared_error(test_y, values_test['A'+str(len(layer_sizes)-1)].T))
    return train_acc, test_acc

In [97]:
def predict(x, params):
    values = forward_propagation(x.T, params)
    predictions = values['A'+str(len(values)//2)].T
    return predictions

In [98]:
data = pd.read_csv('boston.csv', header=0)
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [99]:
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [100]:
train_x, test_x, train_y, test_y = split_data(x, y, train_size=0.8, shuffle=True)
train_x = np.array(train_x)
test_x = np.array(test_x)
train_y = np.array(train_y)
test_y = np.array(test_y)

In [103]:
epochs, alpha, layerSizes = 3000, 0.04, [13, 6, 5, 1]
parameters = train_model(train_x, train_y, layerSizes, epochs, alpha)

In [104]:
predict_y = predict(test_x, parameters)
np.round(predict_y-test_y, 2)

array([[ 2.23, 15.23, -2.47, ..., 11.33, -5.07,  3.53],
       [ 1.04, 14.04, -3.66, ..., 10.14, -6.26,  2.34],
       [ 6.89, 19.89,  2.19, ..., 15.99, -0.41,  8.19],
       ...,
       [ 1.38, 14.38, -3.32, ..., 10.48, -5.92,  2.68],
       [13.93, 26.93,  9.23, ..., 23.03,  6.63, 15.23],
       [ 2.19, 15.19, -2.51, ..., 11.29, -5.11,  3.49]])

## *Question 2*

In [116]:
dataset = pd.read_csv('seeds.csv', header=0)
dataset.head()

Unnamed: 0,Area,Perimeter,Compactness,Kernel.Length,Kernel.Width,Asymmetry.Coeff,Kernel.Groove,Type
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [117]:
X = dataset.iloc[:, :-1]
Y = dataset.iloc[:, -1]

In [118]:
train_X, test_X, train_Y, test_Y = split_data(X, Y, train_size=0.8, shuffle=True)
train_X = np.array(train_X)
test_X = np.array(test_X)
train_Y = np.array(train_Y)
test_Y = np.array(test_Y)

In [119]:
epochs2, alpha2, layerSizes2 = 3000, 0.04, [7, 6, 5, 1]
parameters2 = train_model(train_X, train_Y, layerSizes2, epochs2, alpha2)

In [120]:
predict_y2 = predict(test_X, parameters2)
np.round(predict_y2-test_Y, 2)

array([[-0.94,  0.06,  0.06, ..., -0.94, -0.94, -1.94],
       [-0.95,  0.05,  0.05, ..., -0.95, -0.95, -1.95],
       [-0.95,  0.05,  0.05, ..., -0.95, -0.95, -1.95],
       ...,
       [-0.94,  0.06,  0.06, ..., -0.94, -0.94, -1.94],
       [-0.94,  0.06,  0.06, ..., -0.94, -0.94, -1.94],
       [-0.95,  0.05,  0.05, ..., -0.95, -0.95, -1.95]])