# In [None]:
import numpy as np
import pandas as pd
import math

def standardization(data):
    """Scale ``data`` to zero mean and unit standard deviation along axis 0.

    Args:
        data: Array-like accepted by ``np.mean`` / ``np.std`` with ``axis=0``
            (a NumPy array, or a pandas Series / DataFrame).

    Returns:
        ``(data - mean) / std`` computed along axis 0. Entries whose standard
        deviation is exactly zero (constant columns) come back as zeros
        instead of NaN/inf.
    """
    mu = np.mean(data, axis=0)
    sigma = np.asarray(np.std(data, axis=0))
    # A constant column has sigma == 0; dividing by it would give 0/0 = NaN.
    # Dividing by 1 instead leaves the (already zero) centered values intact.
    sigma = np.where(sigma == 0, 1.0, sigma)
    if sigma.ndim == 0:
        # Unwrap the 0-d array so 1-D inputs are divided by a plain scalar.
        sigma = sigma[()]
    return (data - mu) / sigma

def sum_of_square_error(y_predict, y_true):
    """Return the sum of squared differences between targets and predictions.

    Args:
        y_predict: Predicted values.
        y_true: Ground-truth values; must support ``y_true - y_predict``.

    Returns:
        The total of ``(y_true - y_predict) ** 2`` over every element.
    """
    residual = y_true - y_predict
    return np.sum(np.square(residual))

def sigmoid(X):
    """Element-wise logistic function ``1 / (1 + exp(-X))``.

    Args:
        X: Scalar or array-like; it is converted to a float64 array first, so
            object or boolean arrays are accepted as long as they are numeric.

    Returns:
        float64 values in [0, 1] with the same shape as ``X`` (a NumPy scalar
        for scalar input).
    """
    X = np.asarray(X, dtype=np.float64)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way exp(-x)
    # does for large negative x. The two branches below are algebraically the
    # same sigmoid, each written in terms of that safe exponential.
    decay = np.exp(-np.abs(X))
    result = np.where(X >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

def sigmoid_derivative(X):
    """Derivative of the logistic function evaluated at ``X``.

    Args:
        X: Scalar or array-like of pre-activation values.

    Returns:
        ``sigmoid(X) * (1 - sigmoid(X))``, element-wise.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    activation = sigmoid(X)
    return activation * (1 - activation)

# import and preprocess data
df = pd.read_csv('2024_energy_efficiency_data.csv')
# one hot encoding of the two categorical columns
df = pd.get_dummies(df, columns=['Orientation','Glazing Area Distribution'])
# only 'Heating Load' is used as the regression target below
df = df.drop(labels='Cooling Load',axis=1)
cols = ['# Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area',
 'Overall Height', 'Glazing Area']
# NOTE(review): the scaling statistics are computed on the full data set,
# i.e. before the train/test split below - confirm this is intended.
df[cols] = df[cols].apply(standardization)
# shuffle and split: 75% of the rows for training, the remainder for testing
training_set = df.sample(frac=0.75, random_state=313552034)
testing_set = df.drop(training_set.index)
# separate features and labels
# Request float64 explicitly: recent pandas versions emit boolean dummy
# columns, and mixing those with float columns would otherwise make
# to_numpy() fall back to a slow ``object`` array.
X_training_set = training_set.drop(labels='Heating Load',axis=1).to_numpy(dtype=np.float64)
Y_training_set = training_set['Heating Load'].to_numpy(dtype=np.float64).reshape(-1,1)
X_testing_set = testing_set.drop(labels='Heating Load',axis=1).to_numpy(dtype=np.float64)
Y_testing_set = testing_set['Heating Load'].to_numpy(dtype=np.float64).reshape(-1,1)

# Hyper-parameters of the gradient-descent training run.
EPOCH = 10000
learning_rate = 0.00001
HIDDEN_UNITS = 16
np.random.seed(313552034)

# Work on a float64 view of the features so every matrix product below stays
# numeric even if the feature matrix arrives with ``object`` dtype.
X_train = np.asarray(X_training_set, dtype=np.float64)
# Size the input layer from the data instead of hard-coding the column count.
n_features = X_train.shape[1]

# np.random.randint(-1, 1) excludes the upper bound, so it only ever produced
# the integers -1 and 0. Draw real-valued weights from [-1, 1) instead.
W1 = np.random.uniform(-1.0, 1.0, size=(n_features, HIDDEN_UNITS))
W2 = np.random.uniform(-1.0, 1.0, size=(HIDDEN_UNITS, 1))

# NOTE: despite its name, ``mse`` stores the per-epoch *sum* of squared errors
# (see sum_of_square_error); it is normalised when the error is reported.
mse = np.zeros(EPOCH)
for i in range(EPOCH):
    # Forward pass: input -> sigmoid hidden layer -> linear output.
    Z1 = np.dot(X_train, W1)
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2)
    mse[i] = sum_of_square_error(Z2, Y_training_set)

    # Backward pass for the loss sum((Z2 - Y)^2).
    dZ2 = 2*(Z2 - Y_training_set)          # dL/dZ2
    # The output layer sees the activations A1, not the pre-activations Z1.
    W2_update = np.dot(A1.T, dZ2)          # dL/dW2
    dA1 = np.dot(dZ2, W2.T)                # dL/dA1
    # Chain rule through the sigmoid non-linearity of the hidden layer.
    dZ1 = dA1 * sigmoid_derivative(Z1)     # dL/dZ1
    W1_update = np.dot(X_train.T, dZ1)     # dL/dW1

    # Plain gradient-descent step.
    W1 = W1 - learning_rate*W1_update
    W2 = W2 - learning_rate*W2_update

# ``mse`` stores the per-epoch sum of squared errors, so dividing the last
# entry by the sample count and taking the square root gives the RMS error.
print("Training E_MRS: ",math.sqrt(mse[EPOCH-1]/len(Y_training_set)))
# Use the same forward pass as in training: the hidden layer goes through the
# sigmoid before the output weights are applied. Skipping the activation
# would evaluate a different (purely linear) model than the one trained.
test_prediction = np.dot(sigmoid(np.dot(X_testing_set, W1)), W2)
mse_2 = math.sqrt(
    sum_of_square_error(test_prediction, Y_testing_set)/len(Y_testing_set))
print("Testing E_MRS: ",mse_2)