# Plan van aanpak

We willen radiation predicten

## Vraag die we willen beantwoorden
Welk model heeft beste accuracy?

## Subvragen
* Vergelijk modellen
* alles tegen elkaar
* bayesian vs non bayesian

## Modellen die we willen maken
* Linear Regression
* Logistic Regression
* Support Vector Regression (non bayesian)
* Relevance Vector Regression (bayesian)
* CNN
* BCNN

# Import packages

In [None]:
# Clears entire notebook
%reset -f

showInfo = True;

amountOfSamplesForPlot = 1000;

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score, max_error, mean_absolute_percentage_error
from sklearn import preprocessing


from tabulate import tabulate

# Import Data

In [None]:
# Load the raw measurements from the CSV file next to the notebook
data = pd.read_csv('./SolarPrediction.csv')

if showInfo:
    data.info()
    # A bare expression inside an if-block is not echoed by the notebook,
    # so the summary statistics have to be printed explicitly.
    print(data.describe())

## Clean data

In [None]:
# Count the missing values per column to see whether any cleaning is needed
data.isnull().sum()

### Make separate columns of object types

In [None]:
# Every date/time column is replaced by three numeric component columns.
# Mapping: source column -> ((new column name, DatetimeIndex attribute), ...)
dateTimeComponents = {
    'Data': (('Year', 'year'), ('Month', 'month'), ('Day', 'day')),
    'Time': (('Hour', 'hour'), ('Minute', 'minute'), ('Second', 'second')),
    'TimeSunRise': (
        ('SunRiseHour', 'hour'),
        ('SunRiseMinute', 'minute'),
        ('SunRiseSecond', 'second'),
    ),
    'TimeSunSet': (
        ('SunSetHour', 'hour'),
        ('SunSetMinute', 'minute'),
        ('SunSetSecond', 'second'),
    ),
}

for sourceColumn, componentColumns in dateTimeComponents.items():
    parsedValues = pd.DatetimeIndex(data[sourceColumn])
    for newColumn, attributeName in componentColumns:
        data[newColumn] = getattr(parsedValues, attributeName)
    # The original object column is no longer needed once it has been split
    data.drop(sourceColumn, axis=1, inplace=True)

if showInfo:
    data.info()

In [None]:
# Heatmap of the pairwise correlations between all columns
correlationMatrix = data.corr()
fig = plt.figure(figsize=(20, 10))
fig.suptitle('Feature Correlation', fontsize=18)
seaborn.heatmap(correlationMatrix, cmap='RdBu', annot=True)

In [None]:
# Same correlation matrix as the heatmap, shown as a table of numbers
data.corr()

In [None]:
# Summary statistics per column, used to spot columns without any variation
data.describe()
# Year, SunRiseHour, SunRiseSecond and SunSetSecond have a standard deviation of 0, i.e. they are constant throughout the dataset.
# They therefore carry no information about the radiation and can be dropped.

In [None]:
# Columns that never change carry no information for the prediction
constantColumns = ['Year', 'SunRiseHour', 'SunRiseSecond', 'SunSetSecond']
data.drop(columns=constantColumns, inplace=True)

In [None]:
# Correlation heatmap of the columns that are currently in the frame
correlationMatrix = data.corr()
fig = plt.figure(figsize=(20, 10))
fig.suptitle('Feature Correlation', fontsize=18)
seaborn.heatmap(correlationMatrix, cmap='RdBu', annot=True)

## Daylight correction

In [None]:
# Optional daylight-only filter, currently disabled:
# data = data[data.Hour > 5]
# data = data[data.Hour < 19]

# Order the rows by ascending UNIX timestamp
data = data.sort_values('UNIXTime')

# Train test split

In [None]:
# Separate the target column from the input columns
targetColumn = 'Radiation'
features = data.loc[:, data.columns != targetColumn]
radiation = data.loc[:, data.columns == targetColumn]

if showInfo:
    features.info()
    radiation.info()

# shuffle=False keeps the row order, so the last 10% of the rows form the test set
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    features, radiation, test_size=0.10, shuffle=False
)

# The scalers learn their statistics from the training split only
scalerX = preprocessing.StandardScaler()
scalerY = preprocessing.StandardScaler()
XtrainScaled = scalerX.fit_transform(Xtrain)
YtrainScaled = scalerY.fit_transform(Ytrain)

# The test split is transformed with the training statistics
XtestScaled = scalerX.transform(Xtest)
YtestScaled = scalerY.transform(Ytest)

# Modellen
## Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Linear regression on the standardised features and target
linRegModel = LinearRegression()
linRegModel.fit(XtrainScaled, YtrainScaled)

if showInfo:
    # One bar per feature showing its fitted coefficient
    featureWeights = linRegModel.coef_[0]
    plt.figure(figsize=(20, 10))
    plt.bar(features.columns, featureWeights)

In [None]:
# Predict on the standardised test features, then undo the target scaling
linRegScaledPredictions = linRegModel.predict(XtestScaled)
linRegRadiationPredictions = scalerY.inverse_transform(linRegScaledPredictions)


In [None]:
def calcMetrics(YTrue, YPredict):
    """Compute the regression metrics used to compare the models.

    Returns a list ordered as: mean absolute error, mean squared error,
    R2 score, mean absolute percentage error, max error.
    """
    metricFunctions = (
        mean_absolute_error,
        mean_squared_error,
        r2_score,
        mean_absolute_percentage_error,
        max_error,
    )
    return [metric(YTrue, YPredict) for metric in metricFunctions]

In [None]:
def printTestMetrics(YTrue, YPredict):
    """Print the values returned by calcMetrics as a one-row table."""
    headerRow = ['Mean Absolute Error', 'Mean Squared Error', 'R2 Score', 'MAPE', 'Max Error']
    metricRow = calcMetrics(YTrue, YPredict)
    rows = [headerRow, metricRow]

    print(tabulate(rows, headers='firstrow', tablefmt='fancy_grid'))


In [None]:
def plotPrediction(YTrue, YPredict):
    """Plot the true values and the predictions against the sample index."""
    sampleIndex = np.arange(len(YTrue))
    plt.figure(figsize=(30, 5))
    for series, seriesLabel in ((YTrue, 'True values'), (YPredict, 'Predictions')):
        plt.plot(sampleIndex, series, label=seriesLabel)
    plt.legend()

In [None]:
# Metric table and full-range plot for the linear regression predictions
printTestMetrics(Ytest, linRegRadiationPredictions)
plotPrediction(Ytest, linRegRadiationPredictions)



In [None]:
# Ground truth versus the linear regression predictions over the whole test set.
# The x-axis is the sample index; the former UNIXTime lookup was never used.
sampleIndex = np.arange(len(Ytest))
plt.figure(figsize=(10, 4))
plt.plot(sampleIndex, Ytest, label="Ground Truth")
plt.plot(sampleIndex, linRegRadiationPredictions, label="Linear Regression")
plt.legend(loc="upper right")
plt.title("Non-Bayesian Linear Models")
plt.xlabel("Sample number")
plt.ylabel("Radiation")

## Ridge Regression

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with the library's default settings
ridgeModel = Ridge()
ridgeModel.fit(XtrainScaled, YtrainScaled)

# Undo the target scaling before computing the metrics
ridgeScaledPredictions = ridgeModel.predict(XtestScaled)
ridgeRadiationPredictions = scalerY.inverse_transform(ridgeScaledPredictions)

printTestMetrics(Ytest, ridgeRadiationPredictions)

## Bayesian Regression

In [None]:
from CompEx1Models import BayesianRegression

# Bayesian regression with alpha and beta both fixed at 1
bRegModel = BayesianRegression(alpha=1, beta=1)
bRegModel.fit(XtrainScaled, np.ravel(YtrainScaled))

predBReg, stdBReg = bRegModel.predict(XtestScaled, return_std=True)
predBReg = scalerY.inverse_transform(predBReg.reshape(-1, 1))
# A standard deviation is a spread, not a location: it only has to be
# multiplied by the target scale. inverse_transform would also add the
# training mean and inflate every uncertainty band.
# NOTE(review): assumes the model returns the std in standardised target units.
stdBReg = stdBReg.reshape(-1, 1) * scalerY.scale_

printTestMetrics(Ytest, predBReg)

# Reference point: the same metrics for an all-zero prediction
printTestMetrics(Ytest, np.zeros(Ytest.shape))



## Empirical Bayesian Regression

In [None]:
# Import only the class that is used instead of everything in the module
from CompEx1Models import EmpiricalBayesRegression

# alpha and beta are passed as 1.0
bEmpRegModel = EmpiricalBayesRegression(alpha=1., beta=1.)
bEmpRegModel.fit(XtrainScaled, np.ravel(YtrainScaled))

predBEmpReg, stdBEmpReg = bEmpRegModel.predict(XtestScaled, return_std=True)
# Mean predictive std, printed before any rescaling
print(stdBEmpReg.mean())
predBEmpReg = scalerY.inverse_transform(predBEmpReg.reshape(-1, 1))
# A standard deviation only scales with the target; inverse_transform would
# wrongly add the training mean to it.
# NOTE(review): assumes the model returns the std in standardised target units.
stdBEmpReg = stdBEmpReg.reshape(-1, 1) * scalerY.scale_

printTestMetrics(Ytest, predBEmpReg)



## EM

In [None]:
# Import only the class that is used instead of everything in the module
from CompEx1Models import EM

# alpha and beta are passed as 1.0
EMModel = EM(alpha=1., beta=1.)
EMModel.fit(XtrainScaled, np.ravel(YtrainScaled))

predEM, stdEM = EMModel.predict(XtestScaled, return_std=True)
predEM = scalerY.inverse_transform(predEM.reshape(-1, 1))
# A standard deviation only scales with the target; inverse_transform would
# wrongly add the training mean to it.
# NOTE(review): assumes the model returns the std in standardised target units.
stdEM = stdEM.reshape(-1, 1) * scalerY.scale_

printTestMetrics(Ytest, predEM)

## VB

In [None]:
# Import only the class that is used instead of everything in the module
from CompEx1Models import VB

# alpha and beta are passed as 1.0
VBModel = VB(alpha=1., beta=1.)
VBModel.fit(XtrainScaled, np.ravel(YtrainScaled))

predVB, stdVB = VBModel.predict(XtestScaled, return_std=True)
predVB = scalerY.inverse_transform(predVB.reshape(-1, 1))
# A standard deviation only scales with the target; inverse_transform would
# wrongly add the training mean to it.
# NOTE(review): assumes the model returns the std in standardised target units.
stdVB = stdVB.reshape(-1, 1) * scalerY.scale_

printTestMetrics(Ytest, predVB)

In [None]:
## Plots: one panel per Bayesian linear model
amountOfSamplesForPlot = 500

# The x-axis is the sample index (the former UNIXTime lookup was overwritten
# immediately and never used).
timeAxis = np.arange(0, len(Ytest))[0:amountOfSamplesForPlot]
trueValues = np.array(Ytest[0:amountOfSamplesForPlot])

# (panel title, predictions, predictive std) in row-major panel order
bayesianPanels = [
    ("Bayesian", predBReg, stdBReg),
    ("Empirical Bayesian", predBEmpReg, stdBEmpReg),
    ("EM", predEM, stdEM),
    ("VB", predVB, stdVB),
]

fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for panelAxis, (panelTitle, panelPred, panelStd) in zip(axes.ravel(), bayesianPanels):
    predWindow = panelPred[0:amountOfSamplesForPlot]
    stdWindow = panelStd[0:amountOfSamplesForPlot]
    panelAxis.plot(timeAxis, predWindow, label="Prediction")
    # Shaded band of one standard deviation around the prediction
    panelAxis.fill_between(
        timeAxis,
        np.ravel(predWindow - stdWindow),
        np.ravel(predWindow + stdWindow),
        alpha=0.2,
        label=f"std({panelStd.mean()})",
    )
    panelAxis.plot(timeAxis, trueValues, color='red', label="True value")
    panelAxis.legend(loc="upper right")
    panelAxis.set_title(panelTitle)

fig.suptitle("Bayesian linear models")



## Bayesian Ridge

In [None]:
from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression with the library's default settings
bRidgeModel = BayesianRidge()

# This estimator expects a 1-D target; ravel the column vector as the other
# single-output cells do, instead of relying on an implicit conversion.
bRidgeModel.fit(XtrainScaled, np.ravel(YtrainScaled))

bRidgeRadiationPredictions = scalerY.inverse_transform(bRidgeModel.predict(XtestScaled).reshape(-1, 1))

printTestMetrics(Ytest, bRidgeRadiationPredictions)

## Lasso

In [None]:
from sklearn.linear_model import Lasso

# Lasso regression with alpha set to 0.1
lassoModel = Lasso(alpha=0.1)
lassoModel.fit(XtrainScaled, YtrainScaled)

# Column-shape the predictions and undo the target scaling
lassoScaledPredictions = lassoModel.predict(XtestScaled).reshape(-1, 1)
lassoRadiationPredictions = scalerY.inverse_transform(lassoScaledPredictions)

printTestMetrics(Ytest, lassoRadiationPredictions)

In [None]:
# Non-Bayesian linear models against the ground truth, first 500 test samples
sampleIndex = np.arange(0, len(Ytest))[0:500]
linearModelCurves = [
    (linRegRadiationPredictions, "Linear Regression", 1),
    (ridgeRadiationPredictions, "Ridge Regression", 0.8),
    (lassoRadiationPredictions, "Lasso Regression", 0.8),
]

plt.figure(figsize=(10, 4))
plt.plot(sampleIndex, Ytest[0:500], label="Ground Truth")
for curveValues, curveLabel, curveAlpha in linearModelCurves:
    plt.plot(sampleIndex, curveValues[0:500], label=curveLabel, alpha=curveAlpha)
plt.legend(loc="upper right")
plt.title("Non-Bayesian Linear Models")
plt.xlabel("Sample number")
plt.ylabel("Radiation")

## KNN Regressor

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regression with the library's default settings
knnModel = KNeighborsRegressor()
knnModel.fit(XtrainScaled, YtrainScaled)

# Column-shape the predictions and undo the target scaling
knnScaledPredictions = knnModel.predict(XtestScaled).reshape(-1, 1)
knnRadiationPredictions = scalerY.inverse_transform(knnScaledPredictions)

printTestMetrics(Ytest, knnRadiationPredictions)

## SVR

In [None]:
from sklearn.svm import SVR

# Support vector regression with an RBF kernel
svrModel = SVR(kernel='rbf')
# This estimator expects a 1-D target; ravel the column vector as the other
# single-output cells do, instead of relying on an implicit conversion.
svrModel.fit(XtrainScaled, np.ravel(YtrainScaled))

svrRadiationPredictions = scalerY.inverse_transform(svrModel.predict(XtestScaled).reshape(-1, 1))

printTestMetrics(Ytest, svrRadiationPredictions)
plotPrediction(Ytest, svrRadiationPredictions)

## RVR

Due to extremely long computation times, RVR is not tested (yet)

In [None]:
# from sklearn_rvm import EMRVR

# rvrModel = EMRVR(kernel='rbf', verbose=True);

# rvrModel.fit(XtrainScaled, YtrainScaled); 
# rvrRadiationPredictions = rvrModel.predict(XtestScaled);
# printTestMetrics(Ytest, scalerY.inverse_transform(rvrRadiationPredictions))

## Decision Tree


In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree regressor with the library's default settings
decTreeModel = DecisionTreeRegressor()
decTreeModel.fit(XtrainScaled, YtrainScaled)

# Column-shape the predictions and undo the target scaling
decTreeScaledPredictions = decTreeModel.predict(XtestScaled).reshape(-1, 1)
decTreeRadiationPredictions = scalerY.inverse_transform(decTreeScaledPredictions)

printTestMetrics(Ytest, decTreeRadiationPredictions)
plotPrediction(Ytest, decTreeRadiationPredictions)

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with hand-picked tree-size and feature-sampling settings
rndForestModel = RandomForestRegressor(
    min_samples_leaf=3,
    min_samples_split=6,
    max_depth=30,
    max_features=0.7,
)
rndForestModel.fit(XtrainScaled, np.ravel(YtrainScaled))

# Column-shape the predictions and undo the target scaling
rndForestScaledPredictions = rndForestModel.predict(XtestScaled).reshape(-1, 1)
rndForestRadiationPredictions = scalerY.inverse_transform(rndForestScaledPredictions)

printTestMetrics(Ytest, rndForestRadiationPredictions)
plotPrediction(Ytest, rndForestRadiationPredictions)

In [None]:
# Bar chart of the feature importances reported by the fitted random forest
featureNames = Xtrain.columns
plt.figure(figsize=(8, 4))
plt.bar(featureNames, rndForestModel.feature_importances_)
plt.xticks(rotation=45, ha='right');

## Extra trees

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

# Extra-trees regressor with the library's default settings
xTreeModel = ExtraTreesRegressor()

xTreeModel.fit(XtrainScaled, np.ravel(YtrainScaled))

xTreeRadiationPredictions = scalerY.inverse_transform(xTreeModel.predict(XtestScaled).reshape(-1, 1))
printTestMetrics(Ytest, xTreeRadiationPredictions)
# Plot the last 2000 test samples; the previous slice [-2000:-1] silently
# left out the final sample.
plotPrediction(Ytest[-2000:], xTreeRadiationPredictions[-2000:])

In [None]:
# Tree-based models against the ground truth on the first plotSamples test
# samples. plotSamples now actually controls the window instead of a
# hard-coded 500 repeated on every line.
plotSamples = 500
sampleIndex = np.arange(0, len(Ytest))[0:plotSamples]

plt.figure(figsize=(10, 4))
plt.plot(sampleIndex, Ytest[0:plotSamples], label="Ground Truth")
plt.plot(sampleIndex, decTreeRadiationPredictions[0:plotSamples], label="Decision Tree", alpha=1)
plt.plot(sampleIndex, rndForestRadiationPredictions[0:plotSamples], label="Random Forest", alpha=1)
plt.plot(sampleIndex, xTreeRadiationPredictions[0:plotSamples], label="Extra Tree", alpha=1)
plt.legend(loc="upper right")
plt.title("Decision tree based models")
plt.xlabel("Sample number")
plt.ylabel("Radiation")

## Neural Network

In [None]:

import tensorflow as tf
import tensorflow_probability as tfp


from tensorflow import keras

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers
from keras.optimizers import Adam

# Fully connected regression network: five L2-regularised hidden layers of
# 256 ReLU units followed by a single linear output unit.
model = Sequential()
model.add(Dense(256, activation='relu', kernel_initializer='normal', input_shape=(Xtrain.shape[1],), kernel_regularizer=regularizers.l2(l=0.01)))
for _ in range(4):
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l=0.01)))
model.add(Dense(1, activation='linear'))

# 'categorical_crossentropy' is a classification metric and is meaningless for
# a single linear regression output, so it is replaced by the mean absolute
# error, which the other networks in this notebook also report.
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])




In [None]:
# Train for 10 epochs, then evaluate the de-standardised test predictions
model.fit(XtrainScaled, YtrainScaled, epochs=10)

neuralScaledPredictions = model.predict(XtestScaled)
neuralRadiationPredictions = scalerY.inverse_transform(neuralScaledPredictions)

printTestMetrics(Ytest, neuralRadiationPredictions)
plotPrediction(Ytest, neuralRadiationPredictions)

## BCNN

In [None]:
# import tensorflow as tf
# import tensorflow_probability as tfp
# tfd = tfp.distributions
# tfpl = tfp.layers

# from tensorflow import keras

# from keras.models import Sequential

# from keras import regularizers
# from keras.optimizers import Adam

# kernel_divergence_fn=lambda q, p, _: tfp.distributions.kl_divergence(q, p) / (x_train.shape[0] *1.0)

# def prior(kernel_size, bias_size, dtype=None):
#     n = kernel_size + bias_size
#     # Independent Normal Distribution
#     return lambda t: tfd.Independent(
#             tfd.Normal(loc=tf.zeros(n, dtype=dtype), scale=1),
#             reinterpreted_batch_ndims=1
#         )

# def posterior(kernel_size, bias_size, dtype=None):
#     n = kernel_size + bias_size
#     return Sequential([
#         tfpl.VariableLayer(tfpl.IndependentNormal.params_size(n), dtype=dtype),
#         tfpl.IndependentNormal(n)
#     ])


# model = Sequential()
# model.add(tfp.layers.DenseVariational(256, posterior, prior, activation='relu', input_shape=(Xtrain.shape[1],)))
# model.add(tfp.layers.DenseVariational(256, posterior, prior, activation='relu'))
# model.add(tfp.layers.DenseVariational(256, posterior, prior, activation='relu'))
# model.add(tfp.layers.DenseVariational(256, posterior, prior, activation='relu'))
# model.add(tfp.layers.DenseVariational(1, posterior, prior, activation='linear'))

# model.compile(loss='mse', optimizer='adam', metrics=['mse'])

In [None]:
from keras.layers import Dropout,Dense,Input
from keras import Sequential

# Layer stack: two hidden layers of 100 ReLU units, each followed by 30%
# dropout, and a single linear output unit.
mcLayers = [
    Input((Xtrain.shape[1],)),
    Dense(100, activation='relu'),
    Dropout(0.3),
    Dense(100, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='linear'),
]

model_mc = Sequential()
for mcLayer in mcLayers:
    model_mc.add(mcLayer)

model_mc.compile(loss='mse', optimizer="adam", metrics=['mae'])
model_mc.summary()

In [None]:
from keras.layers import Dropout,Dense,Input
from keras import Sequential
from keras import regularizers
from keras.optimizers import Adam
import keras


# Monte Carlo dropout network: dropout has to stay active at prediction time,
# so training=True is passed to the Dropout layers. It was previously passed
# to the Dense layers, where it does not switch dropout on.
# The input is one feature vector per sample; the former shape
# (None, n_features) added a spurious extra axis that the 2-D feature
# matrices used in this notebook do not have.
inputLayer = Input(shape=(Xtrain.shape[1],))
x = Dropout(0.3)(inputLayer, training=True)
x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l=0.01))(x)
x = Dropout(0.3)(x, training=True)
x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l=0.01))(x)
x = Dropout(0.3)(x, training=True)
x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l=0.01))(x)
outputLayer = Dense(1, activation='linear')(x)

model_dropout = keras.Model(inputLayer, outputLayer, name="Dropout_model")
model_dropout.compile(loss='mse', optimizer="adam", metrics=['mae'])
# model_dropout.summary()



In [None]:

# import tensorflow.keras.backend as K
# model_dropout_pred = K.function(
#     [model_dropout.input, 
#     K.learning_phase()], 
#     [model_dropout.output])


# model_mc.fit(XtrainScaled, YtrainScaled, epochs=10)

In [None]:
# Train the dropout network before evaluating it: no other cell fits
# model_mc, so without this step the metrics below would describe randomly
# initialised weights.
model_mc.fit(XtrainScaled, YtrainScaled, epochs=10)

BneuralRadiationPredictions = scalerY.inverse_transform(model_mc.predict(XtestScaled))
printTestMetrics(Ytest, BneuralRadiationPredictions)
plotPrediction(Ytest, BneuralRadiationPredictions)

In [None]:
# Neural network models against the ground truth on the first plotSamples
# test samples. plotSamples now actually controls the window instead of a
# hard-coded 500 repeated on every line.
plotSamples = 500
sampleIndex = np.arange(0, len(Ytest))[0:plotSamples]

plt.figure(figsize=(10, 4))
plt.plot(sampleIndex, Ytest[0:plotSamples], label="Ground Truth")
plt.plot(sampleIndex, neuralRadiationPredictions[0:plotSamples], label="CNN", alpha=1)
plt.plot(sampleIndex, BneuralRadiationPredictions[0:plotSamples], label="CNN dropout", alpha=1)
plt.legend(loc="upper right")
plt.title("Neural network based models")
plt.xlabel("Sample number")
plt.ylabel("Radiation")

## Gaussian Process Regressor

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor

# Gaussian process regressor with the library's default settings;
# fit returns the estimator itself, so it can be chained onto the constructor
gprModel = GaussianProcessRegressor().fit(XtrainScaled, YtrainScaled)


In [None]:

# Predictive mean and standard deviation in the standardised target space
predGPR, stdGPR = gprModel.predict(XtestScaled, return_std=True)
predGPR = scalerY.inverse_transform(predGPR.reshape(-1, 1))
# A standard deviation is a spread, not a location: it only has to be
# multiplied by the target scale. inverse_transform would also add the
# training mean and inflate the uncertainty band.
stdGPR = stdGPR.reshape(-1, 1) * scalerY.scale_

printTestMetrics(Ytest, predGPR)

In [None]:
# SVR, KNN and GPR against the ground truth on the first plotSamples test
# samples. plotSamples now actually controls the window instead of a
# hard-coded 500 repeated on every line.
plotSamples = 500
sampleIndex = np.arange(0, len(Ytest))[0:plotSamples]
gprMean = predGPR[0:plotSamples]
gprStd = stdGPR[0:plotSamples]

plt.figure(figsize=(10, 4))
plt.plot(sampleIndex, Ytest[0:plotSamples], label="Ground Truth")
plt.plot(sampleIndex, svrRadiationPredictions[0:plotSamples], label="SVR", alpha=1)
plt.plot(sampleIndex, knnRadiationPredictions[0:plotSamples], label="KNN", alpha=1)
plt.plot(sampleIndex, gprMean, label="GPR", alpha=1, color="darkred")
# Shaded band of one standard deviation around the GPR prediction
plt.fill_between(sampleIndex, np.ravel(gprMean - gprStd), np.ravel(gprMean + gprStd), alpha=0.2, color="darkred")
plt.legend(loc="upper right")
plt.title("Remaining models")
plt.xlabel("Sample number")
plt.ylabel("Radiation")

# Comparison

In [None]:
# Header row followed by one metrics row per model, in presentation order
tableColumns = ['Model', 'Mean Absolute Error', 'Mean Squared Error', 'R2 Score', 'Mean Absolute Percentage Error', 'Max Error']

# (row label, test-set predictions) for every model in the notebook
modelPredictions = [
    ("Linear Regression", linRegRadiationPredictions),
    ("Ridge Regression", ridgeRadiationPredictions),
    ("Bayes Ridge Regression", bRidgeRadiationPredictions),
    ("Lasso Regression", lassoRadiationPredictions),
    ("KNN Regression", knnRadiationPredictions),
    ("SVR", svrRadiationPredictions),
    ("Decision Tree", decTreeRadiationPredictions),
    ("Random Forest", rndForestRadiationPredictions),
    ("Extra Trees", xTreeRadiationPredictions),
    ("Neural net", neuralRadiationPredictions),
    ("Neural net dropout", BneuralRadiationPredictions),
    ("Gaussian Process Regressor", predGPR),
    ("Bayesian Regressor", predBReg),
    ("Emp Bayesian Regressor", predBEmpReg),
    ("EM", predEM),
    ("VB", predVB),
]

table = [tableColumns]
for modelLabel, modelOutput in modelPredictions:
    table.append([modelLabel] + calcMetrics(Ytest, modelOutput))

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Linear Models

In [None]:
# Metrics table restricted to the linear models
tableColumns = ['Model', 'Mean Absolute Error', 'Mean Squared Error', 'R2 Score', 'Mean Absolute Percentage Error', 'Max Error']

# (row label, test-set predictions) in presentation order
linearModelPredictions = [
    ("Linear Regression", linRegRadiationPredictions),
    ("Ridge Regression", ridgeRadiationPredictions),
    ("Bayes Ridge Regression", bRidgeRadiationPredictions),
    ("Lasso Regression", lassoRadiationPredictions),
    ("Bayesian Regressor", predBReg),
    ("Emp Bayesian Regressor", predBEmpReg),
    ("EM", predEM),
    ("VB", predVB),
]

table = [tableColumns]
for modelLabel, modelOutput in linearModelPredictions:
    table.append([modelLabel] + calcMetrics(Ytest, modelOutput))

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Tree based Models

In [None]:
# Metrics table restricted to the tree-based models
tableColumns = ['Model', 'Mean Absolute Error', 'Mean Squared Error', 'R2 Score', 'Mean Absolute Percentage Error', 'Max Error']

# (row label, test-set predictions) in presentation order
treeModelPredictions = [
    ("Decision Tree", decTreeRadiationPredictions),
    ("Random Forest", rndForestRadiationPredictions),
    ("Extra Trees", xTreeRadiationPredictions),
]

table = [tableColumns]
for modelLabel, modelOutput in treeModelPredictions:
    table.append([modelLabel] + calcMetrics(Ytest, modelOutput))

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

# Full-range plot of the extra-trees predictions
plotPrediction(Ytest, xTreeRadiationPredictions)

In [None]:
# Metrics table restricted to the neural network models
tableColumns = ['Model', 'Mean Absolute Error', 'Mean Squared Error', 'R2 Score', 'Mean Absolute Percentage Error', 'Max Error']

# (row label, test-set predictions) in presentation order
neuralModelPredictions = [
    ("Neural net", neuralRadiationPredictions),
    ("Neural net dropout", BneuralRadiationPredictions),
]

table = [tableColumns]
for modelLabel, modelOutput in neuralModelPredictions:
    table.append([modelLabel] + calcMetrics(Ytest, modelOutput))

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

# Data Analysis

In [None]:
# Radiation plotted against temperature
temperatureValues = data["Temperature"]
radiationValues = data["Radiation"]
plt.figure(figsize=(20, 10))
seaborn.lineplot(x=temperatureValues, y=radiationValues)


In [None]:
## Radiation statistics per hour of the day
radiationPerHour = data.groupby('Hour')['Radiation']

# Summary statistics for each hour group
meanRadiationPerHour = radiationPerHour.mean()
stdRadiationPerHour = radiationPerHour.std()
minRadiationPerHour = radiationPerHour.min()
maxRadiationPerHour = radiationPerHour.max()


In [None]:
## Temperature statistics per hour of the day
temperaturePerHour = data.groupby('Hour')['Temperature']

meanTemperaturePerHour = temperaturePerHour.mean()
stdTemperaturePerHour = temperaturePerHour.std()
minTemperaturePerHour = temperaturePerHour.min()
maxTemperaturePerHour = temperaturePerHour.max()

# The hour values of the grouped index serve as the shared x-axis
timeAxis = minTemperaturePerHour.index
hoursLabel = "Time of day [Hours]"

plt.figure(figsize=(20, 5))

# Left panel: radiation, using the per-hour radiation statistics
plt.subplot(1, 2, 1)
radiationLower = meanRadiationPerHour - stdRadiationPerHour
radiationUpper = meanRadiationPerHour + stdRadiationPerHour
plt.plot(timeAxis, meanRadiationPerHour, label="Mean")
plt.fill_between(timeAxis, minRadiationPerHour, maxRadiationPerHour, alpha=0.25, label="Min - Max")
plt.fill_between(timeAxis, radiationLower, radiationUpper, alpha=0.5, label="STD")
plt.legend()
plt.title("Radiation over Time")
plt.xlabel(hoursLabel)
plt.ylabel("Radiation")

# Right panel: temperature
plt.subplot(1, 2, 2)
temperatureLower = meanTemperaturePerHour - stdTemperaturePerHour
temperatureUpper = meanTemperaturePerHour + stdTemperaturePerHour
plt.plot(timeAxis, meanTemperaturePerHour, label="Mean")
plt.fill_between(timeAxis, minTemperaturePerHour, maxTemperaturePerHour, alpha=0.25, label="Min - Max")
plt.fill_between(timeAxis, temperatureLower, temperatureUpper, alpha=0.5, label="STD")
plt.title("Temperature over Time")
plt.xlabel(hoursLabel)
plt.ylabel("Temperature")

plt.legend()

In [None]:
# Mean radiation and mean temperature per hour in one figure with two y-axes
plt.figure(figsize=(10,5))
# Left y-axis: radiation (blue)
plt.plot(meanRadiationPerHour, color='blue')
plt.title("Radiation and temperature throughout the day")
plt.ylabel("Radiation", color='blue')
plt.xlabel("Time of day [Hours]")
# Second y-axis sharing the same x-axis, used for the temperature (red)
plt.twinx()
plt.plot(meanTemperaturePerHour, color='red')
plt.ylabel("Temperature", color='red')

    