In [1]:
import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.datasets import load_diabetes

from sklearn.ensemble import RandomForestRegressor

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_absolute_error

from sklearn.neighbors import KNeighborsRegressor

from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVR

from sklearn.tree import DecisionTreeRegressor



from ELM import ELMRegressor


In [2]:
# Read the header-less CSV and split it column-wise: the first three columns
# become the feature matrix, the fourth column becomes the regression target.
df = pd.read_csv('final_learning.csv', header=None)
dataset = df.values
X = dataset[:, :3]
y = dataset[:, 3]

In [3]:
# Empty accumulator frame: the training cells add one prediction column and
# one ground-truth column per fitted model, and the last cell writes it to CSV.
evaluation = pd.DataFrame(data=None)

In [4]:
# Best test MAE per model family, keyed by the label used in the bar chart.
test_maes_dictionary = {}

# Seed NumPy's global RNG before anything is split or fitted.
np.random.seed(0)

# Hold out 30% of the rows for testing, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=2, test_size=0.3
)

# Standardise the features: the scaler is fitted on the training rows only
# and the same fitted transformation is then applied to the test rows.
stdScaler_data = StandardScaler().fit(X_train)
X_train = stdScaler_data.transform(X_train)
X_test = stdScaler_data.transform(X_test)

# Instantiated but neither fitted nor applied in this cell: the targets are
# left on their original scale (an earlier max-abs normalisation of y_train /
# y_test is disabled).
stdScaler_target = StandardScaler()

# Insert a second axis so the targets become column vectors.
y_train = y_train[:, np.newaxis]
y_test = y_test[:, np.newaxis]

In [None]:
## ELM TRAINING
# Sweep the hidden-layer size M over 1..199. For every M, 30 independent
# ELMRegressor instances are fitted and the restart with the lowest test MAE
# is kept: its test/train MAE go into MAE_TEST_MINS / MAE_TRAIN_MINS and its
# test predictions are stored in `evaluation`.
# (Presumably ELMRegressor is randomly initialised, which would be why the
# restarts differ -- confirm against ELM.py.)
# NOTE(review): the best restart is selected on the test split, so the
# reported minimum is an optimistic estimate.

MAE_TRAIN_MINS = []
MAE_TEST_MINS = []

# Flattened ground truth, computed once instead of on every iteration.
# np.ravel replaces np.reshape(arr, n, 1): the third positional argument of
# np.reshape is `order`, not a second dimension, and a non-string order is
# rejected by recent NumPy releases.
y_test_flat = np.ravel(y_test)

for n, M in enumerate(range(1, 200, 1)):
    MAES_TRAIN = []
    MAES_TEST = []
    test_predictions = []

    for i in range(30):
        ELM = ELMRegressor(M)
        ELM.fit(X_train, y_train)

        prediction = ELM.predict(X_train)
        MAES_TRAIN.append(mean_absolute_error(y_train, prediction))

        prediction = ELM.predict(X_test)
        MAES_TEST.append(mean_absolute_error(y_test, prediction))
        test_predictions.append(np.ravel(prediction))

    # Keep MAE and stored predictions consistent: both come from the restart
    # with the lowest test MAE (previously the stored predictions were always
    # those of the last restart).
    best_run = int(np.argmin(MAES_TEST))
    MAE_TEST_MINS.append(MAES_TEST[best_run])
    MAE_TRAIN_MINS.append(MAES_TRAIN[best_run])

    evaluation["ELM_prediction" + str(n)] = test_predictions[best_run]
    evaluation["ELM_real" + str(n)] = y_test_flat

print("Minimum MAE ELM =", min(MAE_TEST_MINS))

test_maes_dictionary["ELM"] = min(MAE_TEST_MINS)



## LINEAR REGRESSION TRAINING
# Fit a single ordinary least-squares model and record its test MAE.

lr = LinearRegression()
lr.fit(X_train, y_train)

prediction = lr.predict(X_test)

# np.ravel replaces np.reshape(arr, n, 1): the third positional argument of
# np.reshape is `order`, not a second dimension, and a non-string order is
# rejected by recent NumPy releases.
evaluation["Linear_prediction"] = np.ravel(prediction)
evaluation["Linear_real"] = np.ravel(y_test)

# Kept as a one-element list so the reporting below mirrors the other models.
mae = [mean_absolute_error(y_test, prediction)]

print("Minimum MAE LR =", min(mae))

test_maes_dictionary["LinReg"] = min(mae)



## K-NEAREST NEIGHBORS TRAINING
# Sweep the number of neighbours and keep the lowest test MAE.

mae = []

# The neighbour count cannot exceed the number of training samples, so the
# sweep is capped there (upper bound 50 as before).
max_neighbors = min(50, len(X_train))

for n, N in enumerate(range(1, max_neighbors + 1)):
    # Pass the loop value through: previously N was ignored and every
    # iteration fitted the same default-configured regressor.
    kn = KNeighborsRegressor(n_neighbors=N)
    kn.fit(X_train, y_train)

    prediction = kn.predict(X_test)

    # np.ravel replaces np.reshape(arr, n, 1): the third positional argument
    # of np.reshape is `order`, not a second dimension.
    evaluation["KNN_prediction" + str(n)] = np.ravel(prediction)
    evaluation["KNN_real" + str(n)] = np.ravel(y_test)
    mae.append(mean_absolute_error(y_test, prediction))

print("Minimum MAE KNN =", min(mae))

test_maes_dictionary["KNN"] = min(mae)



## DECISION TREES TRAINING
# Grid over max_depth (1..50) and min_samples_split (5..100 in steps of 5);
# every fitted tree contributes one prediction/ground-truth column pair and
# the lowest test MAE of the grid is reported.

mae = []
n = 0  # running index of the grid point, used in the column names

# Flattened ground truth, computed once for the whole grid.
y_test_flat = np.ravel(y_test)

for max_depth in range(1, 51):
    for min_samples_split in range(5, 102, 5):
        tree = DecisionTreeRegressor(max_depth=max_depth, min_samples_split=min_samples_split)
        tree.fit(X_train, y_train)

        prediction = tree.predict(X_test)

        # np.ravel replaces np.reshape(arr, n, 1): the third positional
        # argument of np.reshape is `order`, not a second dimension.
        evaluation["DecisionTree_prediction" + str(n)] = np.ravel(prediction)
        evaluation["DecisionTree_real" + str(n)] = y_test_flat
        mae.append(mean_absolute_error(y_test, prediction))
        n += 1

print("Minimum MAE TREE = ", min(mae))

test_maes_dictionary["Dec. Tree"] = min(mae)



## SUPPORT VECTORS MACHINE TRAINING
# Fit one SVR per kernel and keep the lowest test MAE.

mae = []

# SVR works on a 1-D target; handing it the column vector makes scikit-learn
# emit a DataConversionWarning, so flatten once up front.
y_train_flat = np.ravel(y_train)
y_test_flat = np.ravel(y_test)

for n, kernel in enumerate(["rbf", "linear", "poly", "sigmoid"]):
    svr = SVR(kernel=kernel)
    svr.fit(X_train, y_train_flat)

    prediction = svr.predict(X_test)

    # np.ravel replaces np.reshape(arr, n, 1): the third positional argument
    # of np.reshape is `order`, not a second dimension.
    evaluation["SVR_prediction" + str(n)] = np.ravel(prediction)
    evaluation["SVR_real" + str(n)] = y_test_flat
    mae.append(mean_absolute_error(y_test, prediction))

print("Minimum MAE SVR = ", min(mae))

test_maes_dictionary["SVM"] = min(mae)



## RANDOM FOREST TRAINING
# Sweep the number of trees (10, 110, ..., 1010) and keep the lowest test MAE.

mae = []

# A single-output forest expects a 1-D target; handing it the column vector
# makes scikit-learn emit a DataConversionWarning, so flatten once up front.
y_train_flat = np.ravel(y_train)
y_test_flat = np.ravel(y_test)

for n, n_estimators in enumerate(range(10, 1100, 100)):
    rf = RandomForestRegressor(n_estimators=n_estimators)
    rf.fit(X_train, y_train_flat)

    prediction = rf.predict(X_test)

    # np.ravel replaces np.reshape(arr, n, 1): the third positional argument
    # of np.reshape is `order`, not a second dimension.
    evaluation["RandomForest_prediction" + str(n)] = np.ravel(prediction)
    evaluation["RandomForest_real" + str(n)] = y_test_flat
    mae.append(mean_absolute_error(y_test, prediction))

print("Minimum MAE R.Forest = ", min(mae))

test_maes_dictionary["R. Forest"] = min(mae)



In [None]:
## PLOTTING THE RESULTS

# Figure 1: ELM train/test MAE versus hidden-layer size.
# The ELM sweep starts at 1 neuron and advances by 1, so the frame is indexed
# 1..len(MAE_TEST_MINS); with the default 0-based index every point would be
# drawn one neuron too far to the left.
neuron_counts = range(1, len(MAE_TEST_MINS) + 1)
df = pd.DataFrame(
    {"test": MAE_TEST_MINS, "train": MAE_TRAIN_MINS},
    index=neuron_counts,
)

# Hidden-layer size with the lowest test MAE, derived from the results rather
# than hard-coded in the title.
best_neurons = int(df["test"].idxmin())

ax = df.plot(figsize=(16, 7))
ax.set_xlabel("Number of Neurons in the hidden layer")
ax.set_ylabel("Mean Absolute Error")
# The title names the file the data is actually loaded from.
ax.set_title(
    "Extreme Learning Machine error obtained for final_learning.csv \n when varying the number of neurons in the "
    "hidden layer (min. at %d neurons)" % best_neurons)
plt.show()

# Figure 2: best test MAE of every model family side by side.
plt.figure(figsize=(16, 7))
D = test_maes_dictionary

# Materialise the dict views as lists so matplotlib receives plain sequences.
plt.bar(range(len(D)), list(D.values()), align='center')
plt.xticks(range(len(D)), list(D.keys()))
plt.ylabel("Mean Absolute Error")
plt.title("Error Comparison between Classic Regression Models and ELM")
plt.show()

In [None]:
# Write every stored prediction / ground-truth column to disk for offline
# comparison (the row index is written as the first CSV column).
data = pd.DataFrame(data=evaluation)
data.to_csv(path_or_buf="comparison.csv")