# Smart Planning : appreciation model (v1) 
 
### Details 

- Problem : `Regression`


- Models :
    1. scikit Linear Regression Model 
    2. Decision tree
    3. SVM
    4. Random forest 
    5. Stochastic Gradient Descent
    6. Ridge_Regression
    7. KNeighbors_Regression 
    8. Bayesian_Ridge_Regression
    9. Gaussian_Process_Regression 
    10. ExtraTreesRegressor
    
    
- label : numeric in the interval [0,10]


- Values in percentage to avoid the problem related to a specefic number of classrooms, teachers ...


<br>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics, tree, svm, model_selection
import math
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

%matplotlib inline

# Get the dataset

In [2]:
df = pd.read_excel("dataset2.xlsx")
df.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label
0,4,0,0,0,2,4,6,2,5,20,1,1,0,9
1,13,10,20,25,20,2,25,6,2,8,10,30,10,7
2,17,100,100,28,0,6,26,6,3,22,0,20,33,4
3,16,70,30,0,1,2,26,10,3,0,4,5,3,8
4,17,12,20,3,0,3,26,8,2,0,22,1,3,10


<br>

# Insights about the dataset

> To get insight : [click here](./DataSetStudy.ipynb)

<br>

# Splitting the dataset

In [3]:

# few data 
trainning_set = df.sample(frac=0.75, random_state=25) 
test_set = df.drop(trainning_set.index)

# Calculate the Z-scores of each column in the training set:
trainning_set_mean = trainning_set.mean()
trainning_set_std = trainning_set.std()
trainning_set = (trainning_set - trainning_set_mean)/trainning_set_std

# Calculate the Z-scores of each column in the testing set:
test_set_mean = test_set.mean()
test_set_std = test_set.std()
test_set = (test_set - test_set_mean)/test_set_std

print("# DataSet shape : ", df.shape)
print("# trainning_set shape : ", trainning_set.shape)
print("# test_set shape  20% : ", test_set.shape)


X = trainning_set.iloc[:,0:13]
Y = trainning_set[['Label']]


X_test = test_set.iloc[:,0:13]
Y_test = test_set[['Label']]


# DataSet shape :  (521, 14)
# trainning_set shape :  (391, 14)
# test_set shape  20% :  (130, 14)


<br>

# 1- Linear Regression


In [4]:

# create & train model
linearReg_model = linear_model.LinearRegression()
linearReg_model.fit( X, Y )

# print model parameters
print("# coef_ : \n", linearReg_model.coef_ )
print("\n # intercept_ : ", linearReg_model.intercept_ )

X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = linearReg_model.predict(X_test)
  
# Calculate metrics using test set
linearReg_score = round(linearReg_model.score(X_test, Y_test), 2)
linearReg_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
linearReg_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)

# coef_ : 
 [[-0.752921    0.01396509 -0.10081216 -0.06990918  0.01112606 -0.048899
  -0.05659528 -0.08966947 -0.04108855 -0.08768478 -0.01990842 -0.01196463
  -0.05212794]]

 # intercept_ :  [-7.22031242e-18]


## testing set with model predictions

In [5]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["LR_model_predictions"] = linearReg_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()


Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,LR_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.065686
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.080078
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-2.168421
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.458726
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.399755


<br>
<br>

# 2-  Decision tree

In [6]:

# create & train model
DT_model = tree.DecisionTreeRegressor()
DT_model.fit( X, Y )

# print model parameters
print("# depth : \n", DT_model.get_depth() )
print("\n # ccp_alpha : ", DT_model.ccp_alpha )

X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = DT_model.predict(X_test)
  
# Calculate metrics using test set
DT_score = round(DT_model.score(X_test, Y_test), 2)
DT_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
DT_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# depth : 
 17

 # ccp_alpha :  0.0


## testing set with model predictions

In [7]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["DT_model_predictions"] = DT_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()


Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,DT_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.441139
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.14948
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-1.038055
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.441139
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,2.503651


<br>
<br>

# 3-  SVM Resgression 

In [8]:

# create & train model
SVM_model = svm.SVR( kernel='rbf')
SVM_model.fit( X, Y )

# print model parameters
print("# coef0 : \n", SVM_model.coef0 ) 
#print("\n # coef_ : ", SVM_model.coef_ ) #only if using a linear kernel 
print("\n # degree : ", SVM_model.degree )

X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = SVM_model.predict(X_test)
  
# Calculate metrics using test set
SVM_score = round(SVM_model.score(X_test, Y_test), 2)
SVM_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
SVM_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# coef0 : 
 0.0

 # degree :  3


  return f(*args, **kwargs)


## testing set with model predictions

In [9]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["SVM_model_predictions"] = SVM_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,SVM_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,0.974874
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.204365
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-0.464484
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.817307
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.957809


<br>
<br>

# 4-  Random Forest regression

In [10]:

# create & train model
RFR_model = RandomForestRegressor( n_estimators=2000, n_jobs=30000 )
RFR_model.fit( X, Y.values.ravel() )

# print model parameters
print("# n_estimators : \n", RFR_model.n_estimators ) 
print("\n # feature_importances_ : ", RFR_model.feature_importances_ )

X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = RFR_model.predict(X_test)
  
# Calculate metrics using test set
RFR_score = round(RFR_model.score(X_test, Y_test), 2)
RFR_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
RFR_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# n_estimators : 
 2000

 # feature_importances_ :  [0.67819434 0.01737052 0.02136351 0.03396607 0.01569316 0.03941286
 0.03657935 0.03174374 0.02465919 0.01991584 0.02107929 0.0225442
 0.03747794]


## testing set with model predictions

In [11]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["SVM_model_predictions"] = RFR_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,SVM_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.123094
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.274325
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-0.96545
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.33719
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.935384


<br>
<br>

# 5-  Stochastic Gradient Descent

In [12]:

# create & train model
SGD_model = linear_model.SGDRegressor( alpha=.00000000000000000001 ) # this value should be so small so the algorithme fits well
SGD_model.fit( X, Y )

# print model parameters
print("# alpha : \n", SGD_model.alpha ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = SGD_model.predict(X_test)
  
# Calculate metrics using test set
SGD_score = round(SGD_model.score(X_test, Y_test), 2)
SGD_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
SGD_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# alpha : 
 1e-20


  return f(*args, **kwargs)


## testing set with model predictions

In [13]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["SVM_model_predictions"] = SGD_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,SVM_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.051311
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.05605
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-2.132881
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.444557
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.384058


<br>
<br>

# 6-  Ridge Regression
   

In [14]:

# create & train model
RidgeReg_model = linear_model.Ridge( alpha=5.9 ) # hyperparam : the greater it converges
RidgeReg_model.fit( X, Y )

# print model parameters
print("# coef_ : \n", RidgeReg_model.coef_ ) 
print("# intercept_ : \n", RidgeReg_model.intercept_ ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = RidgeReg_model.predict(X_test)
  
# Calculate metrics using test set
RidgeReg_score = round(RidgeReg_model.score(X_test, Y_test), 2)
RidgeReg_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
RidgeReg_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# coef_ : 
 [[-0.74116464  0.01460389 -0.09972246 -0.07097699  0.01019629 -0.04851451
  -0.05573323 -0.08943491 -0.0414293  -0.08698341 -0.02099658 -0.01345387
  -0.05227385]]
# intercept_ : 
 [-6.4089764e-18]


## testing set with model predictions

In [15]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["Ridge_model_predictions"] = RidgeReg_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,Ridge_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.061038
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.065657
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-2.14891
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.448369
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.390392


<br>
<br>

# 7- KNeighbors_Regression 
    
   

In [16]:

# create & train model
KNeighbors_model = KNeighborsRegressor( n_neighbors=11) # hyperparam
KNeighbors_model.fit( X, Y )

# print model parameters
print("# n_features_in_ : \n", KNeighbors_model.n_features_in_ ) 
print("# radius : \n", KNeighbors_model.radius ) 
print("# params : \n", KNeighbors_model.get_params() ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = KNeighbors_model.predict(X_test)
  
# Calculate metrics using test set
KNeighbors_score = round(KNeighbors_model.score(X_test, Y_test), 2)
KNeighbors_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
KNeighbors_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# n_features_in_ : 
 13
# radius : 
 None
# params : 
 {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 11, 'p': 2, 'weights': 'uniform'}


## testing set with model predictions

In [17]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["Kneighbors_model_predictions"] = KNeighbors_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,Kneighbors_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.569928
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.213875
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-0.587292
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.956296
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,2.181678


<br>
<br>

# 8- Bayesian_Ridge_Regression
    
   

In [18]:

# create & train model
BayesianRidge_model = linear_model.BayesianRidge()
BayesianRidge_model.fit( X, Y )

# print model parameters
print("# n_features_in_ : \n", BayesianRidge_model.n_features_in_ ) 
print("# radius : \n", KNeighbors_model.radius ) 
print("# params : \n", KNeighbors_model.get_params() ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = BayesianRidge_model.predict(X_test)
  
# Calculate metrics using test set
BayesianRidge_score = round(BayesianRidge_model.score(X_test, Y_test), 2)
BayesianRidge_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
BayesianRidge_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# n_features_in_ : 
 13
# radius : 
 None
# params : 
 {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 11, 'p': 2, 'weights': 'uniform'}


  return f(*args, **kwargs)


## testing set with model predictions

In [19]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["BayesianRidge_model_predictions"] = BayesianRidge_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,BayesianRidge_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.060585
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.064271
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-2.147047
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.447372
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.389492


<br>
<br>

# 9-  Gaussian_Process_Regression 
    
   

In [20]:

kernel = DotProduct() + WhiteKernel()

# create & train model
GPR_model =  GaussianProcessRegressor( kernel=kernel, random_state=0)
GPR_model.fit( X, Y )

# print model parameters
print("# n_features_in_ : \n", GPR_model.n_features_in_ ) 
print("# alpha : \n", GPR_model.alpha ) 
print("# kernel : \n", GPR_model.kernel ) 
print("# params : \n", GPR_model.get_params() ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = GPR_model.predict(X_test)
  
# Calculate metrics using test set
GPR_score = round(GPR_model.score(X_test, Y_test), 2)
GPR_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
GPR_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


# n_features_in_ : 
 13
# alpha : 
 1e-10
# kernel : 
 DotProduct(sigma_0=1) + WhiteKernel(noise_level=1)
# params : 
 {'alpha': 1e-10, 'copy_X_train': True, 'kernel__k1': DotProduct(sigma_0=1), 'kernel__k2': WhiteKernel(noise_level=1), 'kernel__k1__sigma_0': 1.0, 'kernel__k1__sigma_0_bounds': (1e-05, 100000.0), 'kernel__k2__noise_level': 1.0, 'kernel__k2__noise_level_bounds': (1e-05, 100000.0), 'kernel': DotProduct(sigma_0=1) + WhiteKernel(noise_level=1), 'n_restarts_optimizer': 0, 'normalize_y': False, 'optimizer': 'fmin_l_bfgs_b', 'random_state': 0}


## testing set with model predictions

In [21]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["GPR_model_predictions"] = GPR_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,GPR_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.065452
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.079345
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-2.167423
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.458201
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,1.399279


<br>
<br>

# 10- ExtraTreesRegressor
    
   

In [22]:

# create & train model
EXTRG_model =  ExtraTreesRegressor( n_estimators=2000, n_jobs=30000 )
EXTRG_model.fit( X, Y )

# print model parameters
print("# n_estimators : \n", EXTRG_model.n_estimators ) 
print("\n # feature_importances_ : ", EXTRG_model.feature_importances_ )

X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = EXTRG_model.predict(X_test)
  
# Calculate metrics using test set
EXTRG_score = round(EXTRG_model.score(X_test, Y_test), 2)
EXTRG_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
EXTRG_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


  EXTRG_model.fit( X, Y )


# n_estimators : 
 2000

 # feature_importances_ :  [0.67619263 0.01892937 0.02852595 0.03951148 0.02101323 0.03277019
 0.02981882 0.03199156 0.02414965 0.0251621  0.02158186 0.02486896
 0.0254842 ]


In [23]:

testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["EXTRG_model_predictions"] = EXTRG_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label,EXTRG_model_predictions
2,-0.984591,1.560553,1.867317,-0.629803,-1.67285,-1.410658,-0.564044,-1.607169,-1.304567,-1.075984,-1.739833,-0.950942,-0.506888,0.428692,1.183126
5,-1.519616,0.576534,-1.462147,-1.602429,-1.67285,-1.608236,-1.434976,-1.811771,-1.402617,-1.899897,-1.739833,-1.612733,-1.661108,2.73476,2.306732
14,1.62759,1.560553,1.867317,1.523871,1.677489,1.684734,1.914762,1.598251,1.865714,1.845161,0.00537,1.696221,1.836528,-1.108686,-0.999096
17,-1.204896,0.576534,-0.762959,-1.359273,-0.935776,0.894421,-1.033008,-1.300267,-0.977734,-1.375589,-1.507139,-1.11639,-0.891628,1.197381,1.487004
22,-0.984591,0.806138,-1.1292,-1.150853,-1.67285,-1.608236,-0.698034,-1.197967,-0.912367,-1.375589,-0.227324,-1.248748,-1.311345,2.350415,2.006749


<br>
<hr>
<br>

# Results & Comparaison

In [24]:
pd.DataFrame({  
        'Model' : [ 
            'Linear Regression Model',  
            'Decision Tree',
            'SVM',
            'Random Forest Reg',
            'Stochastic Gradient Descent',
            'Ridge_Regression',
            'KNeighborsRegressor',
            "BayesianRidge",
            'GaussianProcessRegressor',
            'ExtraTreesRegressor',
        ],
        'Score' : [ 
            linearReg_score,
            DT_score,
            SVM_score,
            RFR_score,
            SGD_score,
            RidgeReg_score,
            KNeighbors_score,
            BayesianRidge_score,
            GPR_score,
            EXTRG_score,
        ],
        'MAE'  : [ 
            linearReg_mae,
            DT_mae,
            SVM_mae,
            RFR_mae,
            SGD_mae,
            RidgeReg_mae,
            KNeighbors_mae,
            BayesianRidge_mae,
            GPR_mae,
            EXTRG_mae,
        ],
        'MSE'  : [ 
            linearReg_mse,
            DT_mse,
            SVM_mse,
            RFR_mse,
            SGD_mse,
            RidgeReg_mse,
            KNeighbors_mse,
            BayesianRidge_mse,
            GPR_mse,
            EXTRG_mse,
        ]
    })

Unnamed: 0,Model,Score,MAE,MSE
0,Linear Regression Model,0.69,0.4148,0.3032
1,Decision Tree,0.46,0.5037,0.5396
2,SVM,0.64,0.4593,0.354
3,Random Forest Reg,0.71,0.3839,0.2878
4,Stochastic Gradient Descent,0.7,0.4152,0.3021
5,Ridge_Regression,0.69,0.417,0.3038
6,KNeighborsRegressor,0.47,0.59,0.5286
7,BayesianRidge,0.69,0.4172,0.3039
8,GaussianProcessRegressor,0.69,0.4148,0.3032
9,ExtraTreesRegressor,0.71,0.3832,0.2838


<br>

## Manual testing

In [31]:
seance_en_simultanite_perc = 0
salles_utilisees_perc = 82
salles_surutiliseess_Perc = 10
places_videss_Perc = 10
seances_samedis_Perc = 0
Smidis_Perc = 0
IntvDep8Hs_Perc = 0 
InterDepDisp6s_Perc = 0 
GrpDep8Hs_Perc = 0 
GrpDepDisp6s_Perc = 0
SHDispoIntrv_Nbr = 0 
SDepDIntvs_Perc = 0
SHPDIntvs_Perc = 0

manual_testing_var = [ seance_en_simultanite_perc, salles_utilisees_perc, salles_surutiliseess_Perc, places_videss_Perc, seances_samedis_Perc, 
Smidis_Perc, IntvDep8Hs_Perc, InterDepDisp6s_Perc, GrpDep8Hs_Perc, GrpDepDisp6s_Perc, SHDispoIntrv_Nbr, SDepDIntvs_Perc, SHPDIntvs_Perc]

In [32]:

pd.DataFrame({  
        'Model' : [ 
            'Linear Regression Model',  
            'Decision Tree',
            'SVM',
            'Random Forest Reg',
            'Stochastic Gradient Descent',
            'Ridge_Regression',
            'KNeighborsRegressor',
            'BayesianRidge',
            'ExtraTreesRegressor'
        ],
        'Prediction' : [ 
            linearReg_model.predict([ manual_testing_var ])[0],
            DT_model.predict([ manual_testing_var ])[0],
            SVM_model.predict([ manual_testing_var ])[0],
            RFR_model.predict([ manual_testing_var ])[0],
            SGD_model.predict([ manual_testing_var ])[0],
            RidgeReg_model.predict([ manual_testing_var ])[0],
            KNeighbors_model.predict([ manual_testing_var ])[0],
            BayesianRidge_model.predict([ manual_testing_var ])[0],
            EXTRG_model.predict([ manual_testing_var ])[0],
        ]})


Unnamed: 0,Model,Score
0,Linear Regression Model,[-0.5620757691209493]
1,Decision Tree,-0.683884
2,SVM,-0.145936
3,Random Forest Reg,-0.402496
4,Stochastic Gradient Descent,-0.212434
5,Ridge_Regression,[-0.5094758513277013]
6,KNeighborsRegressor,[-0.49070040691988676]
7,BayesianRidge,-0.504547
8,ExtraTreesRegressor,-0.369735
