# Smart Planning : appreciation model (v1) 
 
### Details 

- Problem : `Regression`


- Models :
    1. scikit Linear Regression Model 
    2. Decision tree
    3. SVM
    4. Random forest 
    5. Stochastic Gradient Descent
    6. Ridge_Regression
    7. KNeighbors_Regression 
    8. Bayesian_Ridge_Regression
    9. Gaussian_Process_Regression 
    10. ExtraTreesRegressor
    
    
- label : numeric in the interval [0,10]


- Features are expressed as percentages so that the model does not depend on a specific number of classrooms, teachers, ...


<br>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics, tree, svm, model_selection
import math
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

%matplotlib inline

# Get the dataset

In [2]:
# Load the raw dataset from the Excel workbook referenced by a relative path.
df = pd.read_excel(io="dataset2.xlsx")

# Preview the first five rows (the default of `head`).
df.head(n=5)

KeyboardInterrupt: 

<br>

# Insights about the dataset

> To get insight : [click here](./DataSetStudy.ipynb)

<br>

# Splitting the dataset

In [None]:

# The dataset is small, so a plain random 75% / 25% split is used.
# The seed is fixed so that the split is reproducible.
trainning_set = df.sample(frac=0.75, random_state=25)
test_set = df.drop(trainning_set.index)

# Standardisation statistics are estimated on the training rows ONLY.
trainning_set_mean = trainning_set.mean()
trainning_set_std = trainning_set.std()

# Statistics of the raw held-out rows: kept for inspection only.
# They must NOT be used for scaling (see below).
test_set_mean = test_set.mean()
test_set_std = test_set.std()

# Z-score every column (the 'Label' column included) with the TRAINING statistics.
# Scaling the test set with its own mean/std would put train and test features on
# different scales and leak test-set information into the evaluation.
trainning_set = (trainning_set - trainning_set_mean) / trainning_set_std
test_set = (test_set - trainning_set_mean) / trainning_set_std

print("# DataSet shape : ", df.shape)
print("# trainning_set shape : ", trainning_set.shape)
print("# test_set shape  25% : ", test_set.shape)

# Features are taken positionally from the first 13 columns; the target is 'Label'.
# NOTE(review): assumes the 13 feature columns come first in the workbook - confirm.
X = trainning_set.iloc[:, 0:13]
Y = trainning_set[['Label']]

X_test = test_set.iloc[:, 0:13]
Y_test = test_set[['Label']]


<br>

# 1- Linear Regression


In [None]:

# Fit an ordinary least-squares model on the standardised training data.
linearReg_model = linear_model.LinearRegression().fit(X, Y)

# Show the learned weights and bias.
print("# coef_ : \n", linearReg_model.coef_ )
print("\n # intercept_ : ", linearReg_model.intercept_ )

# Held-out inputs (first 13 columns) and target ('Label'), then the predictions.
X_test, Y_test = test_set.iloc[:, :13], test_set[['Label']]
Y_pred = linearReg_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
linearReg_score = round(linearReg_model.score(X_test, Y_test), 2)
linearReg_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
linearReg_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)

## testing set with model predictions

In [None]:

# Copy of the test set with the linear-regression predictions appended as an extra
# column, so the target and the prediction can be compared row by row.
testing_set_with_predictions = test_set.assign(
    LR_model_predictions=linearReg_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()


<br>
<br>

# 2-  Decision tree

In [None]:

# Create & train the model.
# random_state is fixed because the tree breaks ties between equally good splits
# at random, which would otherwise make the reported metrics vary between runs.
DT_model = tree.DecisionTreeRegressor(random_state=25)
DT_model.fit( X, Y )

# Print the depth of the fitted tree and the pruning parameter in use.
print("# depth : \n", DT_model.get_depth() )
print("\n # ccp_alpha : ", DT_model.ccp_alpha )

# Held-out inputs (first 13 columns) and target ('Label').
X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = DT_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
DT_score = round(DT_model.score(X_test, Y_test), 2)
DT_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
DT_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the decision-tree predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    DT_model_predictions=DT_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()


<br>
<br>

# 3-  SVM Regression 

In [None]:

# Create & train the model (support vector regression with an RBF kernel).
# SVR expects a 1-D target; passing the single-column DataFrame triggers a
# DataConversionWarning, so the target is flattened as in the random-forest cell.
SVM_model = svm.SVR( kernel='rbf')
SVM_model.fit( X, Y.values.ravel() )

# Print model parameters.
# NOTE: coef0 and degree are constructor parameters echoed for reference.
print("# coef0 : \n", SVM_model.coef0 ) 
#print("\n # coef_ : ", SVM_model.coef_ ) #only if using a linear kernel 
print("\n # degree : ", SVM_model.degree )

# Held-out inputs (first 13 columns) and target ('Label').
X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = SVM_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
SVM_score = round(SVM_model.score(X_test, Y_test), 2)
SVM_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
SVM_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the SVR predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    SVM_model_predictions=SVM_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<br>

# 4-  Random Forest regression

In [None]:

# Create & train the model.
# n_jobs=-1 uses every available CPU core; the previous value (30000) asked for far
# more parallel workers than any machine has cores.
# random_state is fixed so the forest and its metrics are reproducible.
RFR_model = RandomForestRegressor( n_estimators=2000, n_jobs=-1, random_state=25 )
RFR_model.fit( X, Y.values.ravel() )  # the forest expects a 1-D target

# Print the number of trees and the impurity-based importance of each feature.
print("# n_estimators : \n", RFR_model.n_estimators ) 
print("\n # feature_importances_ : ", RFR_model.feature_importances_ )

# Held-out inputs (first 13 columns) and target ('Label').
X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = RFR_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
RFR_score = round(RFR_model.score(X_test, Y_test), 2)
RFR_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
RFR_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the random-forest predictions appended as an extra column.
# The column was previously mislabelled "SVM_model_predictions" (copy-paste slip).
testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["RFR_model_predictions"] = RFR_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

<br>
<br>

# 5-  Stochastic Gradient Descent

In [None]:

# Create & train the model.
# alpha is the regularisation strength; it is set to a vanishingly small value so
# the penalty is effectively disabled and the fit is driven by the data alone.
# random_state is fixed because SGD shuffles the training data, which would
# otherwise make the reported metrics vary between runs.
SGD_model = linear_model.SGDRegressor( alpha=.00000000000000000001, random_state=25 )
SGD_model.fit( X, Y.values.ravel() )  # SGDRegressor expects a 1-D target

# Print model parameters.
print("# alpha : \n", SGD_model.alpha ) 


# Held-out inputs (first 13 columns) and target ('Label').
X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = SGD_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
SGD_score = round(SGD_model.score(X_test, Y_test), 2)
SGD_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
SGD_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the SGD predictions appended as an extra column.
# The column was previously mislabelled "SVM_model_predictions" (copy-paste slip).
testing_set_with_predictions = test_set.copy()
testing_set_with_predictions["SGD_model_predictions"] = SGD_model.predict(test_set.iloc[:,0:13])
testing_set_with_predictions.head()

<br>
<br>

# 6-  Ridge Regression
   

In [None]:

# Fit a ridge regression; alpha is the L2 regularisation strength (hyper-parameter).
RidgeReg_model = linear_model.Ridge(alpha=5.9).fit(X, Y)

# Show the learned weights and bias.
print("# coef_ : \n", RidgeReg_model.coef_ ) 
print("# intercept_ : \n", RidgeReg_model.intercept_ ) 

# Held-out inputs (first 13 columns) and target ('Label'), then the predictions.
X_test, Y_test = test_set.iloc[:, :13], test_set[['Label']]
Y_pred = RidgeReg_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
RidgeReg_score = round(RidgeReg_model.score(X_test, Y_test), 2)
RidgeReg_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
RidgeReg_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the ridge-regression predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    Ridge_model_predictions=RidgeReg_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<br>

# 7- KNeighbors_Regression 
    
   

In [None]:

# create & train model
KNeighbors_model = KNeighborsRegressor( n_neighbors=11) # hyperparam
KNeighbors_model.fit( X, Y )

# print model parameters
print("# n_features_in_ : \n", KNeighbors_model.n_features_in_ ) 
print("# radius : \n", KNeighbors_model.radius ) 
print("# params : \n", KNeighbors_model.get_params() ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = KNeighbors_model.predict(X_test)
  
# Calculate metrics using test set
KNeighbors_score = round(KNeighbors_model.score(X_test, Y_test), 2)
KNeighbors_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
KNeighbors_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the k-nearest-neighbours predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    Kneighbors_model_predictions=KNeighbors_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<br>

# 8- Bayesian_Ridge_Regression
    
   

In [None]:

# create & train model
BayesianRidge_model = linear_model.BayesianRidge()
BayesianRidge_model.fit( X, Y )

# print model parameters
print("# n_features_in_ : \n", BayesianRidge_model.n_features_in_ ) 
print("# radius : \n", KNeighbors_model.radius ) 
print("# params : \n", KNeighbors_model.get_params() ) 


X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = BayesianRidge_model.predict(X_test)
  
# Calculate metrics using test set
BayesianRidge_score = round(BayesianRidge_model.score(X_test, Y_test), 2)
BayesianRidge_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
BayesianRidge_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the Bayesian-ridge predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    BayesianRidge_model_predictions=BayesianRidge_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<br>

# 9-  Gaussian_Process_Regression 
    
   

In [None]:

# Covariance function: a dot-product kernel plus a white-noise kernel.
kernel = DotProduct() + WhiteKernel()

# Fit the Gaussian-process regressor with that kernel (seeded for reproducibility).
GPR_model = GaussianProcessRegressor(kernel=kernel, random_state=0).fit(X, Y)

# Show the number of input features, the alpha setting, the kernel and all parameters.
print("# n_features_in_ : \n", GPR_model.n_features_in_ ) 
print("# alpha : \n", GPR_model.alpha ) 
print("# kernel : \n", GPR_model.kernel ) 
print("# params : \n", GPR_model.get_params() ) 

# Held-out inputs (first 13 columns) and target ('Label'), then the predictions.
X_test, Y_test = test_set.iloc[:, :13], test_set[['Label']]
Y_pred = GPR_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
GPR_score = round(GPR_model.score(X_test, Y_test), 2)
GPR_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
GPR_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


## testing set with model predictions

In [None]:

# Copy of the test set with the Gaussian-process predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    GPR_model_predictions=GPR_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<br>

# 10- ExtraTreesRegressor
    
   

In [None]:

# Create & train the model.
# n_jobs=-1 uses every available CPU core; the previous value (30000) asked for far
# more parallel workers than any machine has cores.
# random_state is fixed so the ensemble and its metrics are reproducible.
EXTRG_model =  ExtraTreesRegressor( n_estimators=2000, n_jobs=-1, random_state=25 )
# The ensemble expects a 1-D target; the single-column DataFrame triggers a
# DataConversionWarning, so it is flattened as in the random-forest cell.
EXTRG_model.fit( X, Y.values.ravel() )

# Print the number of trees and the impurity-based importance of each feature.
print("# n_estimators : \n", EXTRG_model.n_estimators ) 
print("\n # feature_importances_ : ", EXTRG_model.feature_importances_ )

# Held-out inputs (first 13 columns) and target ('Label').
X_test = test_set.iloc[:,0:13]
Y_test =  test_set[['Label']]
Y_pred = EXTRG_model.predict(X_test)

# Test-set metrics: R^2 (2 decimals), MAE and MSE (4 decimals).
EXTRG_score = round(EXTRG_model.score(X_test, Y_test), 2)
EXTRG_mae = round(metrics.mean_absolute_error(Y_test, Y_pred), 4)
EXTRG_mse = round(metrics.mean_squared_error(Y_test, Y_pred), 4)


In [None]:

# Copy of the test set with the extra-trees predictions appended as an extra column.
testing_set_with_predictions = test_set.assign(
    EXTRG_model_predictions=EXTRG_model.predict(test_set.iloc[:, :13])
)
testing_set_with_predictions.head()

<br>
<hr>
<br>

# Results & Comparison

In [None]:
# Side-by-side comparison of every model on the held-out test set:
# one row per model with its R^2 score, mean absolute error and mean squared error.
pd.DataFrame(
    [
        ('Linear Regression Model', linearReg_score, linearReg_mae, linearReg_mse),
        ('Decision Tree', DT_score, DT_mae, DT_mse),
        ('SVM', SVM_score, SVM_mae, SVM_mse),
        ('Random Forest Reg', RFR_score, RFR_mae, RFR_mse),
        ('Stochastic Gradient Descent', SGD_score, SGD_mae, SGD_mse),
        ('Ridge_Regression', RidgeReg_score, RidgeReg_mae, RidgeReg_mse),
        ('KNeighborsRegressor', KNeighbors_score, KNeighbors_mae, KNeighbors_mse),
        ("BayesianRidge", BayesianRidge_score, BayesianRidge_mae, BayesianRidge_mse),
        ('GaussianProcessRegressor', GPR_score, GPR_mae, GPR_mse),
        ('ExtraTreesRegressor', EXTRG_score, EXTRG_mae, EXTRG_mse),
    ],
    columns=['Model', 'Score', 'MAE', 'MSE'],
)

<br>

## Manual testing

In [None]:
# Hand-picked raw feature values for a single hypothetical planning.
# NOTE(review): the names suggest percentages (and one count, `SHDispoIntrv_Nbr`);
# their exact meaning and their order relative to the dataset columns should be
# confirmed against the workbook.
seance_en_simultanite_perc = 0
salles_utilisees_perc = 82
salles_surutiliseess_Perc = 10
places_videss_Perc = 10
seances_samedis_Perc = 0
Smidis_Perc = 0
IntvDep8Hs_Perc = 0
InterDepDisp6s_Perc = 0
GrpDep8Hs_Perc = 0
GrpDepDisp6s_Perc = 0
SHDispoIntrv_Nbr = 0
SDepDIntvs_Perc = 0
SHPDIntvs_Perc = 0

# The 13 values gathered into one feature vector, in the order declared above.
manual_testing_var = [
    seance_en_simultanite_perc,
    salles_utilisees_perc,
    salles_surutiliseess_Perc,
    places_videss_Perc,
    seances_samedis_Perc,
    Smidis_Perc,
    IntvDep8Hs_Perc,
    InterDepDisp6s_Perc,
    GrpDep8Hs_Perc,
    GrpDepDisp6s_Perc,
    SHDispoIntrv_Nbr,
    SDepDIntvs_Perc,
    SHPDIntvs_Perc,
]

In [None]:

# Models to query, in the same order as the comparison table.
# GaussianProcessRegressor is included so that all ten trained models are covered.
manual_models = {
    'Linear Regression Model': linearReg_model,
    'Decision Tree': DT_model,
    'SVM': SVM_model,
    'Random Forest Reg': RFR_model,
    'Stochastic Gradient Descent': SGD_model,
    'Ridge_Regression': RidgeReg_model,
    'KNeighborsRegressor': KNeighbors_model,
    'BayesianRidge': BayesianRidge_model,
    'GaussianProcessRegressor': GPR_model,
    'ExtraTreesRegressor': EXTRG_model,
}

# Every model was fitted on z-scored features, so the raw manual values must be
# standardised with the TRAINING mean/std before prediction. Feeding the raw
# percentages directly, as before, puts the input far outside the training scale.
# A one-row DataFrame with the training column names also keeps the feature
# names consistent with what the models saw during `fit`.
# NOTE(review): assumes `manual_testing_var` follows the column order of `X`.
manual_features = pd.DataFrame([manual_testing_var], columns=X.columns)
manual_features_scaled = (
    (manual_features - trainning_set_mean[X.columns]) / trainning_set_std[X.columns]
)

# The target column was z-scored together with the features, so predictions are in
# z-score units; map them back to the original label scale.
label_mean = trainning_set_mean['Label']
label_std = trainning_set_std['Label']

pd.DataFrame({
    'Model': list(manual_models),
    'Prediction': [
        # np.ravel copes with models that return a (1, 1) array instead of (1,).
        float(np.ravel(model.predict(manual_features_scaled))[0]) * label_std + label_mean
        for model in manual_models.values()
    ],
})
