# Smart Planning: appreciation model (v1) — all models at once
 
### Details 

- Problem : `Regression`

- dataset : 'dataset3'

- Models : 
    1. scikit Linear Regression Model 
    2. Decision tree
    3. SVM
    4. Random forest 
    5. Stochastic Gradient Descent
    6. Ridge_Regression
    7. KNeighbors_Regression 
    8. Bayesian_Ridge_Regression
    9. Gaussian_Process_Regression 
    10. ExtraTreesRegressor
    
- label : numeric in the interval [0,10]


- Values are expressed as percentages to avoid problems tied to a specific number of classrooms, teachers, etc.


<br>

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics, tree, svm, model_selection
import math
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel


%matplotlib inline

# Get the dataset

In [4]:
# Read the Excel workbook into a DataFrame and preview its first five rows.
df = pd.read_excel(io="dataset3.xlsx")
df.head(n=5)

Unnamed: 0,SC_simultanite_Perc,salles_utilisees_Perc,salles_surutiliseess_Perc,places_videss_Perc,seances_samedis_Perc,Smidis_Perc,IntvDep8Hs_Perc,InterDepDisp6s_Perc,GrpDep8Hs_Perc,GrpDepDisp6s_Perc,SHDispoIntrv_Nbr,SDepDIntvs_Perc,SHPDIntvs_Perc,Label
0,75,16,86,0,28,32,32,68,14,93,1,74,68,3
1,11,35,80,0,76,60,24,68,76,32,18,49,84,2
2,36,95,20,80,80,50,76,72,1,0,26,28,3,3
3,98,60,36,80,84,60,81,50,40,70,15,51,76,0
4,60,0,52,88,26,88,76,31,40,32,8,51,28,2


<br>

# Insights about the dataset

> For insights about the dataset, see the [dataset study notebook](./DataSetStudy.ipynb).

<br>

# Splitting the dataset

In [5]:

# The dataset is small, so a single fixed-seed split is used:
# 75% of the rows for training, the remaining 25% for testing.
trainning_set = df.sample(frac=0.75, random_state=25)
test_set = df.drop(trainning_set.index)

# Fit the z-score parameters on the training rows only.
trainning_set_mean = trainning_set.mean()
# A constant column has std == 0; divide by 1 instead so it maps to 0, not NaN.
trainning_set_std = trainning_set.std().replace(0, 1)
trainning_set = (trainning_set - trainning_set_mean)/trainning_set_std

# Standardise the test rows with the *training* mean/std. Using the test set's
# own statistics would put train and test on different scales and would leak
# information about the held-out rows into the evaluation.
# Every column is standardised, 'Label' included, so the error metrics computed
# downstream are expressed in training-set standard deviations of the label.
test_set = (test_set - trainning_set_mean)/trainning_set_std

# NOTE(review): the "20%" in the last message is inaccurate (the hold-out is
# 25%); the string is left untouched so it matches the recorded output.
print("# DataSet shape : ", df.shape)
print("# trainning_set shape : ", trainning_set.shape)
print("# test_set shape  20% : ", test_set.shape)


# The first 13 columns are the features; 'Label' is the regression target.
X_train = trainning_set.iloc[:,0:13]
Y_train = trainning_set[['Label']]


X_test = test_set.iloc[:,0:13]
Y_test = test_set[['Label']]


# DataSet shape :  (1082, 14)
# trainning_set shape :  (812, 14)
# test_set shape  20% :  (270, 14)


<br>

# Create all models


In [None]:


# Kernel for the Gaussian-process model: dot-product term plus white noise.
kernel = DotProduct() + WhiteKernel()

# Seed handed to every estimator that draws random numbers, so that re-running
# the notebook reproduces the same results table.
MODEL_SEED = 0
# -1 lets joblib use all available cores instead of a machine-specific count.
N_JOBS = -1

dfs=[]

# (display name, unfitted estimator) pairs; each one is cross-validated on the
# training set and then evaluated once on the held-out test set.
models = [
    ('Simple_Linear_Reg', linear_model.LinearRegression()),
    ('Decision_Tree',  tree.DecisionTreeRegressor( random_state=MODEL_SEED ) ),
    ('SVM_Regression',  svm.SVR( kernel='rbf') ), # kernel type parameter
    ('Random_Forest_Reg',  RandomForestRegressor( n_estimators=13, n_jobs=N_JOBS, random_state=MODEL_SEED ) ),
    # alpha is the regularisation strength; it is kept tiny so that the model is
    # effectively unregularised.
    ('Stochastic_Gradient_Descent',  linear_model.SGDRegressor( alpha=.00000000000000000001, random_state=MODEL_SEED )),
    ('KNeighbors_Regression',  KNeighborsRegressor( n_neighbors=11) ),
    ('Bayesian_Ridge_Regression',   linear_model.BayesianRidge() ),
    ('Gaussian_Process_Regression',   GaussianProcessRegressor( kernel=kernel, random_state=0) ),
    ('Ridge_Regression',   linear_model.Ridge(alpha=.5) ),
    ('ExtraTreesRegressor',   ExtraTreesRegressor( n_estimators=13, n_jobs=N_JOBS, random_state=MODEL_SEED ) ),
]


results = []
names = []
target_names = list(range(11)) # targets : appreciation 0->10 (inclusive)

# Loop-invariant objects: the same seeded 2-fold splitter and the same flattened
# training labels are reused for every model, so all models see identical folds.
kfold = model_selection.KFold(n_splits=2, shuffle=True, random_state=90210)
y_train_flat = Y_train.values.ravel()

for name, model in models:
    # Cross-validate on the training set (cross_validate fits clones of the model).
    cv_results = model_selection.cross_validate(model, X_train, y_train_flat, cv=kfold)

    # Refit on the whole training set, then predict the held-out rows.
    model.fit(X_train, y_train_flat)
    y_pred = model.predict(X_test)

    results.append(cv_results)
    names.append(name)

    # One row per CV fold; the held-out test metrics are repeated on each row.
    this_df = pd.DataFrame(cv_results)
    this_df['test_m_score'] = round(model.score(X_test, Y_test), 2)  # estimator's default score (R^2 for regressors)
    this_df['test_mae'] = round(metrics.mean_absolute_error(Y_test, y_pred), 4)
    this_df['test_mse']  = round(metrics.mean_squared_error(Y_test, y_pred), 4)
    this_df['model'] = name
    dfs.append(this_df)

# Stack the per-model tables into a single results table.
final = pd.concat(dfs, ignore_index=True)



In [None]:
# Display the combined results table: cross-validation rows for every model,
# each annotated with that model's held-out test score, MAE and MSE.
final