### Model Training

In [14]:
#basic import 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

#modelling 
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import warnings

In [3]:
# Load the student performance dataset (path is relative to the notebook).
df = pd.read_csv('data/stud.csv')

In [4]:
# Preview the first rows to confirm the column names and value formats.
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [7]:
# Feature matrix: every column except the regression target.
X = df.drop(columns=['math_score'])

In [6]:
# Regression target: the math score column.
y = df['math_score']

In [None]:
# Build a ColumnTransformer that applies two transformers, each to its own
# subset of columns: one-hot encoding for the object-dtype (categorical)
# columns and standard scaling for all remaining (numeric) columns.
num_features = X.select_dtypes(exclude='object').columns
cat_features = X.select_dtypes(include='object').columns

from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

# Each transformer only sees the columns assigned to it; the ColumnTransformer
# then concatenates the results in the order listed here (one-hot encoded
# columns first, scaled numeric columns after). The resulting `preprocessor`
# can be fitted on one dataset and reused to transform others.
preprocessor = ColumnTransformer(
    [
        ('OneHotEncoder', oh_transformer, cat_features),
        ('StandardScaler', numeric_transformer, num_features),
    ]
)

In [11]:
# Fit the preprocessor and encode the features. The input is rebuilt from `df`
# instead of being read from `X`, so re-running this cell does not feed the
# already-transformed matrix back into the transformer.
# NOTE(review): the preprocessor is fitted on all rows before the train/test
# split, so the fitted statistics include the test rows — consider fitting on
# the training rows only.
X = preprocessor.fit_transform(df.drop(columns=['math_score']))

In [12]:
# Sanity check: (rows, encoded feature columns) after preprocessing.
X.shape

(1000, 19)

In [13]:
# Separate the dataset into train and test splits: 20% of the rows are held
# out for testing, and the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
(X_train.shape, X_test.shape)

((800, 19), (200, 19))

Create an evaluation function that returns all metrics after model training


In [15]:
def evaluate_model(true,predicted):
    """Compute regression metrics for a set of predictions.

    Args:
        true: Ground-truth target values.
        predicted: Predicted values, aligned element-wise with ``true``.

    Returns:
        A tuple ``(mae, rmse, r2_square)``: mean absolute error, root mean
        squared error and R2 score, in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # The MSE is only needed as an intermediate for the RMSE, so compute it
    # once inline rather than keeping an unused local around.
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [None]:
# Seed for the estimators that are stochastic by default, so repeated runs of
# this cell give the same scores (same value as the train/test split seed).
SEED = 42

# Candidate regressors to compare, keyed by display name.
models = {
    'Linear Regression': LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(random_state=SEED),
    "Random forest Regressor": RandomForestRegressor(random_state=SEED),
    "XGBRegressor": XGBRegressor(),
    "Catboosting Regressor": CatBoostRegressor(verbose=False),
    "Ada boost regressor": AdaBoostRegressor(random_state=SEED),
}
model_list = []  # model names, in evaluation order
r2_list = []     # test-set R2 score for the model at the same position

for model_name, model in models.items():
    model.fit(X_train, y_train)  # train on the training split only

    # Predict on both splits so train and test metrics can be compared.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # evaluate_model returns (mae, rmse, r2) in that order.
    model_train_mae, model_train_rmse, model_train_r2 = evaluate_model(y_train, y_train_pred)
    model_test_mae, model_test_rmse, model_test_r2 = evaluate_model(y_test, y_test_pred)

    print(model_name)
    model_list.append(model_name)

    print('Model performance for training set')
    print("- Root Mean Squared Error: {:.4f}".format(model_train_rmse))
    print("- Mean Absolute Error: {:.4f}".format(model_train_mae))
    print("- R2 score: {:.4f}".format(model_train_r2))

    print('-'*35)

    print('Model performance for test set')
    print("- Root Mean Squared Error: {:.4f}".format(model_test_rmse))
    print("- Mean Absolute Error: {:.4f}".format(model_test_mae))
    print("- R2 score: {:.4f}".format(model_test_r2))
    # Models are ranked on generalisation, so only the test R2 is recorded.
    r2_list.append(model_test_r2)

    print('='*35)
    print('\n')
          