In [33]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt      
from sklearn import preprocessing, linear_model, model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import linear_model, metrics, preprocessing
from sklearn.metrics import r2_score ,mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV , RandomizedSearchCV ,train_test_split
from catboost import CatBoostRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from ipynb.fs.full.set_up import data_prep


In [34]:
def tune_model(X_train_scaled, y_train, param_grid=None, cv=5):
    """Grid-search CatBoostRegressor hyperparameters with cross-validation.

    Parameters
    ----------
    X_train_scaled : array-like
        Training feature matrix, passed unchanged to ``GridSearchCV.fit``.
    y_train : array-like
        Training targets.
    param_grid : dict, optional
        Mapping of CatBoostRegressor parameter names to lists of candidate
        values. When omitted, the notebook's original 16-candidate grid
        (iterations, learning_rate, depth, l2_leaf_reg) is searched.
    cv : int or cross-validation splitter, optional
        Forwarded to ``GridSearchCV(cv=...)``. Defaults to 5 folds.

    Returns
    -------
    best_params : dict
        ``best_params_`` of the fitted search.
    best_score : float
        ``best_score_`` of the fitted search. No ``scoring`` is passed, so
        this is the estimator's own default score (presumably R^2 for a
        regressor -- confirm against the CatBoost docs).
    df : pandas.DataFrame
        ``cv_results_`` of the search, as a DataFrame.

    Side effects
    ------------
    Prints the best parameter combination to stdout.
    """
    if param_grid is None:
        # Built per call (not as a default argument) so the dict is never
        # shared between calls.
        param_grid = {
            'iterations': [1000, 1500],
            'learning_rate': [0.01, 0.05],
            'depth': [8, 10],
            'l2_leaf_reg': [3, 9],
        }

    grid_search_cv = GridSearchCV(
        CatBoostRegressor(logging_level='Silent'),
        param_grid,
        n_jobs=-1,
        verbose=1,
        cv=cv,
    )
    grid_search_cv.fit(X_train_scaled, y_train)

    best_params = grid_search_cv.best_params_
    best_score = grid_search_cv.best_score_
    print(best_params)

    # Full per-candidate results, as a DataFrame for easier inspection.
    df = pd.DataFrame(grid_search_cv.cv_results_)

    return best_params, best_score, df


def return_best_model(X_train_scaled, y_train, best_params, X_test_scaled=None):
    """Fit a CatBoostRegressor with the given parameters and predict on test data.

    Parameters
    ----------
    X_train_scaled : array-like
        Training feature matrix.
    y_train : array-like
        Training targets.
    best_params : dict
        Keyword arguments for ``CatBoostRegressor`` (e.g. the ``best_params``
        returned by ``tune_model``). ``logging_level='Silent'`` is always added.
    X_test_scaled : array-like, optional
        Feature matrix to predict on. When omitted, the module/notebook-level
        variable named ``X_test_scaled`` is used, which is what this function
        previously relied on implicitly.

    Returns
    -------
    reg : CatBoostRegressor
        The fitted model.
    y_pred
        Result of ``reg.predict`` on the test features.

    Raises
    ------
    NameError
        If ``X_test_scaled`` is neither passed nor defined at module level.
    """
    if X_test_scaled is None:
        # Backward compatibility with the original three-argument call, which
        # read the test matrix from notebook-global state. Resolve it *before*
        # fitting so a missing variable fails fast instead of after training.
        try:
            X_test_scaled = globals()["X_test_scaled"]
        except KeyError:
            raise NameError(
                "name 'X_test_scaled' is not defined; pass it explicitly as "
                "the X_test_scaled argument"
            ) from None

    reg = CatBoostRegressor(**best_params, logging_level='Silent')
    reg.fit(X_train_scaled, y_train)
    y_pred = reg.predict(X_test_scaled)
    return reg, y_pred

 
def evaluate_model(y_test, y_pred):
    """Score predictions against the true targets.

    Returns a ``(mae, r2)`` tuple: the mean absolute error followed by the
    R^2 score, both computed with the corresponding ``sklearn.metrics``
    functions on ``(y_test, y_pred)``.
    """
    return mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

In [35]:
# 1. Prepare the data. data_prep() comes from the `set_up` notebook and returns
#    six objects, unpacked here in order (presumably scaled and unscaled
#    train/test splits -- verify against set_up). X_train / X_test are not used
#    further in this cell.
X_train_scaled,  X_test_scaled  ,y_train, y_test , X_train, X_test = data_prep()

# 2. Tune the model: grid search over CatBoost hyperparameters on the training
#    data. df_2 is the search's cv_results_ as a DataFrame.
best_params, best_score ,df_2 = tune_model(X_train_scaled, y_train)

# 3. Refit a model with the best parameters and predict on the test features.
#    NOTE(review): `forest` holds a CatBoostRegressor, not a random forest; the
#    name is kept because later cells may reference it.
forest, y_pred = return_best_model(X_train_scaled, y_train, best_params)

# 4. Evaluate the test-set predictions (mean absolute error and R^2).
mae, r2 = evaluate_model(y_test, y_pred)

# Report results. best_params is also printed inside tune_model, so it appears
# twice in the output. best_score is GridSearchCV's best_score_ (cross-validated
# on the training data); MAE and R2 are computed on the test predictions.
print("Best Parameters:", best_params)
print("Best Score:", best_score)
print("MAE:", mae) 
print("R2:", r2)

(58945, 20)
(35715, 12)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
{'depth': 8, 'iterations': 1500, 'l2_leaf_reg': 3, 'learning_rate': 0.05}
Best Parameters: {'depth': 8, 'iterations': 1500, 'l2_leaf_reg': 3, 'learning_rate': 0.05}
Best Score: 0.7820667154224821
MAE: 315044.11380542687
R2: 0.7719588020079527


In [None]:
# Recorded output of the tuning/evaluation cell above, kept for reference:
# (58945, 20)
# (35715, 12)
# Fitting 5 folds for each of 16 candidates, totalling 80 fits
# {'depth': 8, 'iterations': 1500, 'l2_leaf_reg': 3, 'learning_rate': 0.05}
# Best Parameters: {'depth': 8, 'iterations': 1500, 'l2_leaf_reg': 3, 'learning_rate': 0.05}
# Best Score: 0.7820667154224821
# MAE: 315044.11380542687
# R2: 0.7719588020079527