In [None]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 78 kB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.6


In [None]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import GridSearchCV, cross_validate, RandomizedSearchCV, validation_curve

from catboost import CatBoostClassifier
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Ignore every warning category (Warning is the base class of all of them).
warnings.simplefilter("ignore", Warning)

In [None]:
# Read "diabetes.csv" and separate the "Outcome" column (target) from the
# remaining columns (features).
df = pd.read_csv("diabetes.csv")
X = df.drop(columns=["Outcome"])
y = df["Outcome"]

In [None]:
# CatBoost classifier with a fixed seed and verbose output disabled; every
# other hyperparameter is left at the library default.
catboost_model = CatBoostClassifier(
    random_state=17,
    verbose=False,
)

In [None]:
def cv_func_result(model, X, y, cv=10, scoring=("accuracy", "f1", "roc_auc"), returns=False):
  """Cross-validate ``model`` and print the mean test score of every metric.

  Args:
    model: Estimator handed to ``cross_validate``.
    X: Feature data handed to ``cross_validate``.
    y: Target values handed to ``cross_validate``.
    cv: Cross-validation setting forwarded to ``cross_validate``.
    scoring: A single metric name, ``None``, or an iterable/dict of metric
      names; forwarded unchanged to ``cross_validate``.
    returns: When true, the ``cross_validate`` result dict is returned.

  Returns:
    The dict produced by ``cross_validate`` when ``returns`` is true,
    otherwise ``None``.
  """
  cv_results = cross_validate(model, X=X, y=y, cv=cv, scoring=scoring)

  # With a single metric (a string or None) cross_validate stores the scores
  # under "test_score"; with several metrics it uses "test_<name>" per metric.
  if scoring is None:
    result_keys = {"score": "test_score"}
  elif isinstance(scoring, str):
    result_keys = {scoring: "test_score"}
  else:
    result_keys = {name: "test_" + name for name in scoring}

  # Keep the established labels for the three usual metrics; any other metric
  # is printed under its own name instead of being silently skipped.
  labels = {"accuracy": "Accuracy", "f1": "F1", "roc_auc": "ROC_AUC"}
  for name, key in result_keys.items():
    print("{0} test mean = {1}".format(labels.get(name, name), cv_results[key].mean()))

  if returns:
    return cv_results

# 5-fold cross-validation of the classifier before any hyperparameter tuning.
cv_func_result(model=catboost_model, X=X, y=y, cv=5)

Accuracy test mean = 0.7735251676428148
F1 test mean = 0.6502723851348231
ROC_AUC test mean = 0.8378923829489867


# Hiperparametre optimizasyonu yapmadan önceki skorlarımız:
* Accuracy test mean = 0.7735251676428148
* F1 test mean = 0.6502723851348231
* ROC_AUC test mean = 0.8378923829489867

In [None]:
# Candidate values for the grid search: 2 x 2 x 2 = 8 combinations.
catboost_params = dict(
    learning_rate=[0.1, 0.01],
    depth=[3, 6],
    iterations=[200, 500],
)

def model_best_params(model, params, cv=5, n_jobs=-1, verbose=True, random_state=17):
  """Grid-search ``params`` for ``model`` and return a tuned, fitted estimator.

  The search and the final fit both use the notebook-level ``X`` and ``y``.

  Args:
    model: Estimator to tune; it is not modified by this function.
    params: Parameter grid handed to ``GridSearchCV``.
    cv: Cross-validation setting forwarded to ``GridSearchCV``.
    n_jobs: Parallelism setting forwarded to ``GridSearchCV``.
    verbose: Verbosity setting forwarded to ``GridSearchCV``.
    random_state: Seed set on the final estimator together with the best
      parameters.

  Returns:
    A copy of ``model`` configured with the best parameters found and
    ``random_state``, fitted on ``X`` and ``y``.
  """
  # Local import so this cell does not depend on an edit to the import cell.
  from sklearn.base import clone

  model_best_grid = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(X, y)
  for name, value in model_best_grid.best_params_.items():
    print('For {0} best param {1}'.format(name, value))

  print("Set with the best parameters of your model...")
  # Configure a clone instead of ``model`` itself: calling set_params/fit on
  # the argument would silently turn the caller's untuned estimator into the
  # tuned one, so earlier cells would give different results when re-run.
  model_final = clone(model).set_params(**model_best_grid.best_params_, random_state=random_state)
  return model_final.fit(X, y)

# Tune the classifier over the grid above and keep the fitted result.
catboost_final = model_best_params(model=catboost_model, params=catboost_params)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
For depth best param 3
For iterations best param 500
For learning_rate best param 0.01
Set with the best parameters of your model...


In [None]:
# 5-fold cross-validation of the estimator returned by the grid search.
cv_func_result(model=catboost_final, X=X, y=y, cv=5)

Accuracy test mean = 0.7721755368814192
F1 test mean = 0.6322580676028952
ROC_AUC test mean = 0.842001397624039


# Hiperparametre optimizasyonundan sonraki skorlarımız:
* Accuracy test mean = 0.7721755368814192
* F1 test mean = 0.6322580676028952
* ROC_AUC test mean = 0.842001397624039