In [None]:
!pip install lightgbm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import GridSearchCV, cross_validate, RandomizedSearchCV, validation_curve

from lightgbm import LGBMClassifier
pd.set_option("display.max_columns", None)
warnings.simplefilter(action="ignore", category = Warning)

In [None]:
# Read the dataset, then split it into the feature frame X (everything except
# the "Outcome" column) and the target series y (the "Outcome" column).
df = pd.read_csv("diabetes.csv")
X = df.drop(columns="Outcome")
y = df["Outcome"]

In [None]:
# Baseline classifier: only the seed is set explicitly, everything else is left
# at the library defaults.
lgbm_model = LGBMClassifier(random_state=17)
# Last expression of the cell, so the notebook renders the parameter dict.
lgbm_model.get_params()

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 31,
 'objective': None,
 'random_state': 17,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': True,
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0}

In [None]:
def cv_func_result(model, X, y, cv=10, scoring=("accuracy", "f1", "roc_auc"), returns=False):
  """Cross-validate ``model`` and print the mean test score of each metric.

  Parameters
  ----------
  model
      Estimator forwarded unchanged to ``cross_validate``.
  X, y
      Features and target forwarded unchanged to ``cross_validate``.
  cv
      Cross-validation setting forwarded to ``cross_validate`` (default 10).
  scoring : str or iterable of str
      Metric name(s) forwarded unchanged to ``cross_validate``. A single
      string is handled as one metric instead of being iterated character
      by character. Every requested metric is printed, not only
      accuracy / f1 / roc_auc.
  returns : bool
      When true, the raw ``cross_validate`` result dict is returned.

  Returns
  -------
  dict or None
      The ``cross_validate`` result when ``returns`` is true, else ``None``.
  """
  cv_results = cross_validate(model, X=X, y=y, cv=cv, scoring=scoring)

  # cross_validate stores a single string metric under "test_score" and
  # multiple named metrics under "test_<name>".
  if isinstance(scoring, str):
    metric_keys = [(scoring, "test_score")]
  else:
    metric_keys = [(name, "test_{0}".format(name)) for name in scoring]

  # Keep the historical labels for the three default metrics; any other
  # metric is reported under its own name.
  labels = {"accuracy": "Accuracy", "f1": "F1", "roc_auc": "ROC_AUC"}
  for name, key in metric_keys:
    print("{0} test mean = {1}".format(labels.get(name, name), cv_results[key].mean()))

  if returns:
    return cv_results

# Baseline: 5-fold cross-validation of the untuned model.
cv_func_result(model=lgbm_model, X=X, y=y, cv=5)

Accuracy test mean = 0.7487479840421016
F1 test mean = 0.6191041399705154
ROC_AUC test mean = 0.8028567435359889


# Hiperparametre optimizasyonu yapmadan önceki skorlarımız:
* Accuracy test mean = 0.7487479840421016
* F1 test mean = 0.6191041399705154
* ROC_AUC test mean = 0.8028567435359889

In [None]:
# Grid-search space: 2 x 4 x 3 = 24 candidate combinations.
lgbm_params = {
    "learning_rate": [0.1, 0.01],
    "n_estimators": [100, 300, 500, 1000],
    "colsample_bytree": [0.5, 0.7, 1],
}

def model_best_params(model, params, cv=5, n_jobs=-1, verbose =True, random_state =17, X=None, y=None):
  """Grid-search ``params`` for ``model`` and return a refitted copy of it.

  Parameters
  ----------
  model
      Estimator to tune. It is NOT modified: the best parameters are applied
      to a copy, so the caller keeps an untouched baseline.
  params : dict
      Parameter grid forwarded to ``GridSearchCV``.
  cv, n_jobs, verbose
      Forwarded to ``GridSearchCV``.
  random_state
      Seed set on the returned estimator. It takes precedence over any
      ``random_state`` entry selected from ``params``.
  X, y
      Training data. When omitted, the notebook-level ``X`` / ``y`` globals
      are used, which keeps older two-argument calls working.

  Returns
  -------
  A copy of ``model`` with the best grid-search parameters set, fitted on
  ``X`` and ``y``.
  """
  # Standard-library copy, imported locally so the cell has no extra
  # top-of-notebook dependency.
  from copy import deepcopy

  # Backward compatibility: fall back to the notebook globals when no data
  # is passed explicitly.
  if X is None:
    X = globals()["X"]
  if y is None:
    y = globals()["y"]

  model_best_grid = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(X, y)
  for name, value in model_best_grid.best_params_.items():
    print('For {0} best param {1}'.format(name, value))

  print("Set with the best parameters of your model...")
  # Merge into one dict so a "random_state" key in the grid cannot collide
  # with the explicit keyword (that would raise a duplicate-keyword TypeError).
  final_params = dict(model_best_grid.best_params_)
  final_params["random_state"] = random_state
  # Work on a copy: set_params mutates in place and would otherwise overwrite
  # the caller's baseline model.
  model_final = deepcopy(model).set_params(**final_params).fit(X, y)
  return model_final

# Tune the baseline model over lgbm_params and keep the refitted estimator.
lgbm_final = model_best_params(model=lgbm_model, params=lgbm_params)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
For colsample_bytree best param 0.7
For learning_rate best param 0.01
For n_estimators best param 300
Set with the best parameters of your model...


In [None]:
# 5-fold cross-validation of the tuned model.
# NOTE(review): the hyperparameters were selected by a grid search fitted on this
# same X / y, so these scores may be optimistic - confirm on a held-out split.
cv_func_result(model=lgbm_final, X=X, y=y, cv=5)

Accuracy test mean = 0.7656650539003481
F1 test mean = 0.6235412536580761
ROC_AUC test mean = 0.8215443745632426


# İlk hiperparametre optimizasyonundan sonraki skorlarımız:
* Accuracy test mean = 0.7656650539003481
* F1 test mean = 0.6235412536580761
* ROC_AUC test mean = 0.8215443745632426