# Boilerplate

In [1]:
%load_ext autoreload
%autoreload

import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns
import scipy.stats as stats
import statsmodels.stats.api as sms
import json
import optuna

from IPython.display import display
from IPython.display import Markdown as md
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

def f():
    """Switch pandas to full display mode: no cap on the columns or rows shown."""
    for option_name in ('display.max_columns', 'display.max_rows'):
        pd.set_option(option_name, None)
    
def nf():
    """Switch pandas to compact display mode: every column, at most 4 rows."""
    compact_options = {'display.max_columns': None, 'display.max_rows': 4}
    for option_name, option_value in compact_options.items():
        pd.set_option(option_name, option_value)
# Start the session in compact display mode.
nf()
import warnings
# Blanket filter: suppresses warnings of every category from here on.
warnings.filterwarnings('ignore')

def dp(df, r = 5, c = None):
    """Display `df` with temporary pandas row/column limits.

    The limits apply only while `df` is being rendered; the global pandas
    display options are restored afterwards.

    Parameters
    ----------
    df : object to render (typically a DataFrame or Series).
    r : value used for 'display.max_rows' (None means no limit).
    c : value used for 'display.max_columns' (None means no limit).
    """
    # Local import keeps the helper usable even if the cell-level import is removed.
    from IPython.display import display
    with pd.option_context('display.max_rows', r, 'display.max_columns', c):
        display(df)

def fg(w = 6, h = 4, dpi = 120):
    """Set matplotlib's default figure size to (w, h) and its default DPI to `dpi`."""
    plt.rcParams.update({
        'figure.figsize': (w, h),
        'figure.dpi': dpi,
    })
# Apply the default figure size and DPI for the rest of the notebook.
fg()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from libs.pipeline import Pipe
import lightgbm as lgb

In [3]:
# Load the raw CSV, lower-case the column names, drop the `id` column and
# rename the label column to `target`.
df = (
    pd.read_csv("data/UCI_Credit_Card.csv")
    .rename(columns = str.lower)
    .drop(columns = 'id')
    .rename(columns = {'default.payment.next.month': 'target'})
)

# Persist the prepared frame as parquet, without the index.
df.to_parquet('data/df_to_feature_selection.parquet', index = False)

In [4]:
from sklearn.model_selection import KFold

# Split the frame into features (X) and label (y).
target_var = 'target'
X = df.drop(target_var, axis = 1)
y = df[target_var]

# 3 shuffled folds with a fixed seed so the splits are reproducible.
kf = KFold(n_splits=3, shuffle=True, random_state=0)
train_scores = []
test_scores = []
# NOTE(review): the loop only materialises the splits; nothing is fitted or
# scored, so `train_scores` / `test_scores` stay empty and only the last
# fold's X_train/X_test/y_train/y_test survive the loop.
for train_index, test_index in kf.split(X):
    # kf.split yields positional indices, so select with .iloc on both X and y
    # (plain y[...] is label-based and only matches on a default RangeIndex).
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

# Optuna

In [8]:
from libs.tuning_cv import Tuning

In [9]:
# Configure the tuner: 2 trials, maximising the F1 score on the `target` column.
tuning = Tuning(
    df,
    target_var = 'target',
    n_trials = 2,
    metric = 'f1_score',
    direction = 'maximize',
)

In [10]:
# Run the hyper-parameter search; Optuna logs one line per finished trial.
tuning.exec()

[I 2023-03-01 08:28:37,963] A new study created in memory with name: no-name-95467fa9-b2ef-4596-80d8-b94771d06142


Cross validation - 1
Calculating optuna final objective metrics
[1 1 0 ... 0 1 0]
final first cross validation
Cross validation - 2
Calculating optuna final objective metrics
[0 0 0 ... 1 1 1]
final first cross validation
Cross validation - 3


[I 2023-03-01 08:28:39,603] Trial 0 finished with value: 0.429889111041941 and parameters: {'boosting': 'dart', 'n_estimators': 110, 'learning_rate': 0.06999999999999999, 'max_depth': 7, 'num_leaves': 20, 'lambda_l1': 0, 'lambda_l2': 10, 'bagging_fraction': 0.2, 'bagging_freq': 6, 'feature_fraction': 0.4}. Best is trial 0 with value: 0.429889111041941.


Calculating optuna final objective metrics
[1 1 0 ... 1 1 1]
final first cross validation
Cross validation - 1
Calculating optuna final objective metrics
[1 1 0 ... 0 1 0]
final first cross validation
Cross validation - 2
Calculating optuna final objective metrics
[0 0 0 ... 1 1 1]
final first cross validation
Cross validation - 3


[I 2023-03-01 08:28:41,003] Trial 1 finished with value: 0.4106941180257782 and parameters: {'boosting': 'dart', 'n_estimators': 150, 'learning_rate': 0.06999999999999999, 'max_depth': 3, 'num_leaves': 35, 'lambda_l1': 40, 'lambda_l2': 80, 'bagging_fraction': 0.5, 'bagging_freq': 11, 'feature_fraction': 0.5}. Best is trial 0 with value: 0.429889111041941.


Calculating optuna final objective metrics
[1 1 0 ... 1 1 1]
final first cross validation


<optuna.study.study.Study at 0x7fbfc8434190>

In [13]:
# Hyper-parameters of the best trial recorded by the study.
bestparams = tuning.study.best_params

bestparams

{'boosting': 'dart',
 'n_estimators': 110,
 'learning_rate': 0.06999999999999999,
 'max_depth': 7,
 'num_leaves': 20,
 'lambda_l1': 0,
 'lambda_l2': 10,
 'bagging_fraction': 0.2,
 'bagging_freq': 6,
 'feature_fraction': 0.4}

In [None]:
# NOTE(review): `ma` is not defined in any visible cell, so this line raises
# NameError on a fresh kernel. Presumably it was a baseline pipeline object
# created in a since-deleted cell — restore that cell or define `ma` here.
baseparams = ma.model.get_params()

In [None]:
# LightGBM classifier configured with the tuned hyper-parameters, wrapped in
# the project's Pipe helper.
best = lgb.LGBMClassifier().set_params(**bestparams)

mbest = Pipe(df, target_var = 'target', model = best)

# NOTE(review): `popin` / `oos` look like in-sample and out-of-sample score
# sets — confirm against libs.pipeline.
mbest.popin.scores

mbest.oos.scores

In [None]:
# Same evaluation as the tuned model, but using `baseparams` as the
# reference configuration to compare against.
base = lgb.LGBMClassifier().set_params(**baseparams)

mbase = Pipe(df, target_var = 'target', model = base)

# NOTE(review): `popin` / `oos` look like in-sample and out-of-sample score
# sets — confirm against libs.pipeline.
mbase.popin.scores

mbase.oos.scores

In [None]:
# Relative importance of each hyper-parameter for the objective value.
optuna.visualization.plot_param_importances(tuning.study)

In [None]:
# Objective value per trial, in trial order.
optuna.visualization.plot_optimization_history(tuning.study)

In [None]:
# Intermediate values reported by each trial.
# NOTE(review): only informative if the objective reports intermediate values — confirm in libs.tuning_cv.
optuna.visualization.plot_intermediate_values(tuning.study)

In [None]:
# Slice plot: objective value against each hyper-parameter individually.
optuna.visualization.plot_slice(tuning.study)

In [None]:
# Contour plot: objective value across pairs of hyper-parameters.
optuna.visualization.plot_contour(tuning.study)

In [None]:
# Parallel-coordinate view of the hyper-parameter combinations tried.
optuna.visualization.plot_parallel_coordinate(tuning.study)