In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Load the train/test CSVs and the additional semicolon-separated
# 'bank-full.csv' file that is appended to the training data below.
df_train = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')
df_aug = pd.read_csv('data/bank-full.csv', sep=';')

# Encode the augmentation target as 0/1, then rename its columns positionally
# to the training schema (everything except 'id').
# NOTE(review): this assumes both files list the same fields in the same
# order -- confirm against the raw CSV headers.
df_aug['y'] = df_aug['y'].map({'yes':1, 'no':0})
df_aug.columns = df_train.columns.drop('id')

# Keep identifiers and targets aside before they are dropped from the features.
df_train_ids = df_train['id']
df_test_ids = df_test['id']
df_train_y = df_train['y']
df_aug_y = df_aug['y']

# Drop unnecessary columns before the column transformation.
df_train = df_train.drop(columns=['id', 'y'])
df_test = df_test.drop(columns=['id'])
df_aug = df_aug.drop(columns=['y'])

# One-hot encode all three frames together so they share one dummy-column set.
df_all = pd.concat([df_train, df_test, df_aug], ignore_index=True)
categorical_cols = df_all.select_dtypes(include=['object']).columns.tolist()

df_all_transformed = pd.get_dummies(df_all, columns=categorical_cols, dummy_na=False)

# Split the encoded frame back into its three sources by row position.
n_train_rows = len(df_train)
n_test_rows = len(df_test)
df_train_transformed = df_all_transformed.iloc[:n_train_rows]
df_test_transformed = df_all_transformed.iloc[n_train_rows:n_train_rows + n_test_rows]
df_aug_transformed = df_all_transformed.iloc[n_train_rows + n_test_rows:]

# Reset both indexes: without ignore_index the features keep their df_all
# labels while the targets carry duplicated 0..n labels, so rows would only
# match by position and any label-based operation would misalign them.
x_all = pd.concat([df_train_transformed, df_aug_transformed], ignore_index=True)
y_all = pd.concat([df_train_y, df_aug_y], ignore_index=True)
assert len(x_all) == len(y_all), "features and target must have the same number of rows"

x_train, x_test, y_train, y_test = train_test_split(x_all, y_all, test_size=0.2, random_state=42)

In [None]:
import numpy as np
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
from xgboost import XGBClassifier

def classifier_func(X, y, XX, model_lg, model_xg,
                    n_splits=7, n_repeats=2, random_state=424):
    """Cross-validate a 50/50 LightGBM + XGBoost blend and predict on ``XX``.

    Both models are re-fitted on every fold of a repeated K-fold split of
    ``(X, y)``, with the fold's validation part passed as ``eval_set``. The
    two positive-class probabilities are averaged with equal weights.

    Parameters
    ----------
    X : pandas.DataFrame
        Training features (indexed positionally via ``.iloc``).
    y : pandas.Series
        Binary target, positionally aligned with ``X``.
    XX : DataFrame
        Features to predict on; must have the same columns as ``X``.
    model_lg, model_xg : estimators
        Objects exposing ``fit(X, y, eval_set=...)`` and ``predict_proba``;
        ``model_xg.fit`` must additionally accept ``verbose``.
    n_splits : int, default 7
        Number of folds per repetition.
    n_repeats : int, default 2
        Number of times the K-fold split is repeated.
    random_state : int, default 424
        Seed for the repeated K-fold shuffling.

    Returns
    -------
    numpy.ndarray
        Blended probability for each row of ``XX``, averaged over all
        ``n_splits * n_repeats`` fitted fold models.
    """
    predict = np.zeros(len(XX))
    oof_all = np.zeros(len(X))

    rkf = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

    for fold, (train_idx, valid_idx) in enumerate(rkf.split(X, y)):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_valid, y_valid = X.iloc[valid_idx], y.iloc[valid_idx]

        print(f'\n:::::::::::::::::: Fold ~ {fold+1} :::::::::::::::::::')
        model_lg.fit(X_train, y_train,
                     eval_set=[(X_valid, y_valid)])

        model_xg.fit(X_train, y_train,
                     eval_set=[(X_valid, y_valid)],
                     verbose=0)

        # Out-of-fold predictions: equal-weight blend of both models.
        oof1 = model_lg.predict_proba(X_valid)[:, 1]
        oof2 = model_xg.predict_proba(X_valid)[:, 1]

        oof = (oof1 * 0.5) + (oof2 * 0.5)
        # Accumulated here and divided by n_repeats after the loop, since each
        # row is validated once per repetition.
        oof_all[valid_idx] += oof

        score_fold = roc_auc_score(y_valid, oof)
        print('LightGBM & XGBoost')
        print('AUC | Score Fold:', round(score_fold, 4))

        # Predictions on XX from this fold's models, summed across folds.
        prd1 = model_lg.predict_proba(XX)[:, 1]
        prd2 = model_xg.predict_proba(XX)[:, 1]

        prd = (prd1 * 0.5) + (prd2 * 0.5)
        predict += prd

    # Average the out-of-fold predictions over the repetitions and score once.
    oof_all = oof_all / n_repeats
    score = roc_auc_score(y, oof_all)

    print('\n', '='* 40)
    print(' .'* 20)
    print('\tAUC Score:', round(score, 4))
    print(' .'* 20)
    print('='* 40, '\n')

    # get_n_splits is the total number of fitted fold models (splits x repeats).
    predict = predict / rkf.get_n_splits(X, y)

    print('Number of predictions:', len(predict))
    print('predict:', predict)

    return predict
    



In [None]:
# Keyword arguments unpacked into lgb.LGBMClassifier when the model is built.
# A 'max_bin' override (3600) is intentionally left disabled.
params_lg1 = dict(
    device='gpu',
    objective='binary',
    metric='auc',
    learning_rate=0.03,
    num_leaves=230,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.6,
    min_child_samples=83,
    reg_alpha=3.7,
    reg_lambda=4.7,
    verbose=-1,
    random_state=420,
)

# Keyword arguments unpacked into XGBClassifier when the model is built.
params_xg1 = dict(
    device='cuda',
    eval_metric='auc',
    learning_rate=0.01,
    max_depth=9,
    subsample=0.8,
    min_child_weight=26,
    enable_categorical=True,
    random_state=420,
)

# .....................................................................................................
# Both boosters get a large n_estimators budget together with
# early_stopping_rounds=150; classifier_func passes each fold's validation
# part as eval_set when fitting.
model_lg = lgb.LGBMClassifier(**params_lg1, n_estimators=10000, early_stopping_rounds=150)
model_xg = XGBClassifier(**params_xg1, n_estimators=10000, early_stopping_rounds=150)

# Arguments are (features, target, features-to-predict). The previous call
# passed an undefined name `y` as the target and the label Series `y_train`
# as the prediction matrix.
# NOTE(review): this predicts on the hold-out split `x_test`; pass
# `df_test_transformed` instead to score the competition test set -- confirm
# which one is intended.
predict = classifier_func(x_train, y_train, x_test, model_lg, model_xg)