# IEEE-CIS Fraud Detection -- Model [AdaBoost]

In [1]:
import pandas as pd
import numpy as np
import gc
import warnings
warnings.filterwarnings('ignore')

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
# Plot appearance for the whole notebook. The calls are applied in sequence
# (matplotlib 'classic' stylesheet, then seaborn's 'deep' palette, then
# seaborn's 'white' style), so keep this order when editing.
plt.style.use('classic')
sns.set_palette('deep')
sns.set_style('white')
%matplotlib inline

In [3]:
# Feature tables for the train and test splits.
train_features = pd.read_csv('./Data/train_features.csv')
test_features = pd.read_csv('./Data/test_features.csv')

# header=None: the first row of the target file is read as data, not as
# column names, so train_target is a DataFrame with integer column labels.
train_target = pd.read_csv('./Data/train_target.csv', header=None)

## Model

In [8]:
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, auc
from sklearn.ensemble import AdaBoostClassifier
try:
    import joblib
except ImportError:
    # Fallback for old environments only: scikit-learn < 0.23 still bundles joblib.
    from sklearn.externals import joblib

In [11]:
# Hand-picked AdaBoost settings used for the baseline cross-validation run.
ada_params = dict(
    n_estimators=100,
    learning_rate=0.8,
    random_state=0,
)

In [18]:
def model_fn(clf, params, n_splits=5, random_state=0):
    """Cross-validate an estimator class on the training data and report ROC AUC.

    Reads the notebook-level ``train_features`` and ``train_target`` frames.
    For every fold a fresh ``clf(**params)`` is fitted on the training split
    and the second column of ``predict_proba`` on the held-out split is scored
    with ``roc_auc_score``. Per-fold and mean AUC are printed.

    Parameters
    ----------
    clf : callable
        Estimator class or factory; called as ``clf(**params)``. The result
        must provide ``fit`` and ``predict_proba``.
    params : dict
        Keyword arguments forwarded to ``clf``.
    n_splits : int, default 5
        Number of cross-validation folds.
    random_state : int or None, default 0
        Seed for the fold shuffling. Pass ``None`` to get a different
        partition on every call.

    Returns
    -------
    float
        The negated mean AUC over the folds (lower is better).

    Raises
    ------
    ValueError
        If the flattened target does not have one label per feature row.
    """
    print("############## New Run ################")
    print("PARAMETERS: ")
    print(f"params  = {params}")

    # Seed the shuffle: an unseeded KFold draws new folds on every call, so
    # scores from two runs (or two parameter sets) are not comparable.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # train_target is read with header=None and is therefore a DataFrame;
    # flatten it once so fit/roc_auc_score receive 1-D labels.
    target = np.ravel(train_target)
    if target.shape[0] != train_features.shape[0]:
        raise ValueError(
            f"train_target has {target.shape[0]} values but train_features "
            f"has {train_features.shape[0]} rows"
        )

    fold_scores = []
    print("CV SCORE: ")
    for train_idx, validation_idx in kf.split(train_features):
        model = clf(**params)
        model.fit(train_features.iloc[train_idx, :], target[train_idx])

        # Column 1 of predict_proba is the score passed to roc_auc_score.
        y_pred_validation = model.predict_proba(
            train_features.iloc[validation_idx, :]
        )[:, 1]
        score = roc_auc_score(target[validation_idx], y_pred_validation)
        fold_scores.append(score)
        print(f'AUC: {score}')

    mean_auc = sum(fold_scores) / n_splits
    print(f'Mean AUC: {mean_auc} \n')
    return -mean_auc

In [19]:
# Baseline: cross-validate AdaBoost with the hand-picked ada_params.
model_fn(clf=AdaBoostClassifier, params=ada_params)

############## New Run ################
PARAMETERS: 
params  = {'n_estimators': 100, 'learning_rate': 0.8, 'random_state': 0}
CV SCORE: 
AUC: 0.8773607268760287
AUC: 0.8749044535107535
AUC: 0.8859727954435082
AUC: 0.8773056258410896
AUC: 0.8790144924205275
Mean AUC: 0.8789116188183815 



-0.8789116188183815

### Hyperparameter search for best params (hyperopt TPE)

In [14]:
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, STATUS_RUNNING

In [27]:
# Sample submission table from the data directory.
# NOTE(review): no submission file is written in the visible cells of this
# notebook -- confirm this load is still required.
sample_submission = pd.read_csv('./Data/sample_submission.csv')

In [29]:
# Search space sampled by hyperopt for the AdaBoost objective.
ada_space = dict(
    # quniform yields floats (e.g. 120.0); the objective casts them to int.
    n_estimators=hp.quniform('n_estimators', 10, 500, 10),
    learning_rate=hp.uniform('learning_rate', 0.03, 0.2),
    # Constant entry, not a sampled dimension.
    random_state=0,
)

In [32]:
def ada_grid(params, n_splits=5, cv_random_state=0):
    """Hyperopt objective: cross-validated ROC AUC of AdaBoost for one sample.

    Reads the notebook-level ``train_features`` and ``train_target`` frames,
    fits an ``AdaBoostClassifier`` per fold and scores the second column of
    ``predict_proba`` on the held-out split with ``roc_auc_score``.

    Parameters
    ----------
    params : dict
        One point from the search space. Must contain ``'n_estimators'``
        (cast to ``int`` here, since the sampler yields floats) and
        ``'learning_rate'``; ``'random_state'`` is optional and defaults to 0.
    n_splits : int, default 5
        Number of cross-validation folds.
    cv_random_state : int or None, default 0
        Seed for the fold shuffling. A fixed seed scores every trial on the
        same folds, so the losses the optimizer compares differ only because
        of the hyperparameters.

    Returns
    -------
    float
        The negated mean AUC over the folds (the optimizer minimizes it).

    Raises
    ------
    ValueError
        If the flattened target does not have one label per feature row.

    Notes
    -----
    After the folds finish, the classifier fitted on the *last* fold is
    written to ``./Model/ada_model_cv.m``. The path is fixed, so each call
    overwrites the previous file; the file on disk belongs to the most recent
    trial, which is not necessarily the best one.
    """
    print("############## New Run ################")
    print("PARAMETERS: ")
    print(f"params  = {params}")
    params = {
        "n_estimators": int(params['n_estimators']),
        'learning_rate': params['learning_rate'],
        'random_state': params.get('random_state', 0),
    }

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=cv_random_state)

    # train_target is read with header=None and is therefore a DataFrame;
    # flatten it once so fit/roc_auc_score receive 1-D labels.
    target = np.ravel(train_target)
    if target.shape[0] != train_features.shape[0]:
        raise ValueError(
            f"train_target has {target.shape[0]} values but train_features "
            f"has {train_features.shape[0]} rows"
        )

    fold_scores = []
    print("CV SCORE: ")
    for tr_idx, val_idx in kf.split(train_features):
        clf = AdaBoostClassifier(**params)
        clf.fit(train_features.iloc[tr_idx, :], target[tr_idx])

        # Column 1 of predict_proba is the score passed to roc_auc_score.
        y_pred_val = clf.predict_proba(train_features.iloc[val_idx, :])[:, 1]
        score = roc_auc_score(target[val_idx], y_pred_val)
        fold_scores.append(score)
        print(f'AUC: {score}')

    mean_auc = sum(fold_scores) / n_splits
    print(f'Mean AUC: {mean_auc} \n')
    # Persists only the last fold's model of this trial (see Notes).
    joblib.dump(clf, "./Model/ada_model_cv.m")
    return -mean_auc

In [None]:
# Run 20 TPE-guided evaluations of ada_grid over ada_space and keep the
# value fmin returns for the best trial.
ada_best = fmin(
    fn=ada_grid,
    space=ada_space,
    algo=tpe.suggest,
    max_evals=20,
)

############## New Run ################             
PARAMETERS:                                         
params  = {'learning_rate': 0.09454924305674534, 'n_estimators': 120.0, 'random_state': 0}
CV SCORE:                                           
  0%|          | 0/20 [00:00<?, ?it/s, best loss: ?]

## Reference
- https://www.kaggle.com/smerllo/identify-unique-cards-id