In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb

from collections import OrderedDict
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
from copy import deepcopy

from autowoe import ReportDeco, AutoWoE

In [33]:
# Read the train and test splits (in that order) from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))
# Last expression of the cell: shows (rows, columns) of the training frame.
train.shape

(1120472, 25)

In [47]:
# Split the candidate feature columns by dtype: int64 columns are treated as
# categorical, float64 columns as numeric.
# The target column is removed once, up front, so it cannot end up in EITHER list.
# Previously it was excluded only from cat_col, so a float64 target would have been
# picked up as a numeric feature. Working on `train.dtypes` also avoids copying the
# whole frame just to enumerate its columns.
# Raises KeyError if 'loan_status' is not a column of `train`.
feature_dtypes = train.dtypes.drop('loan_status')
cat_col = [col for col, col_dtype in feature_dtypes.items() if col_dtype == 'int64']
num_col = [col for col, col_dtype in feature_dtypes.items() if col_dtype == 'float64']

In [48]:
# Full feature list: the cat_col entries first, followed by the num_col entries.
features = [*cat_col, *num_col]

In [49]:
# Per-column type tags: every column in num_col is tagged "real",
# every column in cat_col is tagged "cat".
num_feature_type = dict.fromkeys(num_col, "real")
cat_feature_type = dict.fromkeys(cat_col, "cat")

In [50]:
# Merge both mappings into a single {column name: type tag} dictionary, which is
# later passed to fit() as `features_type`.
# Note: dict(**a, **b) raises TypeError if the same key appears in both mappings,
# so a column can never silently receive two different type tags here.
features_type = dict(**num_feature_type, **cat_feature_type)

In [51]:
# Build the AutoWoE model and wrap it in ReportDeco; the wrapped object is the one
# used below for fit(), predict_proba() and generate_report().
auto_woe = ReportDeco(
    AutoWoE(
        interpreted_model=True,
        monotonic=False,
        max_bin_count=5,
        select_type=None,
        # thresholds
        pearson_th=0.9,
        auc_th=0.505,
        vif_th=10.0,
        imp_th=0,
        th_const=32,
        force_single_split=True,
        th_nan=0.01,
        th_cat=0.005,
        woe_diff_th=0.01,
        # binning / splitting
        min_bin_size=0.01,
        min_bin_mults=(2, 4),
        min_gains_to_split=(0.0, 0.5, 1.0),
        auc_tol=1e-4,
        cat_alpha=100,
        cat_merge_to="to_woe_0",
        nan_merge_to="to_woe_0",
        # folds, parallelism and regularisation grid
        oof_woe=True,
        n_folds=6,
        n_jobs=6,
        l1_grid_size=20,
        l1_exp_scale=6,
        imp_type="feature_imp",
        regularized_refit=False,
        p_val=0.05,
        debug=False,
        verbose=0,
    )
)

In [52]:
# Fit on the selected feature columns plus the target column.
# Note: a `validation=test` argument existed in this call but was commented out,
# so no validation frame is passed.
auto_woe.fit(
    train[[*features, "loan_status"]],
    target_name="loan_status",
    features_type=features_type,
    group_kf=None,
)

In [54]:
# Score the test frame with the fitted model, then compute ROC AUC of those
# scores against the test frame's 'loan_status' column.
pred = auto_woe.predict_proba(test)
auc = roc_auc_score(y_true=test['loan_status'], y_score=pred)

In [55]:
# Print the ROC AUC computed in the previous cell.
auc_message = "AutoWoE auc: {}".format(auc)
print(auc_message)

AutoWoE auc: 0.6987453913282243


In [57]:
# Settings handed to the report generator; the values are placeholder metadata
# plus the output location.
report_params = dict(
    output_path="./AutoWoE_REPORT_1",  # folder where the report is generated and the required files are placed
    report_name="pilot_report",
    report_version_id=1,
    city="Москва",
    model_aim="___Проверка работы___",
    model_name="___AutoWoE___",
    zakazchik="___Dmitry___",
    high_level_department="___sber___",
    ds_name="___Super_Stepan___",
    target_descr="___ОПИСАНИЕ ЦЕЛЕВОГО СОБЫТИЯ___",
    non_target_descr="___ОПИСАНИЕ НЕЦЕЛЕВОГО СОБЫТИЯ___",
)

# Produce the report from the fitted, ReportDeco-wrapped model.
auto_woe.generate_report(report_params)

No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
