In [None]:
import yaml
from crosspredict.nodes import model_fit, forward_selection, hyperopt_fit, onefactor, model_single_fit
import pandas as pd
import pandas as pd
import datetime
import json
import numpy as np
from itertools import combinations
from hyperopt import fmin, tpe, Trials, space_eval
from collections import defaultdict
# Widen the notebook's DataFrame display limits.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 200

from crosspredict.iterator import Iterator
from crosspredict.crossval import CrossLightgbmModel
from crosspredict.report_binary import ReportBinary

# Data load

In [None]:

# Read the semicolon-delimited train and test extracts.
df = pd.read_csv('data/data_train.csv', sep=';')
df_test = pd.read_csv('data/data_test.csv', sep=';')

# Parse the signing date, then derive a month bucket by moving each
# timestamp to the first day of its month.
df['SIGN_DATE'] = pd.to_datetime(df['SIGN_DATE'])
df['SIGN_DATE_mon'] = df['SIGN_DATE'].map(lambda stamp: stamp.replace(day=1))

# Pipeline settings shared by every step below.
# NOTE(review): FullLoader is fine for a trusted local file; prefer
# yaml.safe_load if params.yaml could ever come from an untrusted source.
with open('params.yaml') as params_file:
    params = yaml.load(params_file, Loader=yaml.FullLoader)

# Adversarial validation

In [None]:

def adversarial_make_target(train, adversarial_frac, col_adversarial, col_date, **kwargs):
    """Flag the most recent share of rows as the adversarial-validation target.

    Rows are ordered by ``col_date`` and the last
    ``int(len(train) * adversarial_frac)`` of them receive ``1`` in
    ``col_adversarial``; all other rows receive ``0``. Membership is decided
    by index label, so rows sharing an index label with a selected row are
    flagged as well.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame to label. It is modified in place: ``col_adversarial`` is
        added or overwritten.
    adversarial_frac : float
        Share of rows (by count) to flag. The resulting row count is clamped
        to ``[0, len(train)]``.
    col_adversarial : str
        Name of the column that receives the 0/1 flag.
    col_date : str
        Column used to order the rows.
    **kwargs
        Ignored; lets the function be called with a larger parameter dict.

    Returns
    -------
    pandas.DataFrame
        The same ``train`` object that was passed in.
    """
    n_rows = train.shape[0]
    # Clamp so that a tiny or negative fraction selects nothing and an
    # oversized one selects everything.
    n_recent = min(max(int(n_rows * adversarial_frac), 0), n_rows)
    # tail(0) is empty, whereas the slice [-0:] would select the whole frame
    # and label every row as target.
    target_ind = train.sort_values(col_date).tail(n_recent).index
    train[col_adversarial] = train.index.isin(target_ind).astype(int)
    return train
# Label the most recent rows; the helper writes the flag into `df` in place
# and returns that same frame, so `train` and `df` are one object.
train = adversarial_make_target(df, **params)

# Same settings as the main run, except the adversarial flag is the target.
params_adversarial = {**params, 'col_target': params['col_adversarial']}
# NOTE(review): the copy above is shallow, so this append also lands in
# params['cols_exclude'] and affects every later fit; re-running the cell
# appends the entry again. Confirm this sharing is intended.
params_adversarial['cols_exclude'].append('id_contract')
fig, shap_df, report, model_class = model_fit(df, **params_adversarial)

In [None]:
# Print the report returned by the adversarial model_fit call.
print(report)

# One factor analysis - unstable

In [None]:
# One-factor report on the labelled frame, driven by the SHAP table from the
# adversarial fit; top_features=3 presumably limits it to the three
# top-ranked features (confirm against crosspredict).
report_onefactor = onefactor(
    train,
    shap_df,
    col_date=params['col_date_mon'],
    col_target=params['col_target'],
    report_directory='report',
    output_file='README_onefactor_unstable.md',
    top_features=3,
)

In [None]:
# Add this column to the exclusion list used by all later fits
# (presumably it was flagged as unstable by the one-factor report - confirm).
# NOTE(review): not idempotent - re-running the cell appends the name again.
params['cols_exclude'].append('DAYS_SIGN_DATE_AFTER_TAXREGPAY_REGDATE')

# Model fit on all data

In [None]:
# Fit with the base `params`; this rebinds fig, shap_df, report and
# model_class, replacing the objects from the adversarial run.
fig, shap_df, report, model_class = model_fit(df, **params)

In [None]:
# Forward-selection settings: the base params plus the first 10 entries of
# the SHAP table's 'feature' column as candidate features.
params_fs = {
    **params,
    'feature_name': shap_df['feature'][:10].values.tolist(),
}

# Forward selection

In [None]:
# Run forward selection with params_fs; returns a score table and a summary
# (`top_features`) that a later cell parses as JSON.
scores_df, top_features = forward_selection(df, **params_fs)

In [None]:
# Display the forward-selection score table.
scores_df

In [None]:
# forward_selection returned its summary as a JSON string; keep only the
# 'feature_selection' entry as an integer (used below as a slice bound on
# scores_df). A stray '÷' inside the identifier previously made this cell a
# syntax error.
# NOTE(review): this rebinds top_features from str to int, so the cell
# cannot be re-run without re-running forward_selection first.
top_features = int(json.loads(top_features)['feature_selection'])

# Hyperopt

In [None]:
# Hyperopt settings: the forward-selection params, with feature_name narrowed
# to the first `top_features` entries of the score table's 'index' column.
params_hp = {
    **params_fs,
    'feature_name': scores_df['index'][:top_features].values.tolist(),
}

In [None]:
# Hyperparameter search with params_hp; `results` is parsed as JSON in a
# later cell.
results = hyperopt_fit(df, **params_hp)

In [None]:
# Print the raw hyperopt result.
print(results)

# Fit final model

In [None]:
# Use the 'params' entry of the JSON-encoded hyperopt result as lgb_params
# for the final fit.
params_hp['lgb_params'] = json.loads(results)['params']

In [None]:
# Refit with the selected features and tuned lgb_params; rebinds fig,
# shap_df, report and model_class to the final model's outputs.
fig, shap_df, report, model_class = model_fit(df, **params_hp)

In [None]:
# SHAP summary plot for the final model on the training frame (rebinds fig).
fig = model_class.shap_summary_plot(df)

In [None]:
# Display the scores stored on the fitted model object.
model_class.scores

In [None]:
# Display num_boost_optimal; the next cell copies it into
# params_hp['num_boost'].
model_class.num_boost_optimal

In [None]:
# Settings for the single-model fit: fix the number of boosting rounds at
# model_class.num_boost_optimal and set early_stopping / cols_cat to None.
params_hp.update(
    num_boost=model_class.num_boost_optimal,
    early_stopping=None,
    cols_cat=None,
)

In [None]:
# Fit one model on the whole frame with the fixed num_boost; returns the
# pair (model_str, model).
model_str, model = model_single_fit(df, **params_hp)

# Predict

In [None]:
# Score the training frame with the cross-validated model.
# NOTE(review): transform() here vs predict() on the test frame below -
# presumably out-of-fold scores vs. scores from the fitted fold models;
# confirm against crosspredict.
df['PREDICT_class'] = model_class.transform(df)

In [None]:
# Score the test frame with the cross-validated model.
# NOTE(review): df_test never gets the SIGN_DATE parsing / SIGN_DATE_mon
# column applied to df - confirm predict() does not need them.
df_test['PREDICT_class'] = model_class.predict(df_test)

# Report

In [None]:
# Report helper; the plot_report call below draws with it and the figure is
# then read from a.fig.
a = ReportBinary()


In [None]:
# Grid position of each panel within the 4x2 report figure.
# NOTE(review): some entries are a dict and others a one-element list of
# dicts - kept exactly as written; confirm both forms are accepted.
report_layout = {
    'Roc-Auc': {'loc': (0, 0)},
    'Precision-Recall': [{'loc': (0, 1)}],
    'MeanTarget-by-Probability': [{'loc': (1, 0)}],
    'Gini-by-Generations': {'loc': (2, 0), 'colspan': 2},
    'MeanTarget-by-Generations': {'loc': (3, 0), 'colspan': 2},
    'Probability-Distribution': [{'loc': (1, 1)}],
}

# Draw the report for the training-frame scores against the real target,
# with the month column as the generation axis.
a.plot_report(
    df,
    report_shape=(4, 2),
    report=report_layout,
    cols_score=['PREDICT_class'],
    cols_target=[params['col_target']],
    col_generation_deals=params['col_date_mon'],
)

# Persist the rendered figure next to the notebook.
a.fig.savefig('report1.png')