In [None]:
import doctools as d
import pandas as pd
import numpy as np

# Preparing the data

In [None]:
cd Desktop

In [None]:
# Read the exported ACL data and immediately shorten the verbose source column
# names to the names the rest of the notebook refers to.
df = pd.read_csv('exported_acl_data_for_Vitek2.csv').rename(
    columns={
        # score columns (listed later in pp.scores)
        'PROPOSED_SCORE': 'HCPAY_GEN1',
        'GEN3_V1_SCORE': 'ACL_GEN3',
        # WOE predictor columns (listed later in pp.predictors_woe)
        u'A45_m_ratio_cnt_partner_Loan_Imbank_all_wcl_14pd30_bin_woe':
            u'A45_m_ratio_cnt_partner_Loan_Imbank_all_woe',
        'A45_i_freq_riskrank_2_30day_acl_1pd30_bin_woe':
            'A45_i_freq_riskrank_2_30day_woe',
        u'A45_i_ratio_cnt_partner_Loan_con_all_acl_1pd30_bin_woe':
            u'A45_i_ratio_cnt_partner_Loan_con_all_woe',
        'A42_ZM_SCORE_acl_on_14pd30_bin_woe2':
            'A42_ZM_SCORE_woe2',
    }
)

In [None]:
def a45_m_ratio(x):
    """Map a WOE value of A45_m_ratio_cnt_partner_Loan_Imbank_all to its bin label.

    A value within 0.001 of 0.450050 is labelled 'x < 0.6567 or x is missing';
    any other value (including NaN) is labelled 'x >= 0.6567'.
    """
    low_or_missing_woe = 0.450050
    tolerance = 0.001
    in_low_bin = np.abs(x - low_or_missing_woe) <= tolerance
    return 'x < 0.6567 or x is missing' if in_low_bin else 'x >= 0.6567'
    
def a45_i_freq(x):
    """Map a WOE value of A45_i_freq_riskrank_2_30day to its bin label.

    A value within 0.001 of 0.905324 is labelled 'x is missing';
    any other value (including NaN) is labelled 'x is not missing'.
    """
    missing_woe = 0.905324
    tolerance = 0.001
    is_missing_bin = np.abs(x - missing_woe) <= tolerance
    return 'x is missing' if is_missing_bin else 'x is not missing'
    
def a45_i_ratio(x):
    """Map a WOE value of A45_i_ratio_cnt_partner_Loan_con_all to its bin label.

    A value within 0.001 of -0.222326 is labelled 'x < 0.35855 or x is missing';
    any other value (including NaN) is labelled 'x >= 0.35855'.
    """
    low_or_missing_woe = -0.222326
    tolerance = 0.001
    in_low_bin = np.abs(x - low_or_missing_woe) <= tolerance
    return 'x < 0.35855 or x is missing' if in_low_bin else 'x >= 0.35855'
    
def zh(x):
    """Map a WOE value of A42_ZM_SCORE to its bin label.

    Exactly 0 is labelled 'x is missing'; a value within 0.001 of -0.102122 is
    labelled 'x < 650'; any other value (including NaN) is labelled 'x >= 650'.
    """
    # The missing bin is matched by exact equality, before the tolerance check.
    if x == 0:
        return 'x is missing'

    below_650_woe = -0.102122
    tolerance = 0.001
    is_below_650 = np.abs(x - below_650_woe) <= tolerance
    return 'x < 650' if is_below_650 else 'x >= 650'

In [None]:
# Add one bin-label column ("*_grp") per WOE column by running the matching
# labelling function over it: (new column, source WOE column, labeller).
for _grp_col, _woe_col, _labeller in [
    (u'A45_m_ratio_cnt_partner_Loan_Imbank_all_grp',
     u'A45_m_ratio_cnt_partner_Loan_Imbank_all_woe',
     a45_m_ratio),
    (u'A45_i_freq_riskrank_2_30day_grp',
     u'A45_i_freq_riskrank_2_30day_woe',
     a45_i_freq),
    (u'A45_i_ratio_cnt_partner_Loan_con_all_grp',
     u'A45_i_ratio_cnt_partner_Loan_con_all_woe',
     a45_i_ratio),
    (u'A42_ZM_SCORE_grp',
     'A42_ZM_SCORE_woe2',
     zh),
]:
    df[_grp_col] = df[_woe_col].apply(_labeller)

In [None]:
def _iso_year_week(ts):
    """Return the ISO year-week key 'YYYYWW' of a timestamp, as a string.

    Year and week are both taken from ``isocalendar()``. Timestamp.week is the
    ISO week number, so pairing it with the calendar year mislabels days around
    New Year (2018-12-31 is ISO week 1 of 2019 and would become '201801').
    Missing timestamps yield 'nan', the same text the previous
    ``str(x.year*100 + x.week)`` arithmetic produced for NaT.
    """
    if pd.isnull(ts):
        return 'nan'
    iso_year, iso_week = ts.isocalendar()[:2]
    return str(iso_year * 100 + iso_week)


# Parse the decision date and derive string period keys from it.
for sample in [df]:
    sample['DATE_DECISION'] = pd.to_datetime(sample['DATE_DECISION'], format='%Y-%m-%d')
    # 'YYYYWW' key based on the ISO year and ISO week.
    sample['WEEK_DECISION'] = sample['DATE_DECISION'].apply(_iso_year_week)
    # 'YYYYMM' key; this column is later configured as pp.time_variable.
    sample['MONTH_DECISION'] = sample['DATE_DECISION'].apply(lambda x: str(x.year*100+x.month))

In [None]:
# Split the prepared frame into the three samples by the `acl_on_type` flag.
acl_train = df.loc[df['acl_on_type'].eq('train')]
acl_test = df.loc[df['acl_on_type'].eq('test')]
acl_oot = df.loc[df['acl_on_type'].eq('oot')]

# 1. Setting up the project

In [None]:
# Initialize the documentation project configuration.
pp = d.ProjectParameters()

# Samples by label; the same labels are passed next to the frames in the
# ad-hoc `.s([...])` calls further down.
pp.sample_dict = {
               'ACL_TRAIN': acl_train,
               'ACL_TEST': acl_test,
                'ACL_OOT': acl_oot
              }
# Order in which the labelled samples are walked (used by the
# marginal-contributions cell, which iterates pp.sample_ordering).
pp.sample_ordering = ['ACL_TRAIN','ACL_TEST','ACL_OOT']


# Pairs of column names, one pair per target.
# NOTE(review): the second member carries an `_AGR` suffix — presumably a
# per-row flag marking rows where the target is observable; confirm in doctools.
pp.targets = [
                 ('FSTQPD30', 'FSTQPD30_AGR'),
                 ('N1PD30','N1PD30_AGR')
]

# Score columns (renamed from PROPOSED_SCORE / GEN3_V1_SCORE when loading);
# pp.scores[0] is later taken as the current score for marginal contributions.
pp.scores = ['HCPAY_GEN1','ACL_GEN3']

pp.predictors_continuous = ['CM_SCORE_LOGSCORE', 'UMENG_LARGE_LOGSCORE', 'PBOC_GEN4_LOGSCORE2', u'A8_FINALSCORE']

# Bin-label columns created in the data-preparation section from the WOE columns.
pp.predictors_grouped = [u'A45_m_ratio_cnt_partner_Loan_Imbank_all_grp',
                         'A45_i_freq_riskrank_2_30day_grp',
                         u'A45_i_ratio_cnt_partner_Loan_con_all_grp',
                        'A42_ZM_SCORE_grp']

# WOE columns those bin labels were derived from.
pp.predictors_woe = [u'A45_m_ratio_cnt_partner_Loan_Imbank_all_woe',
                         'A45_i_freq_riskrank_2_30day_woe',
                         u'A45_i_ratio_cnt_partner_Loan_con_all_woe',
                        'A42_ZM_SCORE_woe2']

pp.rowid_variable = 'SKP_CREDIT_CASE'
# MONTH_DECISION is the 'YYYYMM' string derived from DATE_DECISION above.
pp.time_variable = 'MONTH_DECISION'

# Show up to 100 columns when rendering frames and disable pandas'
# chained-assignment warning. The same two options are set again in the next cell.
pd.options.display.max_columns = 100
pd.options.mode.chained_assignment = None

In [None]:
# Notebook-wide pandas settings: render up to 100 columns and switch off the
# chained-assignment check. The project-setup cell applies the same values,
# so repeating them here is harmless.
pd.set_option('display.max_columns', 100)
pd.set_option('mode.chained_assignment', None)

#

# 2. Ad-hoc use

## 2.1 Sample description

In [None]:
# Create the sample-description calculator from the project configuration.
sdc = d.SampleDescriptionCalculator(pp)

In [None]:
# Select the training sample and run the calculation; `sdc` is rebound to
# whatever calculate() returns (the later cells call get_* on it).
sdc = sdc.s([(acl_train,'ACL_TRAIN')]).calculate()

In [None]:
# Display the description returned for the sample-description result.
sdc.get_description()

In [None]:
# Display the table returned for the sample-description result.
sdc.get_table()

## 2.2 Evaluation of grouped predictors

In [None]:
# Evaluate one grouped predictor against one target on the training sample.
gec = (
    d.GroupingEvaluationCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['A42_ZM_SCORE_grp'])
    .t([('FSTQPD30', 'FSTQPD30_AGR')])
    .calculate()
)

In [None]:
# Display the description returned for the grouping evaluation.
gec.get_description()

In [None]:
# Display the table obtained from the grouping evaluation's visualization object.
gec.get_visualization().get_table()

## 2.3 Evaluation of continuous predictors

In [None]:
# Evaluate one continuous predictor against one target on the training sample.
cec = (
    d.ContinuousEvaluationCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['CM_SCORE_LOGSCORE'])
    .t([('FSTQPD30', 'FSTQPD30_AGR')])
    .calculate()
)

In [None]:
# Display the table obtained from the continuous evaluation's visualization object.
cec.get_visualization().get_table()

## 2.4.a Score comparison

In [None]:
# Compare three score columns on the training sample against one target.
scc = (
    d.ScoreComparisonCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['ACL_GEN3', 'HCPAY_GEN1', 'A42_ZM_SCORE'])
    .t([('FSTQPD30', 'FSTQPD30_AGR')])
    .calculate()
)

In [None]:
# Display the description returned for the score comparison.
scc.get_description()

In [None]:
# Display the table obtained from the score comparison's visualization object.
scc.get_visualization().get_table()

## 2.4.b Gini performance of predictors in time

#### Multiple predictors on single target

In [None]:
# Gini in time on the training sample: three predictors, one target.
pgt = (
    d.PredictorGiniInTimeCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['ACL_GEN3', 'HCPAY_GEN1', 'A42_ZM_SCORE'])
    .t([('FSTQPD30', 'FSTQPD30_AGR')])
    .calculate()
)

In [None]:
# Display the description returned for the Gini-in-time result.
pgt.get_description()

In [None]:
# Display the table obtained from the Gini-in-time visualization object.
pgt.get_visualization().get_table()

#### Single predictor on multiple targets

In [None]:
# Gini in time on the training sample: one predictor, two targets.
# Rebinds `pgt`, replacing the result of the previous Gini-in-time run.
pgt = (
    d.PredictorGiniInTimeCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['ACL_GEN3'])
    .t([('FSTQPD30', 'FSTQPD30_AGR'), ('N1PD30', 'N1PD30_AGR')])
    .calculate()
)

In [None]:
# Display the table for the single-predictor / two-target run.
pgt.get_visualization().get_table()

#### Multiple predictors on multiple targets

In [None]:
# Gini in time on the training sample: three predictors, two targets.
# Rebinds `pgt`, replacing the result of the previous Gini-in-time run.
pgt = (
    d.PredictorGiniInTimeCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(['ACL_GEN3', 'HCPAY_GEN1', 'A42_ZM_SCORE'])
    .t([('FSTQPD30', 'FSTQPD30_AGR'), ('N1PD30', 'N1PD30_AGR')])
    .calculate()
)

In [None]:
# Display the table for the three-predictor / two-target run.
pgt.get_visualization().get_table()


## 2.5 Marginal contributions

In [None]:
# Inputs for the marginal-contributions run: all continuous and WOE predictors,
# measured against the first configured score.
# NOTE(review): the next cell repeats these two assignments verbatim, so this
# cell is redundant.
predictors = pp.predictors_continuous + pp.predictors_woe
current_score = pp.scores[0]

In [None]:
# Marginal contributions over every configured sample and target, measured
# against the first configured score.
predictors = pp.predictors_continuous + pp.predictors_woe
current_score = pp.scores[0]

mcc = (
    d.MarginalContributionsCalculator(pp)
    # (frame, label) pairs in the configured sample order
    .s([(pp.sample_dict[sample], sample) for sample in pp.sample_ordering])
    .t(pp.targets)
    .p(predictors)
    .sc([current_score])
)
mcc.calculate().get_table()

## 2.6 Transition matrix

In [None]:
# Transition calculation between the configured scores on the training sample.
tc = (
    d.TransitionCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .sc(pp.scores)
)
tc.calculate().get_table()

## 2.7 Correlation matrix

In [None]:
# Correlations on the training sample across all continuous and WOE predictors.
predictors = pp.predictors_continuous + pp.predictors_woe

cc = (
    d.CorrelationCalculator(pp)
    .s([(acl_train, 'ACL_TRAIN')])
    .p(predictors)
)
cc.calculate().get_table()

# 

# 

# 3. Automated use

In [None]:
# Build the standard execution plan from the project configuration.
ep = d.StandardExecutionPlan(pp)

In [None]:
# Run the plan's calculations (kept in its own cell, separate from printing).
ep.calculate()

In [None]:
# Print the report in three parts: title, summary, then the documentation body.
ep.print_title('HCPAY GEN1 SCORECARD DOCUMENTATION')
ep.print_summary()
ep.print_documentation()

# 4. Exporting results

In [None]:
%%javascript
// Ask the kernel to execute `nb_name = "<notebook name>"`, so the Python side
// receives the current notebook's name in the variable `nb_name`.
// NOTE(review): depends on the global `IPython.notebook` object of the classic
// Notebook front-end; presumably unavailable in JupyterLab, confirm before reuse.
// NOTE(review): the name is spliced between double quotes without escaping, so a
// notebook name containing `"` would produce invalid Python.
IPython.notebook.kernel.execute('nb_name = ' + '"' + IPython.notebook.notebook_name + '"')

In [None]:
# Export through doctools' Exporter, passing the notebook name.
# `nb_name` is not assigned anywhere in Python code: it is injected into the
# kernel by the preceding %%javascript cell, which must have run first.
e = d.Exporter()
e.export(nb_name)