In [21]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

# Print the versions of the core libraries imported above.
print('numpy version : ',np.__version__)
print('pandas version : ',pd.__version__)
print('seaborn version : ',sns.__version__)

numpy version :  1.20.1
pandas version :  1.2.4
seaborn version :  0.11.1


In [22]:
# Load the dataset from a CSV file located relative to the notebook's working directory.
df = pd.read_csv('credit-g_csv.csv')

In [23]:
def segment(x):
    """Encode a row's ``class`` label as a string flag.

    Args:
        x: Row-like object that supports ``x['class']`` lookup.

    Returns:
        ``'0'`` when ``x['class']`` equals ``'good'``, otherwise ``'1'``
        (every value other than ``'good'`` is mapped to ``'1'``).
    """
    return '0' if x['class'] == 'good' else '1'

# Replace the textual `class` label with the '0'/'1' flag segment() returns for each row.
# NOTE: this overwrites the column in place, so re-running the cell after the labels
# are already encoded maps every row to '1' (nothing equals 'good' any more).
df['class'] = df.apply(segment, axis=1)

In [24]:
# Convert the '0'/'1' string flags in `class` to numbers; downcast="float" asks pandas
# for the smallest floating-point dtype that can hold the values.
df["class"] = pd.to_numeric(df["class"], downcast="float")

In [25]:
def segment(x):
    """Bucket a row's ``credit_amount`` into a labelled range.

    NOTE: this reuses the name ``segment`` that an earlier cell of this
    notebook already defines; once this cell runs, the name refers to this
    function.

    Args:
        x: Row-like object that supports ``x['credit_amount']`` lookup.

    Returns:
        ``'(250-3500]'`` for amounts from 250 to 3500 inclusive (the lower
        bound is inclusive even though the label is written with ``(``),
        ``'(3500-18500]'`` for amounts above 3500 up to 18500 inclusive,
        and ``'XXX'`` for anything else (including values that compare
        false against every bound, such as NaN).
    """
    amount = x['credit_amount']
    if 250 <= amount <= 3500:
        return '(250-3500]'
    if 3500 < amount <= 18500:
        return '(3500-18500]'
    # Fallback label for amounts outside both ranges.
    return 'XXX'

# Overwrite `credit_amount` with the range label segment() returns for each row.
# NOTE: the original amounts are replaced in place, so they are no longer available
# in `df` after this cell runs.
df['credit_amount'] = df.apply(segment, axis=1)

In [26]:
def calc_iv(df, feature, target, pr=False):
    """Compute the weight of evidence (WoE) table and information value (IV).

    Rows of ``df`` are grouped by ``feature``. Within each group the target
    is counted, summed and averaged; the sum is treated as the number of
    "bad" cases, so ``target`` is expected to be 1 for bad and 0 for good.

    Args:
        df: DataFrame holding both ``feature`` and ``target`` columns.
        feature: Name of the column whose distinct values define the groups.
        target: Name of the numeric target column.
        pr: When true, print the per-group table and the total IV.

    Returns:
        A tuple ``(iv, data)`` where ``iv`` is the sum of the per-group IV
        contributions and ``data`` is a DataFrame with one row per group and
        the columns ``variable``, ``value``, ``all``, ``bad``, ``good``,
        ``share``, ``bad_rate``, ``d_g``, ``d_b``, ``woe``, ``iv``, indexed
        ``0..n-1``.
    """
    target_by_level = df.groupby(by=feature, as_index=True)[target]

    n_total = target_by_level.count()
    n_bad = target_by_level.sum()
    n_good = n_total - n_bad

    # Share of all goods / all bads that fall into each group.
    dist_good = n_good / n_good.sum()
    dist_bad = n_bad / n_bad.sum()

    # A group with no goods or no bads yields an infinite WoE; it is set to 0
    # so that the group contributes nothing to the IV.
    woe = np.log(dist_good / dist_bad).replace({np.inf: 0, -np.inf: 0})

    data = pd.DataFrame({
        'all': n_total,
        'bad': n_bad,
        'good': n_good,
        'share': n_total / n_total.sum(),
        'bad_rate': target_by_level.mean(),
        'd_g': dist_good,
        'd_b': dist_bad,
        'woe': woe,
        'iv': woe * (dist_good - dist_bad),
    })

    # Expose the feature name and the group label as regular columns, then
    # swap the group index for a plain positional one.
    data.insert(0, 'variable', feature)
    data.insert(1, 'value', data.index)
    data.index = range(len(data))

    iv = data['iv'].sum()

    if pr:
        print(data)
        print('IV = %s' % iv)

    return iv, data

In [27]:
# Compute the information value of `credit_amount` against `class`; `data` receives
# the per-group table that calc_iv returns alongside the total.
iv, data = calc_iv(df, 'credit_amount', 'class')

In [28]:
iv

0.044361200999330636

In [29]:
data

Unnamed: 0,variable,value,all,bad,good,share,bad_rate,d_g,d_b,woe,iv
0,credit_amount,(250-3500],686,185.0,501.0,0.686,0.269679,0.715714,0.616667,0.148952,0.014753
1,credit_amount,(3500-18500],314,115.0,199.0,0.314,0.366242,0.284286,0.383333,-0.298925,0.029608
