In [1]:
import numpy as np 
import pandas as pd

In [2]:
# Read the remote CSV straight into a DataFrame (requires network access).
mydata = pd.read_csv(
    filepath_or_buffer="https://stats.idre.ucla.edu/stat/data/binary.csv"
)

In [3]:
# Preview the first five rows of the loaded frame.
mydata.head(n=5)

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [4]:
def iv_woe(data, target, bins=10, show_woe=False, max_discrete_levels=10):
    """Compute Weight of Evidence (WoE) and Information Value (IV) per column.

    Every column of ``data`` other than ``target`` is treated as an
    independent variable. Numeric columns with more than
    ``max_discrete_levels`` distinct non-missing values are quantile-binned
    with ``pd.qcut``; every other column is grouped on its raw values.

    Events are counted as the sum of the target column, so the target is
    expected to be coded 0/1. Rows whose feature value is missing are left out
    of that feature's table because ``groupby`` drops NaN keys by default.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the independent variables and the target column.
    target : label
        Name of the target column in ``data``.
    bins : int, default 10
        Number of quantile bins requested for binned numeric columns
        (duplicate bin edges are dropped, so fewer bins may result).
    show_woe : bool, default False
        When truthy, print each variable's WoE table as it is computed.
    max_discrete_levels : int, default 10
        Numeric columns with more distinct values than this are binned.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(iv_table, woe_table)``: the first has one row per variable with
        columns ``Variable`` and ``IV``; the second stacks the per-bin tables
        (``Variable``, ``Cutoff``, ``N``, ``Events``, ``% of Events``,
        ``Non-Events``, ``% of Non-Events``, ``WoE``, ``IV``). Both are empty
        when ``data`` has no column besides ``target``.
    """
    woe_columns = ['Variable', 'Cutoff', 'N', 'Events', '% of Events',
                   'Non-Events', '% of Non-Events', 'WoE', 'IV']

    # Collect per-variable pieces and concatenate once at the end instead of
    # re-copying the growing result on every iteration.
    iv_frames, woe_frames = [], []

    cols = data.columns
    for ivars in cols[~cols.isin([target])]:
        feature = data[ivars]

        # nunique() ignores NaN, so missing values cannot push a discrete
        # column over the binning threshold.
        if feature.dtype.kind in 'bifc' and feature.nunique() > max_discrete_levels:
            grouping = pd.qcut(feature, bins, duplicates='drop')
        else:
            grouping = feature
        d0 = pd.DataFrame({'x': grouping, 'y': data[target]})

        # observed=True keeps only bins/categories that actually occur;
        # otherwise unused categorical levels would show up as zero-count rows
        # that still receive the 0.5 floor below.
        d = d0.groupby("x", as_index=False, observed=True).agg({"y": ["count", "sum"]})
        d.columns = ['Cutoff', 'N', 'Events']

        # Floor the counts at 0.5 so a bin with no events (or no non-events)
        # does not produce log(0) or a division by zero in the WoE.
        d['% of Events'] = np.maximum(d['Events'], 0.5) / d['Events'].sum()
        d['Non-Events'] = d['N'] - d['Events']
        d['% of Non-Events'] = np.maximum(d['Non-Events'], 0.5) / d['Non-Events'].sum()
        d['WoE'] = np.log(d['% of Events'] / d['% of Non-Events'])
        d['IV'] = d['WoE'] * (d['% of Events'] - d['% of Non-Events'])
        d.insert(loc=0, column='Variable', value=ivars)

        total_iv = d['IV'].sum()
        # str() keeps the message working for non-string column labels.
        print("Information value of " + str(ivars) + " is " + str(round(total_iv, 6)))

        iv_frames.append(
            pd.DataFrame({"Variable": [ivars], "IV": [total_iv]}, columns=["Variable", "IV"])
        )
        woe_frames.append(d)

        # Show WOE table
        if show_woe:
            print(d)

    if not woe_frames:
        # pd.concat rejects an empty list; return empty, correctly-labelled frames.
        return pd.DataFrame(columns=["Variable", "IV"]), pd.DataFrame(columns=woe_columns)

    return pd.concat(iv_frames, axis=0), pd.concat(woe_frames, axis=0)

In [5]:
# Score every predictor of `admit`, printing each per-variable WoE table,
# then print the IV summary followed by the stacked WoE table.
iv, woe = iv_woe(
    data=mydata,
    target='admit',
    bins=10,
    show_woe=True,
)
print(iv, woe, sep="\n")

Information value of gre is 0.312882
  Variable            Cutoff   N  Events  % of Events  Non-Events  \
0      gre  (219.999, 440.0]  48       6     0.047244          42   
1      gre    (440.0, 500.0]  51      12     0.094488          39   
2      gre    (500.0, 520.0]  24      10     0.078740          14   
3      gre    (520.0, 560.0]  51      15     0.118110          36   
4      gre    (560.0, 580.0]  29       6     0.047244          23   
5      gre    (580.0, 620.0]  53      21     0.165354          32   
6      gre    (620.0, 660.0]  45      17     0.133858          28   
7      gre    (660.0, 680.0]  20       9     0.070866          11   
8      gre    (680.0, 740.0]  44      12     0.094488          32   
9      gre    (740.0, 800.0]  35      19     0.149606          16   

   % of Non-Events       WoE        IV  
0         0.153846 -1.180625  0.125857  
1         0.142857 -0.413370  0.019994  
2         0.051282  0.428812  0.011774  
3         0.131868 -0.110184  0.001516 