In [169]:
import pandas as pd, numpy as np
from IPython.display import display

In [170]:
# generate data
df1 = pd.DataFrame(np.random.randint(0,100000, size=(25000, 1)), columns=list('A'))/100000
df2 = pd.DataFrame(np.random.randint(2, size=(25000, 1)), columns=list('B'))
df = pd.concat([df1, df2], axis = 1)

In [171]:
def make_equal_bin(df, pred_prob, ngroups):      
    # add noise to make the bins equal
    df['noise'] = pd.DataFrame(np.random.randint(1, 100, size=(25000, 1)), columns=list('N'))/10000
    df['to_sort'] = df[pred_prob] + df['noise']
    df['bins'] = pd.qcut(df['to_sort'], ngroups, labels=False)
    del df['to_sort']
    return df
    

In [187]:
def gain_lift_chart(df, pred_prob, y_test, ngroups = 10):
    
    # sort the dataframe by the column 'A'
    df.sort_values(by = pred_prob, ascending=False, inplace=True)

    # bin the dataframe by column 'A'
    df['bins'] = ngroups - pd.qcut(df[pred_prob], ngroups, labels=False)
    
    # check if the bins have equal size (caused by the qcut function)
    if df.shape[0]%ngroups == 0:
        while len(set(df['bins'].value_counts()))> 1:
            df = make_equal_bin(df, pred_prob, ngroups)
        
    number_of_case = df.bins.value_counts().sort_index().rename('Number of Cases')
        
    number_of_response = df[df[y_test] == 1].bins.value_counts().sort_index().rename('Number of Responses')
        
    cumulative_response = pd.Series(np.cumsum(number_of_response)).rename('Cumulative Responses')
    
    total_response = cumulative_response.tolist()[-1]
    
    percent_of_events = (number_of_response/total_response*100).rename('% of events')
                
    gain = (cumulative_response/total_response*100).rename('Gain')
                
    n_range = range(10, ngroups*10+10, 10)
            
    cumulative_lift = (gain/n_range).rename('Cumulative Lift')
                
    cols = [number_of_case, number_of_response, cumulative_response, percent_of_events, gain, cumulative_lift]
    
    table = pd.concat(cols, axis = 1)
    
    table['decile'] = table.index
    
    ordered_cols = ['decile'] + table.columns.tolist()[:-1]

    table =  table[ordered_cols]
    
    display(table)
    
            

In [190]:
gain_lift_chart(df, pred_prob='A', y_test='B', ngroups=20)

Unnamed: 0,decile,Number of Cases,Number of Responses,Cumulative Responses,% of events,Gain,Cumulative Lift
0,0,1250,609,609,4.917636,4.917636,0.491764
1,1,1250,618,1227,4.99031,9.907946,0.495397
2,2,1250,594,1821,4.796512,14.704457,0.490149
3,3,1250,623,2444,5.030685,19.735142,0.493379
4,4,1250,609,3053,4.917636,24.652778,0.493056
5,5,1250,614,3667,4.95801,29.610788,0.493513
6,6,1250,646,4313,5.216408,34.827196,0.497531
7,7,1250,619,4932,4.998385,39.825581,0.49782
8,8,1250,633,5565,5.111434,44.937016,0.4993
9,9,1250,635,6200,5.127584,50.064599,0.500646
