In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from matplotlib.ticker import MaxNLocator

from scipy.stats import levene
from scipy.stats import mannwhitneyu
import scipy.stats as stats
from scipy.stats import chi2_contingency
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)

In [5]:
data = pd.read_csv("Bank Leads Cleaned v2 20240119.csv")

## Scale numeric features and run hypothesis tests

In [9]:
approved = data[data['Approved']==1]
not_approved = data[data['Approved']!=1]
print("Approved: ", approved.shape)
print("Not Approved: ", not_approved.shape)

Approved:  (1015, 25)
Not Approved:  (67782, 25)


In [46]:
def scale_test(data1, data2):
    # Instantiate PowerTransformer and run
    pt = PowerTransformer(method='box-cox')

    pt_scaled_a = pt.fit_transform((data1.values + 1).reshape(-1, 1))
    pt_scaled_na = pt.fit_transform((data2.values + 1).reshape(-1, 1))

    ## Levene's test of variance
    lev_a = pd.Series(map(lambda x: x, pt_scaled_a))
    lev_na = pd.Series(map(lambda x: x, pt_scaled_na))
    print("***Box-Cox Scaled***\nLevene's test of variance...................................")
    if levene(lev_a, lev_na)[1] < 0.05:
        print('Reject the null hypothesis of equal variance between groups.')
        print(f'P-value is {levene(lev_a, lev_na)[1]}.')
    else:
        print('Fail to reject the null hypothesis of equal variance between groups.')
        print(f'P-value is {levene(lev_a, lev_na)[1]}.')

    ## Mann Whitney U test of distribution
    print("\nMann Whitney U test of distribution..........................")
    if mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1] < 0.05:
        print("Reject the null hypothesis that Monthly Income are similar.")
        print(f"P-value is {mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1]}")
    else:
        print("Fail to reject the null hypothesis that Monthly Income are similar.")
        print(f"P-value is {mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1]}")

    # Instantiate StandardScaler and run
    ss = StandardScaler()

    ss_scaled_a = ss.fit_transform((data1.values + 1).reshape(-1, 1))
    ss_scaled_na = ss.fit_transform((data2.values + 1).reshape(-1, 1))

    # Levene's test of variance
    lev_a_ss = pd.Series(map(lambda x: x, ss_scaled_a))
    lev_na_ss = pd.Series(map(lambda x: x, ss_scaled_na))                              
    print("\n\n***Standardized***\nLevene's test of variance...................................")
    if levene(lev_a_ss, lev_na_ss)[1] < 0.05:
        print('Reject the null hypothesis of equal variance between groups.')
        print(f'P-value is {levene(lev_a_ss, lev_na_ss)[1]}.')
    else:
        print('Fail to reject the null hypothesis of equal variance between groups.')
        print(f'P-value is {levene(lev_a_ss, lev_na_ss)[1]}.')

    # Mann Whitney U test of distribution
    print("\nMann Whitney U test of distribution..........................")
    if mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1] < 0.05:
        print("Reject the null hypothesis that Monthly Income are similar.")
        print(f"P-value is {mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1]}")
    else:
        print("Fail to reject the null hypothesis that Monthly Income are similar.")
        print(f"P-value is {mannwhitneyu(pt_scaled_a, pt_scaled_na, alternative='two-sided')[1]}")

### Scale and test Monthly_Income

In [47]:
scale_test(approved['Monthly_Income'], not_approved['Monthly_Income'])

***Box-Cox Scaled***
Levene's test of variance...................................
Reject the null hypothesis of equal variance between groups.
P-value is [8.97713067e-06].

Mann Whitney U test of distribution..........................
Reject the null hypothesis that Monthly Income are similar.
P-value is [0.02676371]


***Standardized***
Levene's test of variance...................................
Reject the null hypothesis of equal variance between groups.
P-value is [0.02733187].

Mann Whitney U test of distribution..........................
Reject the null hypothesis that Monthly Income are similar.
P-value is [0.02676371]


### Scale and test Age

In [48]:
scale_test(approved['Age'], not_approved['Age'])

***Box-Cox Scaled***
Levene's test of variance...................................
Fail to reject the null hypothesis of equal variance between groups.
P-value is [0.69973729].

Mann Whitney U test of distribution..........................
Fail to reject the null hypothesis that Monthly Income are similar.
P-value is [0.6778571]


***Standardized***
Levene's test of variance...................................
Fail to reject the null hypothesis of equal variance between groups.
P-value is [0.34586098].

Mann Whitney U test of distribution..........................
Fail to reject the null hypothesis that Monthly Income are similar.
P-value is [0.6778571]
