# Advanced Statistical Analysis
## Individual Assignment
Importing packages

In [1]:
import pandas as pd
import numpy as np

from scipy.stats import f, chi2
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from statsmodels.multivariate.manova import MANOVA

In [2]:
df1=pd.read_csv("sample.csv")
df1=df1.rename(columns={"ZIP Code":"zipcode","Personal Loan":"personal_loan",
                        "Securities Account":"securities_account",
                        "CD Account":"cd_account"})
print(df1.columns)

Index(['Age', 'Experience', 'Income', 'zipcode', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'personal_loan', 'securities_account',
       'cd_account', 'Online', 'CreditCard'],
      dtype='object')


Creating dummy variables using one-hot encoding for family and education variable

In [3]:
family_encoded = pd.get_dummies(df1['Family'].astype(int), prefix='Family')
education_encoded = pd.get_dummies(df1['Education'].astype(int), prefix='Education')
df_dummy=pd. \
        concat([df1, family_encoded,education_encoded], axis=1). \
        drop(["Education","Family","Family_4","Education_1"],axis=1)

Getting list of zipcode where people have got personal loans

In [4]:
df_zipcode=df1.groupby(["zipcode"]).agg(got_loan=('personal_loan','sum')).reset_index()
df_zipcode=df_zipcode[df_zipcode["got_loan"]>0]

Creating a dummy_zip column where 1 is for those zipcodes where loan have been given and 0 for others

In [5]:
df_zip=pd.merge(df_dummy,df_zipcode,on="zipcode", how="left").fillna(0)
df_zip['dummy_zip'] = np.where(df_zip['got_loan'] > 0, 1, 0)
df_zip=df_zip.drop(["zipcode","got_loan"],axis=1)

In [6]:
df=df_zip.copy()
X=df.drop(["personal_loan"], axis=1)
y=df["personal_loan"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train

Unnamed: 0,Age,Experience,Income,CCAvg,Mortgage,securities_account,cd_account,Online,CreditCard,Family_1,Family_2,Family_3,Education_2,Education_3,dummy_zip
29,26,1,54,0.6,230,0,0,0,0,0,0,0,1,0,0
535,65,41,91,0.0,146,0,0,0,0,0,1,0,0,1,0
695,29,2,30,1.5,112,0,0,0,1,0,0,0,1,0,1
557,45,19,22,1.5,0,0,0,1,1,0,0,1,0,0,0
836,65,40,95,3.7,138,0,0,0,1,0,0,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,43,17,91,5.7,0,0,0,1,0,1,0,0,0,0,0
270,28,2,121,2.0,341,0,0,1,0,0,1,0,0,0,0
860,56,31,72,2.0,0,0,0,0,0,0,0,1,1,0,1
435,46,20,69,2.1,0,0,0,0,0,0,0,1,0,0,0


# Part 1

### 1. Build a Discriminant Analysis Model to predict whether the person is likely to accept the bank’s offer for a personal loan. If necessary, create new variables to improve the model performance

In [7]:
lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
lda.fit(X_train, y_train)
# Make predictions on the test data
y_pred = lda.predict(X_test)

# Calculate the accuracy of the model
print("Accuracy on training set: ", lda.score(X_train,y_train))
print("Accuracy on test set: ", lda.score(X_test,y_test))

Accuracy on training set:  0.9675
Accuracy on test set:  0.975


### 2. Carry out significance tests using Wilk’s Lambda

In [8]:
# Perform MANOVA with Wilks' test
manova = MANOVA(X_train, y_train)
m=manova.mv_test()
m.summary()

0,1,2,3
,,,

0,1,2,3,4,5,6
,x0,Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.4680,15.0000,785.0000,59.4822,0.0000
,Pillai's trace,0.5320,15.0000,785.0000,59.4822,0.0000
,Hotelling-Lawley trace,1.1366,15.0000,785.0000,59.4822,0.0000
,Roy's greatest root,1.1366,15.0000,785.0000,59.4822,0.0000


The wilk's lambda value is 0.4680 and the p value is very small. <br><br>To find out the significance of individual variables I have manually calculated Wilk's lambda for each and then calculated F value and P-value. <br>Please see below code block for the same:

In [9]:
# Calculate Wilk's Lambda
def calculate_wilks_lambda(X, y):
    # Calculate the total sum of squares
    total_sum_squares = np.sum((X - np.mean(X))**2)

    # Calculate the within-group sum of squares
    unique_classes = np.unique(y)
    within_sum_squares = 0
    for c in unique_classes:
        class_data = X[y == c]
        within_sum_squares += np.sum((class_data - np.mean(class_data))**2)

    # Calculate the ratio
    ratio_within_total = within_sum_squares / total_sum_squares

    return ratio_within_total


In [10]:
# Calculate Partial F
def calculate_partial_f(wilks_lambda, n, g, p):
    partial_f = ((1 - wilks_lambda) / wilks_lambda) * (n - g - p + 1)
    return partial_f


In [11]:
# Calculate P-value
n=800
g=2
p=15
df_num=1
df_den=n-p-2

p_val=[]
for i in X_train.columns:
    t={}
    Wilks_lambda=calculate_wilks_lambda(X_train[i],y_train)
    partial_F=calculate_partial_f(Wilks_lambda, n, g, p)

    # Calculate the p-value
    p_value = 1 - f.cdf(partial_F, df_num, df_den)
    t["feature"]=i
    t["Wilks_lambda"]=Wilks_lambda
    t["partial_F"]=partial_F
    t["df_num"]=df_num
    t["df_den"]=df_den
    t["p_value"]=round(p_value,3)
    p_val.append(t)


In [12]:
#Get P values for variables, we have assumed the alpha as 0.2
p_df=pd.DataFrame(p_val)
p_df

Unnamed: 0,feature,Wilks_lambda,partial_F,df_num,df_den,p_value
0,Age,0.999431,0.446726,1,783,0.504
1,Experience,0.999339,0.518358,1,783,0.472
2,Income,0.753063,257.080877,1,783,0.0
3,CCAvg,0.80702,187.475667,1,783,0.0
4,Mortgage,0.988676,8.979695,1,783,0.003
5,securities_account,0.994668,4.202337,1,783,0.041
6,cd_account,0.906302,81.054244,1,783,0.0
7,Online,0.999928,0.056553,1,783,0.812
8,CreditCard,0.99674,2.563857,1,783,0.11
9,Family_1,0.999899,0.079302,1,783,0.778


### 3. Comment on the variables that are significant.
<br> Below are the variables that are significant for alpha value of 0.2

In [13]:
p_df[p_df["p_value"]<0.2].sort_values("p_value",ascending=True)

Unnamed: 0,feature,Wilks_lambda,partial_F,df_num,df_den,p_value
2,Income,0.753063,257.080877,1,783,0.0
3,CCAvg,0.80702,187.475667,1,783,0.0
6,cd_account,0.906302,81.054244,1,783,0.0
14,dummy_zip,0.812431,181.005093,1,783,0.0
13,Education_3,0.98539,11.624349,1,783,0.001
4,Mortgage,0.988676,8.979695,1,783,0.003
10,Family_2,0.993997,4.734401,1,783,0.03
5,securities_account,0.994668,4.202337,1,783,0.041
8,CreditCard,0.99674,2.563857,1,783,0.11
12,Education_2,0.996838,2.487182,1,783,0.115


Below are variables that turned to be insignificant

In [14]:
p_df[p_df["p_value"]>=0.2].sort_values("p_value",ascending=False)

Unnamed: 0,feature,Wilks_lambda,partial_F,df_num,df_den,p_value
7,Online,0.999928,0.056553,1,783,0.812
9,Family_1,0.999899,0.079302,1,783,0.778
0,Age,0.999431,0.446726,1,783,0.504
1,Experience,0.999339,0.518358,1,783,0.472


### 4. Create the confusion matrix and comment on the prediction accuracy.

In [15]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
class_report= classification_report(y_test, y_pred)

print("\nPrecision:", precision)
print("\nRecall:", recall)
print("\nAccuracy on test set: ", lda.score(X_test,y_test))
print("\nclassification_report: ", class_report)

Confusion Matrix:
[[185   2]
 [  3  10]]

Precision: 0.9086879432624113

Recall: 0.8792677910324969

Accuracy on test set:  0.975

classification_report:                precision    recall  f1-score   support

           0       0.98      0.99      0.99       187
           1       0.83      0.77      0.80        13

    accuracy                           0.97       200
   macro avg       0.91      0.88      0.89       200
weighted avg       0.97      0.97      0.97       200



The prediction accuracy is at 97.5%

### 5. The bank would like to address the top 30 persons with an offer for personal loan based on the probability (propensity). Create a table displaying all the details of the “top” 30 persons who are most likely to accept the bank’s offer. Make sure to include the probability of accepting the offer along with all the other details.

In [16]:
propensity_train=pd.DataFrame(lda.predict_proba(X_train), columns=["pl_0","prob_of_acceptance"])
propensity_test =pd.DataFrame(lda.predict_proba(X_test), columns=["pl_0","prob_of_acceptance"])

In [17]:
#len(lda.predict_proba(X_train))
train_prop_data=pd.concat([X_train.reset_index(),propensity_train], axis=1)
test_prop_data =pd.concat([X_test.reset_index(),propensity_test], axis=1)

propensity_data=pd.concat([train_prop_data, test_prop_data] ,axis=0)
top_30_person=propensity_data.drop(["pl_0"],axis=1). \
        rename(columns={"index":"id"}).sort_values("prob_of_acceptance",ascending=False)[:30]
df_orig=df1.reset_index().rename(columns={"index":"id"})
top_30_person=top_30_person[["id","prob_of_acceptance"]]
df_LDA_top30=df_orig.merge(top_30_person,left_on='id', right_on='id'). \
        sort_values("prob_of_acceptance",ascending=False).drop(["id"],axis=1)

In [18]:
df_LDA_top30

Unnamed: 0,Age,Experience,Income,zipcode,Family,CCAvg,Education,Mortgage,personal_loan,securities_account,cd_account,Online,CreditCard,prob_of_acceptance
8,37,11,190,94305,4,7.3,2,565,1,0,1,1,0,0.999998
17,56,31,192,90089,1,7.0,3,0,1,0,1,1,0,0.999989
10,62,38,195,91125,4,5.2,3,522,1,0,1,1,1,0.99998
23,32,8,169,94596,1,6.5,3,272,1,1,1,1,0,0.999955
4,34,9,180,93955,2,6.5,3,0,1,0,1,1,0,0.999948
16,65,39,184,91302,1,5.4,3,176,1,0,1,1,1,0.999888
6,26,1,184,94608,2,4.2,3,577,1,0,1,1,1,0.999762
13,59,35,180,96008,2,6.5,2,0,1,1,1,1,1,0.999631
0,40,15,173,95060,4,6.6,1,0,1,0,1,1,1,0.999565
1,63,33,178,91768,4,9.0,3,0,1,0,0,0,0,0.999509


# Part 2

## 1. Build a logistic regression equation to predict whether the person is likely to accept the bank’s offer for a personal loan. If necessary, create new variables to improve the model performance.

In [19]:
# Initialize and fit the Logistic Regression model
model = LogisticRegression()
result=model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.98


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [20]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
class_report= classification_report(y_test, y_pred)

print("\nPrecision:", precision)
print("\nRecall:", recall)
print("\nAccuracy on test set: ", accuracy)
print("\nclassification_report: ", class_report)

Confusion Matrix:
[[185   2]
 [  2  11]]

Precision: 0.9177293294940354

Recall: 0.9177293294940354

Accuracy on test set:  0.98

classification_report:                precision    recall  f1-score   support

           0       0.99      0.99      0.99       187
           1       0.85      0.85      0.85        13

    accuracy                           0.98       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.98      0.98      0.98       200



## 2. Carry out the omnibus test to test whether the model as a whole is significant. Comment on the result of the omnibus test

<b>Log likelyhood Ratio test</b>

In [21]:
s=result.intercept_[0]
for i in zip(result.coef_[0],X_train.columns):
    s=s+X_train.loc[29,i[1]]*i[0]

In [22]:
LL1=0
for s in zip(result.decision_function(X_train),y_train):
    fx=np.exp(s[0])/(1+np.exp(s[0]))
    yi=s[1]
    LL1=LL1+( yi * np.log(fx) ) + ( (1-yi) * np.log(1-fx) )

print("-2LL1:",-2*LL1)

-2LL1: 145.41332176044608


In [23]:
N=len(y_train)
N1=len(y_train[y_train>0])
N0=len(y_train[y_train==0])
LL0=( N1 * np.log(N1/N) ) + ( N0 * np.log(N0/N) )
print("-2LL0:",-2*LL0)

-2LL0: 515.7243676139976


In [24]:
test_statistic = (-2*LL0)-(-2*LL1)
deg_f = len(X_train.columns)
p_value = 1 - chi2.cdf(test_statistic, deg_f)
print("chi-square:",test_statistic)
print("degrees of freedom:",deg_f)
print("P value:",p_value)
print("Hence Reject H0. So atleast one of the Bj is not equal to zero")

chi-square: 370.31104585355155
degrees of freedom: 15
P value: 0.0
Hence Reject H0. So atleast one of the Bj is not equal to zero


## 3. Test the hypothesis that βj = 0 for all βj, where βj indicates the coefficient corresponding to jth explanatory variable. Comment on the result of these hypothesis tests

<b>Wald's Test</b>

In [33]:
out=[]
for i,a in enumerate(X_train.columns):
    t={}
    t["feature"]=a
    t["coeff"]=result.coef_[0][i]
    t["std_err"]=np.std(X_train[a])/np.sqrt(len(X_train))
    t["test_stats"]=t["coeff"]/t["std_err"]
    t["p-value"]=round(1 - chi2.cdf(t["test_stats"], 1),3)
    t["alpha"]=0.2
    inference=""
    if t["p-value"]<t["alpha"]: 
        inference="H0 is rejected, "+a+" is not equal to 0" 
    else: 
        inference="Fail to reject H0, "+a+" is 0"
    t["inference"]=inference
    out.append(t)

pd.DataFrame(out)
    

Unnamed: 0,feature,coeff,std_err,test_stats,p-value,alpha,inference
0,Age,-0.369866,0.41152,-0.898779,1.0,0.2,"Fail to reject H0, Age is 0"
1,Experience,0.345922,0.413225,0.837127,0.36,0.2,"Fail to reject H0, Experience is 0"
2,Income,0.037972,1.631293,0.023277,0.879,0.2,"Fail to reject H0, Income is 0"
3,CCAvg,0.270799,0.062747,4.315732,0.038,0.2,"H0 is rejected, CCAvg is not equal to 0"
4,Mortgage,0.002278,3.260038,0.000699,0.979,0.2,"Fail to reject H0, Mortgage is 0"
5,securities_account,0.17769,0.010547,16.846736,0.0,0.2,"H0 is rejected, securities_account is not equa..."
6,cd_account,1.319021,0.00806,163.643815,0.0,0.2,"H0 is rejected, cd_account is not equal to 0"
7,Online,-0.147939,0.017356,-8.523948,1.0,0.2,"Fail to reject H0, Online is 0"
8,CreditCard,-0.43156,0.016104,-26.798935,1.0,0.2,"Fail to reject H0, CreditCard is 0"
9,Family_1,-0.480367,0.01624,-29.578995,1.0,0.2,"Fail to reject H0, Family_1 is 0"


## 4. Carry out the hypothesis test that the model fits the data. Comment on the results.

<b>Doing Hosmer - Lemeshow Test</b><br>
Dividing data in 10 equal groups

In [26]:
LH_df=pd.concat([X_train,y_train], axis=1).reset_index()
LH_df["group"]=df.index.to_series().apply(lambda x: int(x/80))
LH_df["decision_function"]=result.decision_function(X_train)
LH_df["personal_loan_pred"]=LH_df["decision_function"].apply(lambda x: np.exp(x)/(1+np.exp(x)))
LH_df["no_personal_loan"]=1-LH_df["personal_loan"]
LH_df["no_personal_loan_pred"]=1-LH_df["personal_loan_pred"]
LH_DF_agg=LH_df.groupby(["group"]).agg(
    cnt=('group',"count"),observed_1=('personal_loan','sum'),
    expected_1=('personal_loan_pred','sum'),
    observed_0=('no_personal_loan','sum'), 
    expected_0=('no_personal_loan_pred','sum'))
LH_DF_agg

Unnamed: 0_level_0,cnt,observed_1,expected_1,observed_0,expected_0
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,80,8,5.567498,72,74.432502
1,80,11,12.305545,69,67.694455
2,80,5,5.936878,75,74.063122
3,80,7,5.902693,73,74.097307
4,80,9,10.075487,71,69.924513
5,80,7,6.461588,73,73.538412
6,80,8,8.132723,72,71.867277
7,80,8,8.295935,72,71.704065
8,80,8,8.869519,72,71.130481
9,80,8,8.05409,72,71.94591


In [27]:
LH_DF_agg["total_observed_sum"]=LH_DF_agg["observed_1"]+LH_DF_agg["observed_0"]
LH_DF_agg["total_expected_sum"]=LH_DF_agg["expected_1"]+LH_DF_agg["expected_0"]
LH_DF_agg

Unnamed: 0_level_0,cnt,observed_1,expected_1,observed_0,expected_0,total_observed_sum,total_expected_sum
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,80,8,5.567498,72,74.432502,80,80.0
1,80,11,12.305545,69,67.694455,80,80.0
2,80,5,5.936878,75,74.063122,80,80.0
3,80,7,5.902693,73,74.097307,80,80.0
4,80,9,10.075487,71,69.924513,80,80.0
5,80,7,6.461588,73,73.538412,80,80.0
6,80,8,8.132723,72,71.867277,80,80.0
7,80,8,8.295935,72,71.704065,80,80.0
8,80,8,8.869519,72,71.130481,80,80.0
9,80,8,8.05409,72,71.94591,80,80.0


In [28]:
cnt=0
for i in range(2):
    cnt_i=0
    for j in LH_DF_agg.index:
        cnt_j=0
        observed=LH_DF_agg.loc[j,"observed_"+str(i)]
        expected=LH_DF_agg.loc[j,"expected_"+str(i)]
        cnt_j=cnt_j+((observed-expected)**2) / expected
    cnt_i=cnt_i+cnt_j

cnt=cnt+cnt_i
print("chi-square test statistic:",cnt)        
print("p-value:",1 - chi2.cdf(cnt, 15))    

chi-square test statistic: 0.00036325407856624013
p-value: 1.0


The p value is 1, so we fail to reject the null hypothesis, So, the logistic model fits the data.

## 5. The bank would like to address the top 30 persons with an offer for personal loan based on the probability (propensity). Create a table displaying all the details of the “top” 30 persons who are most likely to accept the bank’s offer. Make sure to include the probability of accepting the offer along with all the other details.

In [29]:
df_prob=df.copy()
df_prob["decision_function"]=result.decision_function(df.drop(["personal_loan"],axis=1))
df_prob["prob_of_acceptance"]=df_prob["decision_function"].apply(lambda x: np.exp(x)/(1+np.exp(x)))
LR_top30=df_prob.drop("decision_function",axis=1).reset_index(). \
            rename(columns={"index":"id"}). \
            sort_values(by="prob_of_acceptance",ascending=False)[:30]

In [30]:
#df_orig=df1.reset_index().rename(columns={"index":"id"})
LR_top30=LR_top30[["id","prob_of_acceptance"]]
df_LR_top30=df_orig.merge(LR_top30,left_on='id', right_on='id'). \
    sort_values("prob_of_acceptance",ascending=False).drop(["id"],axis=1)
df_LR_top30

Unnamed: 0,Age,Experience,Income,zipcode,Family,CCAvg,Education,Mortgage,personal_loan,securities_account,cd_account,Online,CreditCard,prob_of_acceptance
8,37,11,190,94305,4,7.3,2,565,1,0,1,1,0,0.99853
12,62,38,195,91125,4,5.2,3,522,1,0,1,1,1,0.99658
24,32,8,169,94596,1,6.5,3,272,1,1,1,1,0,0.995117
21,56,31,192,90089,1,7.0,3,0,1,0,1,1,0,0.990125
6,26,1,184,94608,2,4.2,3,577,1,0,1,1,1,0.988392
13,34,9,180,93023,1,8.9,3,0,1,0,0,0,0,0.982405
5,37,12,194,91380,4,0.2,3,211,1,1,1,1,1,0.982093
10,50,26,190,90245,3,2.1,3,240,1,0,0,1,0,0.980718
28,48,23,161,96003,4,7.9,3,310,1,0,0,0,0,0.979797
4,34,9,180,93955,2,6.5,3,0,1,0,1,1,0,0.978975


## 6. Compare the above list of 30 persons against the 30 persons obtained from Discriminant Analysis (Part 1). Comment on the similarities and dissimilarities.

In [31]:
merged = pd.merge(df_LDA_top30.drop("prob_of_acceptance",axis=1), 
                  df_LR_top30.drop("prob_of_acceptance",axis=1), 
                  how='outer', indicator=True)
different_values = merged.loc[merged['_merge'] != 'both']

different_values

Unnamed: 0,Age,Experience,Income,zipcode,Family,CCAvg,Education,Mortgage,personal_loan,securities_account,cd_account,Online,CreditCard,_merge
9,63,33,178,91768,4,9.0,3,0,1,0,0,0,0,left_only
16,48,22,149,94928,2,5.5,2,0,1,1,1,1,0,left_only
21,48,23,112,93014,1,5.1,2,86,1,1,1,1,0,left_only
22,29,4,120,94553,1,4.1,2,0,1,1,1,0,1,left_only
23,36,12,123,95060,2,5.6,2,0,1,0,1,1,1,left_only
24,39,14,141,93302,4,6.3,1,0,1,1,1,1,1,left_only
25,48,18,182,92626,4,6.0,3,0,1,0,0,1,0,left_only
33,37,11,123,94720,1,2.3,2,0,1,1,1,1,0,left_only
34,50,26,190,90245,3,2.1,3,240,1,0,0,1,0,right_only
35,34,10,154,94583,3,5.4,2,0,1,0,0,1,0,right_only


In [32]:
different_values["type"]=np.where(different_values["_merge"]=="left_only","LDA","LR")
different_values.groupby("type").agg(diffrence_count=("_merge","count"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  different_values["type"]=np.where(different_values["_merge"]=="left_only","LDA","LR")


Unnamed: 0_level_0,diffrence_count
type,Unnamed: 1_level_1
LDA,8
LR,8


Therefore, there are 22 common values between top 30 results of both LDA and LR. And 8 in each of them is different.