In [133]:
""" Importing the modules """
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn import metrics
from sklearn import linear_model
from sklearn.cross_validation import train_test_split

%matplotlib inline

In [134]:
""" loading for loan data"""
# Read the raw training set into the `loan` DataFrame.
loan = pd.read_csv(filepath_or_buffer='C:/vikas/data/ML/loan_training_01.csv')

In [135]:
""" Converting the values into Numerical format """
def verified(string):
    """Encode a verification-status label as a 0/1 flag.

    Args:
        string: The status label; it must support the ``in`` substring test.

    Returns:
        0 when the label contains the substring 'Not', otherwise 1.
    """
    return 0 if 'Not' in string else 1
    
def fix_borrower(string):
    """Encode a home-ownership label as an integer code.

    The label is matched exactly (after trimming surrounding whitespace)
    against the known categories. The previous implementation tested
    ``string in 'Own'`` -- i.e. whether the input was a *substring* of the
    category name -- so an empty string, a single letter, or a fragment such
    as 'Not' was silently mapped to a category.

    Args:
        string: The raw home-ownership label.

    Returns:
        0 for 'Under Mortgage', 2 for 'Own', 1 for 'Rental', -1 for 'Other',
        -2 for 'Info Not Available', and None for anything else (including
        non-string values such as NaN).
    """
    codes = {
        'Under Mortgage': 0,
        'Own': 2,
        'Rental': 1,
        'Other': -1,
        'Info Not Available': -2,
    }
    # Only strings can be trimmed; other values (e.g. a float NaN) are looked
    # up as-is and fall through to None.
    label = string.strip() if isinstance(string, str) else string
    return codes.get(label)
    
def fix_loan(string):
    """Return the leading whitespace-separated token of ``string`` as an int.

    For example, '60 months' becomes 60.
    """
    leading_token = string.split(None, 1)[0]
    return int(leading_token)

def fix_float(string):
    """Round a numeric value to two decimal places and return it."""
    rounded = round(string, 2)
    return rounded

# Convert each raw column to its numeric form, one (column, converter) pair
# at a time, in the same order as before.
for column, converter in (
    ('EMI', fix_float),
    ('Last Amount Received as Payment', fix_float),
    ('Loan Duration', fix_loan),
    ('Home Ownership of Borrower', fix_borrower),
    ('Borrower\'s Verification Status', verified),
):
    loan[column] = loan[column].apply(converter)

In [136]:
""" Dropping the columns that are not required """
# Columns that are not used as model features; they are removed from `loan`
# in place.
unused_columns = [
    'Borrower\'s Duration of Employment',
    'Borrower Rating by Bank',
    'Last Month Payment was Received',
    'Purpose of Loan',
    'Loan Issue Date',
    'Number of Times the Borrower has Availed Loan from All Banks',
    'Percentage of Credit the Borrower is Using Relative to All Available Revolving Credit',
    'Date of Borrower\'s First  Loan',
    'Interest Rate',
    'The Number of Months Since the Borrower\'s Credit Record  was Updated',
    'Debt-to-Income Ratio (A ratio calculated based on Borrower\'s monthly debt repayments to self-reported monthly income)',
]
loan.drop(unused_columns, axis=1, inplace=True)

# Preview the first three remaining rows as the cell output.
loan.head(3)

Unnamed: 0,Loan Application Number,Loan Amount,Loan Duration,EMI,Home Ownership of Borrower,Annual Income of Borrower,Borrower's Verification Status,Borrower Delinquency in Last Two Years,Number of Months Since the Borrower's Last Delinquency,Number of Times the Borrower has Availed Loan from the Bank,Number of Negative Comments About the Borrower in Credit History,Total Credit Revolving Balance,Late Fees Received To Date,Last Amount Received as Payment,rm_employee_id,Loan Status
0,1444,385182,51,9955.33,0,934125,1,0,0,9,0,193772,0,310882.11,1,1
1,11495,79080,44,2158.89,1,424889,0,0,0,10,0,135665,0,2163.4,1,1
2,14082,109993,46,3189.16,0,1539008,1,0,0,14,0,722849,0,3469.55,1,1


In [139]:
# Print each column's dtype and non-null count to spot columns that still
# contain missing values.
loan.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24000 entries, 0 to 23999
Data columns (total 16 columns):
Loan Application Number                                             24000 non-null int64
Loan Amount                                                         24000 non-null int64
Loan Duration                                                       24000 non-null int64
EMI                                                                 24000 non-null float64
Home Ownership of Borrower                                          24000 non-null int64
Annual Income of Borrower                                           24000 non-null float64
Borrower's Verification Status                                      24000 non-null int64
Borrower Delinquency in Last Two Years                              23983 non-null float64
Number of Months Since the Borrower's Last Delinquency              24000 non-null float64
Number of Times the Borrower has Availed Loan from the Bank         23983 non-null

In [140]:
""" Fix Nan """
# Replace missing values with 0 in the listed columns.
# The result is assigned back explicitly instead of calling
# `loan[col].fillna(0, inplace=True)`: that form mutates a column selection,
# which does not reliably update `loan` itself on pandas versions that use
# Copy-on-Write.
nan_to_zero_columns = [
    'Borrower Delinquency in Last Two Years',
    'Number of Times the Borrower has Availed Loan from the Bank',
    'Number of Negative Comments About the Borrower in Credit History',
    'Total Credit Revolving Balance',
    'Number of Months Since the Borrower\'s Last Delinquency',
]
loan[nan_to_zero_columns] = loan[nan_to_zero_columns].fillna(0)

In [141]:
""" Splitting the training data into test and training sets """
# Separate the target column from the features, then hold out 40% of the
# rows with a fixed seed so the split is repeatable.
target_column = 'Loan Status'
y = loan[target_column]
X = loan.drop(target_column, axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.40, random_state=101
)

In [142]:
"""Importing and fitting the model"""
from sklearn.linear_model import LogisticRegression

# Build a LogisticRegression with its default settings and fit it on the
# training split; `fit` returns the estimator itself.
logmodel = LogisticRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so its repr is
# still displayed.
logmodel

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [143]:
"""Now predict the output on the training dataset"""
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Predict on the same rows the model was fitted on, so the report below
# describes training performance.
predict = logmodel.predict(X_train)
train_report = classification_report(y_train, predict)
print(train_report)

             precision    recall  f1-score   support

          0       0.53      0.08      0.14      2091
          1       0.86      0.99      0.92     12309

avg / total       0.81      0.86      0.81     14400



In [144]:
""" Checking the Accuracy of the model """
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# Compute the summary scores first, then print the confusion matrix followed
# by the two scores.
train_accuracy = accuracy_score(y_train, predict)
train_f1 = f1_score(y_train, predict)

print(confusion_matrix(y_train, predict))
print("Accuracy Score: %f" % train_accuracy)
print("F1 Score: %f" % train_f1)

[[  168  1923]
 [  150 12159]]
Accuracy Score: 0.856042
F1 Score: 0.921450


# Now predicting on the loan evaluation dataset

In [145]:
""" Prediction on the loan evaluation data """
# Read the evaluation set into the `loan_eva` DataFrame.
loan_eva = pd.read_csv(filepath_or_buffer='C:/vikas/data/ML/loan_evaluation02.csv')

In [146]:
# Columns that are not used as model features; they are removed from
# `loan_eva` in place.
unused_columns = [
    'Borrower\'s Duration of Employment',
    'Borrower Rating by Bank',
    'Last Month Payment was Received',
    'Purpose of Loan',
    'Loan Issue Date',
    'Number of Times the Borrower has Availed Loan from All Banks',
    'Percentage of Credit the Borrower is Using Relative to All Available Revolving Credit',
    'Date of Borrower\'s First  Loan',
    'Interest Rate',
    'The Number of Months Since the Borrower\'s Credit Record  was Updated',
    'Debt-to-Income Ratio (A ratio calculated based on Borrower\'s monthly debt repayments to self-reported monthly income)',
]
loan_eva.drop(unused_columns, axis=1, inplace=True)

# Replace missing delinquency gaps with 0. The result is assigned back
# explicitly instead of calling `fillna(..., inplace=True)` on a column
# selection, which does not reliably update `loan_eva` itself on pandas
# versions that use Copy-on-Write.
months_since_delinquency = 'Number of Months Since the Borrower\'s Last Delinquency'
loan_eva[months_since_delinquency] = loan_eva[months_since_delinquency].fillna(0)

# Preview the first three rows as the cell output.
loan_eva.head(3)

Unnamed: 0,Loan Application Number,Loan Amount,Loan Duration,EMI,Home Ownership of Borrower,Annual Income of Borrower,Borrower's Verification Status,Borrower Delinquency in Last Two Years,Number of Months Since the Borrower's Last Delinquency,Number of Times the Borrower has Availed Loan from the Bank,Number of Negative Comments About the Borrower in Credit History,Total Credit Revolving Balance,Late Fees Received To Date,Last Amount Received as Payment,rm_employee_id,Loan Status
0,30950,65888,60 months,1431.584629,Rental,339594,Verified,0,0,5,0,202068,0,1109.504629,1684,???
1,24049,11152,43 months,314.576057,Own,182048,Not Verified,0,38,13,0,34474,0,5963.406057,3867,???
2,38793,43872,45 months,1100.367522,Rental,721041,Not Verified,0,0,7,0,18086,0,29124.40752,230,???


In [147]:
def verified(string):
    """Encode a verification-status label as a 0/1 flag.

    Args:
        string: The status label; it must support the ``in`` substring test.

    Returns:
        0 when the label contains the substring 'Not', otherwise 1.
    """
    return 0 if 'Not' in string else 1
    
def fix_borrower(string):
    """Encode a home-ownership label as an integer code.

    The label is matched exactly (after trimming surrounding whitespace)
    against the known categories. The previous implementation tested
    ``string in 'Own'`` -- i.e. whether the input was a *substring* of the
    category name -- so an empty string, a single letter, or a fragment such
    as 'Not' was silently mapped to a category.

    Args:
        string: The raw home-ownership label.

    Returns:
        0 for 'Under Mortgage', 2 for 'Own', 1 for 'Rental', -1 for 'Other',
        -2 for 'Info Not Available', and None for anything else (including
        non-string values such as NaN).
    """
    codes = {
        'Under Mortgage': 0,
        'Own': 2,
        'Rental': 1,
        'Other': -1,
        'Info Not Available': -2,
    }
    # Only strings can be trimmed; other values (e.g. a float NaN) are looked
    # up as-is and fall through to None.
    label = string.strip() if isinstance(string, str) else string
    return codes.get(label)
    
def fix_loan(string):
    """Return the leading whitespace-separated token of ``string`` as an int.

    For example, '60 months' becomes 60.
    """
    leading_token = string.split(None, 1)[0]
    return int(leading_token)

def fix_float(string):
    """Round a numeric value to two decimal places and return it."""
    rounded = round(string, 2)
    return rounded

# Convert each raw evaluation column to its numeric form, one
# (column, converter) pair at a time, in the same order as before.
for column, converter in (
    ('EMI', fix_float),
    ('Last Amount Received as Payment', fix_float),
    ('Loan Duration', fix_loan),
    ('Home Ownership of Borrower', fix_borrower),
    ('Borrower\'s Verification Status', verified),
):
    loan_eva[column] = loan_eva[column].apply(converter)

In [148]:
""" Fix Nan """
# Replace missing values with 0 in the listed evaluation columns.
# The result is assigned back explicitly instead of calling
# `loan_eva[col].fillna(0, inplace=True)`: that form mutates a column
# selection, which does not reliably update `loan_eva` itself on pandas
# versions that use Copy-on-Write.
nan_to_zero_columns = [
    'Borrower Delinquency in Last Two Years',
    'Number of Times the Borrower has Availed Loan from the Bank',
    'Number of Negative Comments About the Borrower in Credit History',
    'Total Credit Revolving Balance',
]
loan_eva[nan_to_zero_columns] = loan_eva[nan_to_zero_columns].fillna(0)

In [150]:
# Drop the 'Loan Status' column from the evaluation frame, predict it with
# the fitted model, and append the predictions as a new 'Loan Status' column.
X_eva = loan_eva.drop(['Loan Status'], axis=1)
predict_eva = logmodel.predict(X_eva)
X_eva = X_eva.assign(**{'Loan Status': predict_eva})

# Preview the first rows, including the predicted status, as the cell output.
X_eva.head()

Unnamed: 0,Loan Application Number,Loan Amount,Loan Duration,EMI,Home Ownership of Borrower,Annual Income of Borrower,Borrower's Verification Status,Borrower Delinquency in Last Two Years,Number of Months Since the Borrower's Last Delinquency,Number of Times the Borrower has Availed Loan from the Bank,Number of Negative Comments About the Borrower in Credit History,Total Credit Revolving Balance,Late Fees Received To Date,Last Amount Received as Payment,rm_employee_id,Loan Status
0,30950,65888,60,1431.58,1,339594,1,0,0,5,0,202068,0,1109.5,1684,1
1,24049,11152,43,314.58,2,182048,0,0,38,13,0,34474,0,5963.41,3867,1
2,38793,43872,45,1100.37,1,721041,0,0,0,7,0,18086,0,29124.41,230,1
3,27884,123059,60,3117.46,1,597839,1,0,0,7,0,115711,0,3117.46,142,1
4,38157,76870,56,2069.76,1,372235,1,0,0,16,0,44331,0,40498.81,2874,1


In [152]:
# Write the evaluation rows with their predicted 'Loan Status' to CSV,
# leaving out the DataFrame index.
X_eva.to_csv(path_or_buf='C:/vikas/data/ML/loan_evaluation02_results.csv', index=False)

# DONE !!!