In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's white-grid theme for the plots that follow.
sns.set_style(style='whitegrid')

In [None]:
# Lookup table indexed by 'LoanStatNew'; feat_info() reads its 'Description' column.
data_info = pd.read_csv(
    '../input/lendingclub/lending_club_info.csv',
    index_col='LoanStatNew',
)

In [None]:
def feat_info(x):
    """Print the 'Description' entry stored in ``data_info`` for index label ``x``."""
    description = data_info.loc[x, 'Description']
    print(description)

In [None]:
# Print the stored description of the 'mort_acc' column.
feat_info(x='mort_acc')

In [None]:
# Load the loan dataset, then summarise its columns, dtypes and non-null counts.
df = pd.read_csv(filepath_or_buffer='../input/lendingclub/lending_club_loan_two.csv')
df.info()

In [None]:
# Summary statistics, transposed so each column of df becomes a row.
df.describe().T

In [None]:
# Class balance of the target column.
# `x` is passed by keyword: seaborn >= 0.12 rejects the plotted variable as a
# positional argument when `data=` is also given.
sns.countplot(x='loan_status', data=df)
plt.show()

In [None]:
# Histogram of the 'loan_amnt' column using 30 bins.
loan_amounts = df['loan_amnt']
loan_amounts.hist(bins=30)
plt.show()

In [None]:
# Pairwise correlations of the numeric columns only.
# df still holds string columns here, and pandas >= 2.0 raises when corr() is
# called on them, so the numeric/bool subset is selected explicitly.
df.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Annotated heatmap of the numeric-column correlation matrix.
plt.figure(figsize=(8, 8))
# Restrict to numeric/bool columns: corr() on string columns raises on pandas >= 2.0.
sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(), annot=True)
# tight_layout() must run before show(); called afterwards it no longer affects
# the figure that was displayed.
plt.tight_layout()
plt.show()

In [None]:
# Scatter of 'installment' against 'loan_amnt'.
# x/y are keywords: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x='installment', y='loan_amnt', data=df)
plt.show()

In [None]:
# Print the stored description of the 'installment' column.
feat_info(x='installment')

In [None]:
# Print the stored description of the 'loan_amnt' column.
feat_info(x='loan_amnt')

In [None]:
# Loan amount distribution per loan status.
# x/y are keywords: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x='loan_status', y='loan_amnt', data=df)
plt.show()

In [None]:
# Summary statistics of 'loan_amnt' per loan status.
# Selecting the column before describe() yields the same table without first
# computing statistics for every other column.
df.groupby('loan_status')['loan_amnt'].describe()

In [None]:
# Distinct values found in the 'grade' column.
df.loc[:, 'grade'].unique()

In [None]:
# Loans per grade, split by loan status, with grades in sorted order.
grade_order = sorted(df['grade'].unique())
# `x` is passed by keyword: seaborn >= 0.12 rejects it positionally.
sns.countplot(x='grade', data=df, hue='loan_status', order=grade_order)
plt.show()

In [None]:
# Loans per sub-grade, with sub-grades in sorted order.
plt.figure(figsize=(10, 4))
sub_grade_order = sorted(df['sub_grade'].unique())
# `x` is passed by keyword: seaborn >= 0.12 rejects it positionally.
sns.countplot(x='sub_grade', data=df, order=sub_grade_order)
plt.show()

In [None]:
# Loans per sub-grade, split by loan status, with sub-grades in sorted order.
plt.figure(figsize=(10, 4))
sub_grade_order = sorted(df['sub_grade'].unique())
# `x` is passed by keyword: seaborn >= 0.12 rejects it positionally.
sns.countplot(x='sub_grade', data=df, order=sub_grade_order, hue='loan_status')
plt.show()

In [None]:
# Same sub-grade count plot, restricted to rows whose grade is 'F' or 'G'.
plt.figure(figsize=(10, 4))
# Build the filtered frame once instead of repeating the boolean mask twice.
f_and_g = df[df['grade'].isin(['F', 'G'])]
# `x` is passed by keyword: seaborn >= 0.12 rejects it positionally.
sns.countplot(
    x='sub_grade',
    data=f_and_g,
    hue='loan_status',
    order=sorted(f_and_g['sub_grade'].unique()),
)
plt.show()

In [None]:
# Encode the target numerically: 'Fully Paid' -> 1, 'Charged Off' -> 0.
status_codes = {'Fully Paid': 1, 'Charged Off': 0}
df['loan_status'] = df['loan_status'].map(status_codes)

In [None]:
# Show the encoded target column.
df.loc[:, 'loan_status']

In [None]:
# Sorted names of the columns that take part in the correlation matrix.
# Iterating a correlation frame yields its column labels, i.e. the numeric columns,
# so they are read directly; calling corr() on the string columns still present
# in df raises on pandas >= 2.0.
sorted(df.select_dtypes(include=['number', 'bool']).columns)

In [None]:
# Correlation of every numeric feature with the target, sorted ascending.
# Restrict to numeric/bool columns: corr() on string columns raises on pandas >= 2.0.
target_corr = df.select_dtypes(include=['number', 'bool']).corr()['loan_status']
# Drop the target's correlation with itself before plotting.
target_corr.sort_values().drop('loan_status').plot(kind='bar')
plt.show()

In [None]:
# Names of the columns that contain at least one missing value.
has_missing = df.isna().any()
df.columns[has_missing]

In [None]:
# Percentage of missing values per column.
100 * df.isna().mean()

In [None]:
# Number of distinct non-null values in 'emp_title'.
df.loc[:, 'emp_title'].nunique()

In [None]:
# Remove the 'emp_title' column from df in place.
df.drop(columns='emp_title', inplace=True)

In [None]:
# Loans per employment length, split by loan status, in chronological order.
emp_length_order = [
    '< 1 year', '1 year', '2 years', '3 years', '4 years', '5 years',
    '6 years', '7 years', '8 years', '9 years', '10+ years',
]
plt.figure(figsize=(10, 4))
# `x` is passed by keyword: seaborn >= 0.12 rejects it positionally.
sns.countplot(x='emp_length', data=df, hue='loan_status', order=emp_length_order)
plt.show()


In [None]:
# Per employment length: share of loans whose loan_status is 0 (mapped from 'Charged Off').
charged_off_counts = df[df['loan_status'] == 0].groupby('emp_length')['loan_status'].count()
all_counts = df.groupby('emp_length')['loan_status'].count()
emp = charged_off_counts / all_counts

In [None]:
# Bar chart of the per-employment-length ratio held in `emp`.
emp.plot.bar()
plt.show()

In [None]:
# Remove the 'emp_length' column from df in place.
df.drop(columns='emp_length', inplace=True)

In [None]:
# Number of distinct non-null values in 'title'.
df.loc[:, 'title'].nunique()

In [None]:
# Print the descriptions of 'purpose' and 'title' with blank space between them.
feat_info('purpose')
# '\n' (a newline), not the literal two-character text '/n' printed previously.
print('\n')
feat_info('title')

In [None]:
# Remove the 'title' column from df in place.
df.drop(columns='title', inplace=True)

In [None]:
# Count of missing values per column.
df.isna().sum()

In [None]:
# Distinct values found in the 'mort_acc' column.
df.loc[:, 'mort_acc'].unique()

In [None]:
# Correlation of each numeric column with 'mort_acc', sorted ascending.
# Restrict to numeric/bool columns: corr() on string columns raises on pandas >= 2.0.
df.select_dtypes(include=['number', 'bool']).corr()['mort_acc'].sort_values()

In [None]:
# Mean 'mort_acc' for each distinct 'total_acc' value.
# The column is selected before mean(): averaging the whole frame raises on
# pandas >= 2.0 because df still holds string columns, and it is wasted work anyway.
tot = df.groupby('total_acc')['mort_acc'].mean()

In [None]:
def fun(x,y):
    """Return the value to store in 'mort_acc' for one row.

    x: the row's 'total_acc' value, used as the lookup key into ``tot``.
    y: the row's 'mort_acc' value, possibly NaN.

    A missing ``y`` is replaced by ``tot[x]``; a present ``y`` is returned unchanged.
    """
    if np.isnan(y):
        return tot[x]
    # This branch used to return x, which overwrote every known mort_acc
    # with the row's total_acc.
    return y

In [None]:
# Row by row, pass (total_acc, mort_acc) through fun() and store the result as mort_acc.
df['mort_acc'] = df.apply(
    lambda row: fun(row['total_acc'], row['mort_acc']),
    axis=1,
)

In [None]:
# Count of missing values per column after filling 'mort_acc'.
df.isna().sum()

In [None]:
# Drop, in place, every row that still has at least one missing value.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Count of missing values per column after dropping incomplete rows.
df.isna().sum()

In [None]:
# Names of the columns whose dtype is 'object'.
stcol = [col for col in df.columns if df[col].dtype == 'object']
stcol

In [None]:
# Distinct values found in the 'term' column.
df.loc[:, 'term'].unique()

In [None]:
# Turn each 'term' string into an int parsed from its first three characters.
df['term'] = df['term'].map(lambda term_text: int(term_text[:3]))

In [None]:
# Remove the 'grade' column from df in place.
df.drop(columns='grade', inplace=True)

In [None]:
# Bare last expression: the notebook renders the current state of df.
df

In [None]:
# One-hot encode 'sub_grade' (first level dropped) and swap it in for the original column.
sub_dummies = pd.get_dummies(df['sub_grade'], drop_first=True)
df = pd.concat([df.drop(columns='sub_grade'), sub_dummies], axis=1)

In [None]:
# Names of the columns that are still of object dtype.
df.select_dtypes(include='object').columns

In [None]:
# One-hot encode four categorical columns (first level of each dropped), then
# replace the originals with the indicator columns in the same left-to-right order.
encoded_columns = ['verification_status', 'application_type', 'initial_list_status', 'purpose']
ver_dummies, app_dummies, init_dummies, purp_dummies = (
    pd.get_dummies(df[column], drop_first=True) for column in encoded_columns
)
df = pd.concat(
    [df.drop(columns=encoded_columns), ver_dummies, app_dummies, init_dummies, purp_dummies],
    axis=1,
)

In [None]:
# Names of the columns that are still of object dtype after the encodings above.
df.select_dtypes(include='object').columns

In [None]:
def fn(x):
    """Return 'OTHER' when ``x`` is 'ANY' or 'NONE'; otherwise return ``x`` unchanged."""
    return 'OTHER' if x in ('ANY', 'NONE') else x
    

In [None]:
# Fold the 'ANY' and 'NONE' categories of 'home_ownership' into 'OTHER' via fn().
df['home_ownership'] = df['home_ownership'].apply(fn)

In [None]:
# One-hot encode 'home_ownership' (first level dropped) and swap it in for the original column.
home_dummies = pd.get_dummies(df['home_ownership'], drop_first=True)
df = pd.concat([df.drop(columns='home_ownership'), home_dummies], axis=1)

In [None]:
# Take the last five characters of each address as the 'zip' feature.
# The vectorised .str[-5:] replaces x[len(x)-5:], whose start index turns negative
# for strings shorter than five characters and then cuts off part of the string.
df['zip'] = df['address'].str[-5:]

In [None]:
# Remove the 'address' column from df in place.
df.drop(columns='address', inplace=True)

In [None]:
# One-hot encode 'zip' (first level dropped) and swap it in for the original column.
zip_dummies = pd.get_dummies(df['zip'], drop_first=True)
df = pd.concat([df.drop(columns='zip'), zip_dummies], axis=1)

In [None]:
# Remove the 'issue_d' column from df in place.
df.drop(columns='issue_d', inplace=True)

In [None]:
# Peek at the first row's 'earliest_cr_line' from its fifth character onward.
# .iloc[0] is positional: the label 0 used before is not guaranteed to exist
# once dropna() has removed rows, which would raise KeyError.
df['earliest_cr_line'].iloc[0][4:]

In [None]:
# Parse everything after the first four characters of 'earliest_cr_line' as an int.
df['earliest_cr_year'] = df['earliest_cr_line'].map(lambda cr_line: int(cr_line[4:]))

In [None]:
# Remove the 'earliest_cr_line' column from df in place.
df.drop(columns='earliest_cr_line', inplace=True)

In [None]:
# Print df's column names, dtypes and non-null counts as they stand at this point.
df.info()

In [None]:
# Work on a reproducible 10% random sample of the rows (fixed seed 101).
df2 = df.sample(
    frac=0.1,
    random_state=101,
)

In [None]:
from sklearn.model_selection import train_test_split
# Features: every column except the target, as an array. Target: 'loan_status'.
feature_frame = df2.drop(columns='loan_status')
x = feature_frame.values
y = df2.loc[:, 'loan_status'].values


In [None]:
# Hold out 20% of the sample for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=101,
)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Learn the min/max scaling from the training features only, then apply it to both splits.
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
# Feed-forward binary classifier: three ReLU hidden layers (78, 39, 19 units),
# each followed by 20% dropout, then a single sigmoid output unit.
model = Sequential()
for hidden_units in (78, 39, 19):
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')


In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop training once val_loss has not improved for 5 consecutive epochs.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    verbose=1,
)
# Train for at most 100 epochs; the held-out split doubles as validation data.
model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[early_stop],
    verbose=1,
)


In [None]:
# Plot the recorded training and validation loss, one point per epoch.
training_history = model.history.history
losses = pd.DataFrame(training_history)
losses.loc[:, ['loss', 'val_loss']].plot()

In [None]:
from sklearn.metrics import classification_report,confusion_matrix
# Sequential.predict_classes() was removed in TensorFlow 2.6. For a single sigmoid
# output unit, thresholding the predicted probability at 0.5 gives the class labels.
predictions = (model.predict(x_test) > 0.5).astype('int32')
print(classification_report(y_test, predictions))
confusion_matrix(y_test, predictions)