# Loanbook Exploratory Analysis

In [None]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# Let pandas print up to 125 rows before truncating a displayed frame/series.
pd.set_option('display.max_rows', 125)

Load loan data from CSV

In [None]:
# low_memory=False turns off chunked parsing, so each column's dtype is
# inferred from the whole file rather than piece by piece.
loandata = pd.read_csv(
    "data/loandata.csv",
    low_memory=False,
)

First look at the data:

In [None]:
# Preview the first rows of the raw loan table.
loandata.head()

In [None]:
# (number of rows, number of columns) of the raw loan table.
loandata.shape

All columns with their types, number of unique values, and number of missing values:

In [None]:
# Per-column overview of the loan table, indexed by column name ('names'):
#   types   - dtype of the column
#   uniques - number of distinct values (NaN counts as one distinct value)
#   nans    - number of missing values
columns = pd.DataFrame({
    'types': loandata.dtypes,
    # dropna=False keeps NaN in the count, matching len(Series.unique()).
    'uniques': loandata.nunique(dropna=False),
    'nans': loandata.isnull().sum(),
}).rename_axis('names')

# Columns with the fewest missing values come first.
columns = columns.sort_values(by='nans')
columns.head(120)

Analyse missing values:

In [None]:
# Share of missing values per column, as a percentage of all rows.
missingvalues = columns['nans'] / len(loandata.index) * 100
# Keep only the columns where more than 5% of the rows are missing, worst first.
missingvalues = missingvalues[missingvalues > 5].sort_values(ascending=False)
# Label the chart so the percentage unit is visible; the trailing ';' hides the repr.
missingvalues.plot(
    kind='bar',
    figsize=(20, 2),
    title='Columns with more than 5% missing values',
).set_ylabel('Missing values (%)');

In [None]:
# Show the filtered missing-value percentages as a table.
missingvalues

Let's have a look at some of the columns:

In [None]:
# Add a monthly period column derived from the 'ListedOnUTC' column;
# the cells below group by it.
loandata['yearmonth'] = (
    pd.to_datetime(loandata['ListedOnUTC'])
    .dt.to_period(freq='M')
)

Number of loans in countries per YearMonth:

In [None]:
# Count loans per (yearmonth, Country), pivot the countries into columns and
# treat month/country combinations without any loan as zero before plotting.
(
    loandata.groupby(['yearmonth', 'Country'])
    .size()
    .unstack('Country')
    .sort_index(ascending=True)
    .fillna(0)
    .plot(figsize=(16, 5), title='Number of loans per country and yearmonth')
    .set_ylabel('Number of loans')
);

In [None]:
# One row per loan: its yearmonth plus a boolean flag per rating column that
# is True where that rating is present (not null).
r = pd.concat(
    [
        loandata['yearmonth'],
        loandata[['Rating', 'Rating_V0', 'Rating_V1', 'Rating_V2']].notna(),
    ],
    axis='columns',
)

Different types of rating per YearMonth:

In [None]:
# Sum the presence flags per yearmonth (number of loans carrying each rating
# column), then drop the months in which every count is zero.
g = (
    r.groupby('yearmonth')
    .sum()
    .loc[lambda counts: counts.ne(0).any(axis='columns')]
)
g.head()

In [None]:
# One line per rating column: how many loans per yearmonth have that rating set.
# Title and y-label make the figure readable on its own; ';' hides the repr.
g.plot(
    figsize=(16, 4),
    title='Loans with a non-null rating per yearmonth',
).set_ylabel('Number of loans');