In [1]:
import pandas as pd
from bokeh.plotting import figure, output_file, show, output_notebook, ColumnDataSource

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [62]:
# Load the loan data CSV into a DataFrame; the relative path is resolved
# against the kernel's current working directory.
data = pd.read_csv('prosperLoanData.csv')

Index([u'ListingKey', u'ListingNumber', u'ListingCreationDate', u'CreditGrade',
       u'Term', u'LoanStatus', u'ClosedDate', u'BorrowerAPR', u'BorrowerRate',
       u'LenderYield', u'EstimatedEffectiveYield', u'EstimatedLoss',
       u'EstimatedReturn', u'ProsperRating (numeric)',
       u'ProsperRating (Alpha)', u'ProsperScore', u'ListingCategory (numeric)',
       u'BorrowerState', u'Occupation', u'EmploymentStatus',
       u'EmploymentStatusDuration', u'IsBorrowerHomeowner',
       u'CurrentlyInGroup', u'GroupKey', u'DateCreditPulled',
       u'CreditScoreRangeLower', u'CreditScoreRangeUpper',
       u'FirstRecordedCreditLine', u'CurrentCreditLines', u'OpenCreditLines',
       u'TotalCreditLinespast7years', u'OpenRevolvingAccounts',
       u'OpenRevolvingMonthlyPayment', u'InquiriesLast6Months',
       u'TotalInquiries', u'CurrentDelinquencies', u'AmountDelinquent',
       u'DelinquenciesLast7Years', u'PublicRecordsLast10Years',
       u'PublicRecordsLast12Months', u'RevolvingCredi

In [39]:
# Keep only the columns used by the plots below. The explicit .copy() makes
# `df` an independent frame, so later column assignments on it do not raise
# pandas' SettingWithCopyWarning.
df = data[[
    'BorrowerAPR', 'LoanStatus', 'Term', 'EstimatedLoss', 'EstimatedReturn',
    'ProsperRating (Alpha)', 'Occupation', 'BorrowerState', 'EmploymentStatus',
    'StatedMonthlyIncome', 'DebtToIncomeRatio', 'IncomeRange',
    'OnTimeProsperPayments', 'TotalProsperLoans',
]].copy()

### Basic Bokeh Plotting

In [42]:
# Scatter of every listing: estimated loss on x, borrower APR on y.
axis_labels = dict(x_axis_label='Estimated Loss', y_axis_label='Borrower APR')
p = figure(plot_width=950, plot_height=400, **axis_labels)

marker_style = dict(size=5, color="navy", alpha=0.5)
p.circle(x=df['EstimatedLoss'], y=df['BorrowerAPR'], **marker_style)

show(p)

### Plotting with Hover Tooltips

In [43]:
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool, LabelSet
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

# Bar chart of the mean stated monthly income per employment status, with a
# hover tooltip showing the bar's value and category.

# Cast to str so the statuses can serve as categorical x-axis factors.
# .assign rebinds `df` to a new frame instead of writing into a selection of
# `data`, which would raise SettingWithCopyWarning.
df = df.assign(EmploymentStatus=df['EmploymentStatus'].astype(str))

group = df.groupby('EmploymentStatus')

# A ColumnDataSource built from a GroupBy holds the per-group summary
# statistics, exposed as '<column>_<stat>' (e.g. 'StatedMonthlyIncome_mean').
source = ColumnDataSource(group)

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

p = figure(plot_width=950, plot_height=400, title="Employment status",
           x_range=group, tools=TOOLS, toolbar_location="above")

p.vbar(x='EmploymentStatus', top='StatedMonthlyIncome_mean', width=1, source=source, line_color="white")

p.y_range.start = 0
p.x_range.range_padding = 0.09
p.xgrid.grid_line_color = None
# The bars are grouped by employment status, not by occupation.
p.xaxis.axis_label = "Employment Status"
p.xaxis.major_label_orientation = 1.3
p.outline_line_color = None

# This chart has no Term dimension, so the second tooltip row is labelled
# after the single field it shows.
p.add_tools(HoverTool(tooltips=[("Salary_AVG", "@StatedMonthlyIncome_mean"),
                                ("Employment status", "@EmploymentStatus")]))

show(p)

ImportError: No module named transform

In [None]:
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool, LabelSet
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

# Bar chart of the mean stated monthly income for every
# (loan term, employment status) pair, coloured by loan term.

# Both grouping columns are cast to str so they can act as categorical
# factors. .assign rebinds `df` to a new frame instead of writing into a
# selection of `data`, which would raise SettingWithCopyWarning.
df = df.assign(Term=df['Term'].astype(str),
               EmploymentStatus=df['EmploymentStatus'].astype(str))

# The keys must be a list: pandas interprets a tuple as ONE key, so
# groupby(('Term', 'EmploymentStatus')) does not group by the two columns.
group = df.groupby(['Term', 'EmploymentStatus'])

# Grouping by two columns yields a source whose index column is named
# 'Term_EmploymentStatus' and holds (term, status) tuples.
source = ColumnDataSource(group)

# Colour by the first level of that factor only (end=1), i.e. by Term.
index_cmap = factor_cmap('Term_EmploymentStatus', palette=Spectral5,
                         factors=sorted(df.Term.unique()), end=1)

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

# The chart is keyed on the loan Term column, not on LoanStatus.
p = figure(plot_width=950, plot_height=400, title="Loan Term vs Employment status",
           x_range=group, tools=TOOLS, toolbar_location="above")

p.vbar(x='Term_EmploymentStatus', top='StatedMonthlyIncome_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap)

p.y_range.start = 0
p.x_range.range_padding = 0.09
p.xgrid.grid_line_color = None
# The x axis shows (term, employment status) pairs; Occupation is not plotted.
p.xaxis.axis_label = "Loan Term / Employment Status"
p.xaxis.major_label_orientation = 1.3
p.outline_line_color = None

p.add_tools(HoverTool(tooltips=[("Salary_AVG", "@StatedMonthlyIncome_mean"),
                                ("Term, Employ", "@Term_EmploymentStatus")]))

show(p)

### Plotting with Labels

In [44]:

# Labelled scatter: one point per borrower state, positioned at that state's
# mean APR (x) and mean estimated loss (y).

# A single groupby yields both per-state means; this replaces two separate
# groupbys merged back together on 'BorrowerState'.
df_new = (
    df.groupby('BorrowerState')[['BorrowerAPR', 'EstimatedLoss']]
    .mean()
    .reset_index()
)

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
TITLE = "Estimated Loss vs APR grouped by State"

p = figure(tools=TOOLS, toolbar_location="above", logo="grey", plot_width=950, title=TITLE)
p.background_fill_color = "#dddddd"
# Axis labels follow the glyph below: BorrowerAPR is x, EstimatedLoss is y.
p.xaxis.axis_label = "Average APR"
p.yaxis.axis_label = "Average Estimated Loss"
p.grid.grid_line_color = "white"

source = ColumnDataSource(df_new)

p.circle("BorrowerAPR", "EstimatedLoss", size=12, source=source, line_color="black", fill_alpha=0.8)

# State abbreviations drawn slightly above each marker.
labels = LabelSet(x="BorrowerAPR", y="EstimatedLoss", text="BorrowerState", y_offset=8,
                  text_font_size="8pt", text_color="#555555",
                  source=source, text_align='center')
p.add_layout(labels)

show(p)


In [45]:
# Labelled scatter: one point per borrower state, positioned at that state's
# mean APR (x) and mean stated monthly income (y), and coloured by income.

# A single groupby yields both per-state means; this replaces two separate
# groupbys merged back together on 'BorrowerState'.
df_new = (
    df.groupby('BorrowerState')[['BorrowerAPR', 'StatedMonthlyIncome']]
    .mean()
    .reset_index()
)

# Diverging blue -> red palette, keyed by bucket index.
mapping_palette = {0: "#053061", 1: "#2166ac", 2: "#4393c3", 3: "#92c5de", 4: "#d1e5f0",
                   5: "#f7f7f7", 6: "#fddbc7", 7: "#f4a582", 8: "#d6604d", 9: "#b2182b", 10: "#67001f"}

# Rescale mean income linearly onto the palette's bucket indices (0..top_bucket),
# round to the nearest bucket and look up its colour. Series.min()/.max() are
# used instead of the builtins so that missing values are skipped.
income = df_new['StatedMonthlyIncome']
low, high = income.min(), income.max()
top_bucket = max(mapping_palette)
color_code = top_bucket * (income - low) / (high - low)
df_new['color'] = color_code.round(0).astype(int).map(mapping_palette)

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
# This figure plots income, not estimated loss.
TITLE = "Stated Monthly Income vs APR grouped by State"

p = figure(tools=TOOLS, toolbar_location="above", logo="grey", plot_width=950, title=TITLE)
p.background_fill_color = "#dddddd"
# Axis labels follow the glyph below: BorrowerAPR is x, StatedMonthlyIncome is y.
p.xaxis.axis_label = "Average APR"
p.yaxis.axis_label = "Average Stated Monthly Income"
p.grid.grid_line_color = "white"

source = ColumnDataSource(df_new)

# 'color' names the per-row colour column in the source.
p.circle("BorrowerAPR", "StatedMonthlyIncome", size=12, source=source, line_color="black",
         fill_alpha=0.8, color='color')

# State abbreviations drawn slightly above each marker.
labels = LabelSet(x="BorrowerAPR", y="StatedMonthlyIncome", text="BorrowerState", y_offset=8,
                  text_font_size="8pt", text_color="#555555",
                  source=source, text_align='center')
p.add_layout(labels)

show(p)