# DATA VISUALIZATION WITH BOKEH

In [1]:
import numpy as np
import pandas as pd
from bokeh.io import output_notebook, push_notebook
from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.plotting import ColumnDataSource
from bokeh.layouts import row,column,gridplot,widgetbox
from bokeh.models.widgets import Tabs, Panel,Button,Dropdown,CheckboxGroup,\
                                RadioGroup,Slider,TextInput,Select,MultiSelect

from bokeh.models import DateSlider, DateRangeSlider
from datetime import date
from datetime import datetime
from bokeh.palettes import Category20 as palette
import itertools
from bokeh.models import CategoricalColorMapper,CustomJS
from numpy.random import random, normal
from bokeh.io import curdoc
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
output_notebook()

In [8]:
df = pd.read_csv('all_stocks_5yr.csv')

df['date']=pd.to_datetime(df['date'], infer_datetime_format=True)
#Filtering for apple stocks

df_apple = df[df['Name'] == 'AAL']

#Create the ColumnDataSource object

data = ColumnDataSource(data = {
    'x' : df_apple['high'],
    'y' : df_apple['low'],
    'x1': df_apple['volume']
    
})

#Creating the scatter plot 

plot = figure(title = 'Attribute selector application')

plot.diamond('x', 'y', source = data, color = 'red')

#Creating the select widget

select_x = Select(options = ['low', 'high','close','open','volume'], value = 'low', title = 'Select a new x axis attribute')
select_y = Select(options = ['high', 'low','open','close','volume'], value = 'high', title = 'Select a new y axis attribute')
#Define the callback function

def callback(attr, old, new):
    name_x=select_x.value
    name_y=select_y.value
    data.data = {'x' : df_apple[name_x], 'y': df_apple[name_y]}
    
    #if new == 'low':
    #data.data = {'x' : df_apple['high'], 'y': df_apple['low']}
    #else:
    #data.data = {'x' : df_apple['high'], 'y': df_apple['volume']}
        
#select_widget.on_change('value', callback)


#Add the layout to the application

layout = row(column(select_x,select_y), plot)

def modify_doc(doc):
    doc.add_root(layout)
    doc.title = "Sliders"
    select_x.on_change('value', callback)
    select_y.on_change('value', callback)


handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

In [3]:
date_min=df['date'].min()

In [4]:
date_max=df['date'].max()

In [5]:
df_new=df[(df['date']>date_min)&(df['date']<date_max)]

In [6]:
df_new.head()

Unnamed: 0,date,open,high,low,close,volume,Name
1,2012-08-14,92.36,92.5,92.01,92.3,1843476.0,MMM
2,2012-08-15,92.0,92.74,91.94,92.54,1983395.0,MMM
3,2012-08-16,92.75,93.87,92.21,93.74,3395145.0,MMM
4,2012-08-17,93.93,94.3,93.59,94.24,3069513.0,MMM
5,2012-08-20,94.0,94.17,93.55,93.89,1640008.0,MMM


In [7]:
data = ColumnDataSource(data = {
    'x' : df_apple['high'],
    'y' : df_apple['low'],
    'date': df_apple['date']
})

#Creating the scatter plot 

plot = figure(plot_width=300, plot_height=200,title = 'Attribute selector application')
plot.circle('x', 'y', source = data, color = 'red')

plot1 = figure(plot_width=300, plot_height=200,title = 'Date vs x', x_axis_type = 'datetime')
plot1.line('date', 'x', source = data, color = 'red')

plot2 = figure(plot_width=300, plot_height=200,title = 'Date vs y', x_axis_type = 'datetime')
plot2.line('date', 'y', source = data, color = 'red')

#Creating the select widget

select_x = Select(options = ['low', 'high','close','open','volume'], value = 'low', title = 'Select a new x axis attribute')
select_y = Select(options = ['high', 'low','open','close','volume'], value = 'high', title = 'Select a new y axis attribute')
#Define the callback function

#Creating the select widget date
start_slider = DateSlider(title="Date Start: ", start=date_min, end=date_max, value=date_min, step=1)
end_slider = DateSlider(title="Date End: ", start=start_slider.value, end=date_max, value=date_max, step=1)
#date_slider=DateRangeSlider(title="Date Range: ", start=date_min, end=date_max, value=(date_min,date_max), step=1)

#datetime.combine(d, datetime.min.time())
#my_datetime = datetime(my_date.year, my_date.month, my_date.day)


def callback(attr, old, new):
    name_x=select_x.value
    name_y=select_y.value
    
    date_start=datetime.combine(start_slider.value,datetime.min.time())
    date_end=datetime.combine(end_slider.value,datetime.min.time())
    
    #first=date_slider.value[0]
    #second=date_slider.value[1]
    #date_range_min=first#datetime.combine(first,datetime.min.time())
    #date_range_max=second#datetime.combine(second,datetime.min.time())
    #date_range_min=date_range.value[0]
    #date_range_max=date_range.value[1]
    
    #df_new=df_apple[(df_apple['date']>=date_start)&(df_apple['date']<=date_end)]
    
    df_new=df_apple[(df_apple['date']>=date_start)&(df_apple['date']<=date_end)]
    data.data = {'x' : df_new[name_x], 'y': df_new[name_y],'date': df_new['date']}
    
    #if new == 'low':
    #data.data = {'x' : df_apple['high'], 'y': df_apple['low']}
    #else:
    #data.data = {'x' : df_apple['high'], 'y': df_apple['volume']}
        
#select_widget.on_change('value', callback)


#Add the layout to the application

layout = column(row(column(select_x,select_y),column(start_slider,end_slider)), row(plot,plot1,plot2))

def modify_doc(doc):
    doc.add_root(layout)
    doc.title = "Sliders"
    select_x.on_change('value', callback)
    select_y.on_change('value', callback)
    
    start_slider.on_change('value', callback)
    end_slider.on_change('value', callback)
    #date_slider.on_change('value', callback)

handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '1042'}, 'attr': 'value', 'new': 'high'}], 'references': []}: RuntimeError('_pending_writes should be non-None when we have a document lock, and we should have the lock when the document changes')
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '1043'}, 'attr': 'value', 'new': 'low'}], 'references': []}: RuntimeError('_pending_writes should be non-None when we have a document lock, and we should have the lock when the document changes')
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '1043'}, 'attr': 'value', 'new': 'close'}], 'references': []}: RuntimeError('_pending_write

In [8]:
date_slider=DateRangeSlider(title="Date Range: ", start=date_min, end=date_max, value=(date_min,date_max), step=1)

In [9]:
a=date_slider.value[0]

In [10]:
date_slider.value[1]

Timestamp('2017-08-11 00:00:00')

In [11]:
timestamp = datetime.fromtimestamp(1502409600)#.strftime('%Y-%m-%d %H:%M:%S')
#print(timestamp.strftime('%Y-%m-%d %H:%M:%S'))
print(timestamp)

2017-08-11 07:00:00


In [12]:
data = ColumnDataSource(data = {
    'x' : df_apple['high'],
    'y' : df_apple['low'],
    'date': df_apple['date']
})

#Creating the scatter plot 

plot = figure(plot_width=300, plot_height=200,title = 'Attribute selector application')
plot.circle('x', 'y', source = data, color = 'red')

plot1 = figure(plot_width=300, plot_height=200,title = 'Date vs x', ')
plot1.line('date', 'x', source = data, color = 'red')

plot2 = figure(plot_width=300, plot_height=200,title = 'Date vs y', x_axis_type = 'datetime')
plot2.line('date', 'y', source = data, color = 'red')

#Creating the select widget

select_x = Select(options = ['low', 'high','close','open','volume'], value = 'low', title = 'Select a new x axis attribute')
select_y = Select(options = ['high', 'low','open','close','volume'], value = 'high', title = 'Select a new y axis attribute')
#Define the callback function

#Creating the select widget date
#start_slider = DateSlider(title="Date Start: ", start=date_min, end=date_max, value=date_min, step=1)
#end_slider = DateSlider(title="Date End: ", start=start_slider.value, end=date_max, value=date_max, step=1)
date_slider=DateRangeSlider(title="Date Range: ", start=date_min, end=date_max, value=(date_min,date_max), step=1)

#datetime.combine(d, datetime.min.time())
#my_datetime = datetime(my_date.year, my_date.month, my_date.day)

def callback(attr, old, new):
    name_x=select_x.value
    name_y=select_y.value
    
    #date_start=datetime.combine(start_slider.value,datetime.min.time())
    #date_end=datetime.combine(end_slider.value,datetime.min.time())
    
    first=datetime.fromtimestamp(date_slider.value[0]/1000)
    second=datetime.fromtimestamp(date_slider.value[1]/1000)
    #date_range_min=first#datetime.combine(first,datetime.min.time())
    #date_range_max=second#datetime.combine(second,datetime.min.time())
    date_start=first#datetime.combine(int(first/1000),datetime.min.time())
    date_end=second#datetime.combine(int(second/1000),datetime.min.time())
    
    #df_new=df_apple[(df_apple['date']>=date_start)&(df_apple['date']<=date_end)]
    
    df_new=df_apple[(df_apple['date']>=date_start)&(df_apple['date']<=date_end)]
    data.data = {'x' : df_new[name_x], 'y': df_new[name_y],'date': df_new['date']}
    
    #if new == 'low':
    #data.data = {'x' : df_apple['high'], 'y': df_apple['low']}
    #else:
    #data.data = {'x' : df_apple['high'], 'y': df_apple['volume']}
        
#select_widget.on_change('value', callback)


#Add the layout to the application

layout = column(row(column(select_x,select_y),column(date_slider)), row(plot,plot1,plot2))



def modify_doc(doc):
    doc.add_root(layout)
    doc.title = "Sliders"
    select_x.on_change('value', callback)
    select_y.on_change('value', callback)
    
    #start_slider.on_change('value', callback)
    #end_slider.on_change('value', callback)
    date_slider.on_change('value', callback)

handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)#,notebook_url="localhost:8000")


SyntaxError: EOL while scanning string literal (<ipython-input-12-7bfe8647fb0e>, line 12)

# FULL STOCK DATA SET

In [13]:
df = pd.read_csv('all_stocks_5yr.csv')

In [14]:
df['date']=pd.to_datetime(df['date'], infer_datetime_format=True)

In [15]:
stock_name=list(df['Name'].unique()[:20])

In [16]:
df_select=df[df['Name'].isin(stock_name)]
#source=ColumnDataSource(df_select)
s_name=stock_name[:4]
xs=[]
ys=[]
names=[]
df1=df_select[df_select['Name'].isin(s_name)]

for name in s_name:
    df_s=df1[df1['Name']==name]
    
    xs.append(df_s['date'])
    ys.append(df_s['open'])
    names.append(name)

source=ColumnDataSource({
    'xs': xs,
    'ys': ys,
    'names': names
})

colors = palette[20]

plot = figure(plot_width=500, plot_height=300,title = 'Selector stocks')

#for name in stock_name:
    #df_s=df_select[df_select['Name']==name]
    #plot.line('date','open',source=source,color='red')

plot.multi_line('xs','ys',legend='names',source=source)

plot.legend.location = "top_left"
plot.legend.click_policy="hide"

multi_select = MultiSelect(title="Stocks:", value=[],
                           options=stock_name,
                          height=200,width=100)

def callback(attr, old, new):
    ms_name=multi_select.value

    df_s=df_select[df_select['Name'].isin(ms_name)]
    
    xs=[]
    ys=[]
    names=[]
    for name in ms_name:
        df1=df_s[df_s['Name']==name]
    
        xs.append(df1['date'])
        ys.append(df1['open'])
        names.append(name)
        
    source.data = {'xs': xs,
                  'ys': ys,
                   'names': names
                }

layout=row(multi_select,plot)

def modify_doc(doc):
    doc.add_root(layout)
    multi_select.on_change('value', callback)

handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

ERROR:bokeh.server.views.ws:Refusing websocket connection from Origin 'http://localhost:8889';                       use --allow-websocket-origin=localhost:8889 or set BOKEH_ALLOW_WS_ORIGIN=localhost:8889 to permit this; currently we allow origins {'localhost:8888'}


In [17]:
stock_name=list(df['Name'].unique())

In [18]:
df_select=df[df['Name'].isin(stock_name)]
#source=ColumnDataSource(df_select)
s_name=stock_name[:4]
xs=[]
ys=[]
names=[]
df1=df_select[df_select['Name'].isin(s_name)]

for name in s_name:
    df_s=df1[df1['Name']==name]
    
    xs.append(df_s['date'])
    ys.append(df_s['open'])
    names.append(name)
    
colors=palette[20]

n=len(names)
source=ColumnDataSource({
    'xs': xs,
    'ys': ys,
    'names': names,
    'color': colors[:n]
})

plot = figure(plot_width=500, plot_height=300,title = 'Selector stocks')

#for name in stock_name:
    #df_s=df_select[df_select['Name']==name]
    #plot.line('date','open',source=source,color='red')

plot.multi_line('xs','ys',legend='names',source=source,line_color='color')

plot.legend.location = "top_left"
plot.legend.click_policy="hide"

multi_select = MultiSelect(title="Stocks:", value=[],
                           options=stock_name,
                          height=200,width=100)

def callback(attr, old, new):
    ms_name=multi_select.value

    df_s=df_select[df_select['Name'].isin(ms_name)]
    
    xs=[]
    ys=[]
    names=[]
    for name in ms_name:
        df1=df_s[df_s['Name']==name]
    
        xs.append(df1['date'])
        ys.append(df1['open'])
        names.append(name)
    
    n=len(ms_name)  
    
    source.data = {'xs': xs,
                  'ys': ys,
                   'names': names,
                   'color': colors[:n]
                }

layout=row(multi_select,plot)

def modify_doc(doc):
    doc.add_root(layout)
    multi_select.on_change('value', callback)

handler = FunctionHandler(modify_doc)
app = Application(handler)
show(app)

ERROR:bokeh.server.views.ws:Refusing websocket connection from Origin 'http://localhost:8889';                       use --allow-websocket-origin=localhost:8889 or set BOKEH_ALLOW_WS_ORIGIN=localhost:8889 to permit this; currently we allow origins {'localhost:8888'}


In [19]:
colors[:4]

['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78']