# Intro to plotly

Probably for a separate tutorial session...

In [220]:
import numpy as np
import pandas as pd

In [2]:
from plotly import __version__
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.offline as py
import plotly.graph_objs as go

In [3]:
print(__version__)

3.5.0


In [4]:
py.init_notebook_mode(connected=True)

There are two basic plotting methods:

* `py.plot` generates HTML that is saved locally and viewed in your browser
* `py.iplot` generates HTML that is displayed in a notebook

In [5]:
# When working in a notebook, use iplot rather than plot
py.iplot([go.Scatter(x=[1, 2, 3], y=[3, 1, 6])])

In [6]:
# Two line traces plotted over the same x positions
trace0 = go.Scatter(x=[1, 2, 3, 4], y=[10, 15, 13, 17])
trace1 = go.Scatter(x=[1, 2, 3, 4], y=[16, 5, 11, 9])

# plotly takes the traces to draw as a list
data = [trace0, trace1]

In [7]:
py.iplot(data)

### Cufflinks allows you to use pandas DataFrames directly with plotly

In [8]:
import cufflinks as cf

In [9]:
df = cf.datagen.lines()
df.head()

Unnamed: 0,SFR.LE,RYW.RG,GCA.HV,COI.DD,WHK.OR
2015-01-01,-0.422548,-0.892031,0.425958,-0.736781,0.187925
2015-01-02,0.707531,-1.143871,0.918376,-0.668325,-1.677241
2015-01-03,1.45466,-1.824023,1.774993,-1.641642,-1.729927
2015-01-04,1.833125,-1.101095,3.049638,-2.341732,-1.131286
2015-01-05,1.89106,-0.463576,2.63134,-1.941555,-1.691486


In [10]:
py.iplot(df.iplot(asFigure=True, kind='scatter', xTitle='Dates', yTitle='Returns', title='Returns'))

### Tableau superstore dataset

In [11]:
# Read the local copy of the workbook if it exists; otherwise fall back to the hosted copy
local_path = 'data/superstore.xlsx'
remote_url = 'https://query.data.world/s/n2pyux2nabxy4c43zl3uugxsk5gt6v'

try:
    superstore = pd.read_excel(local_path)
except FileNotFoundError:
    superstore = pd.read_excel(remote_url)

In [12]:
superstore.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51290 entries, 0 to 51289
Data columns (total 24 columns):
Row ID            51290 non-null int64
Order ID          51290 non-null object
Order Date        51290 non-null datetime64[ns]
Ship Date         51290 non-null datetime64[ns]
Ship Mode         51290 non-null object
Customer ID       51290 non-null object
Customer Name     51290 non-null object
Segment           51290 non-null object
Postal Code       9994 non-null float64
City              51290 non-null object
State             51290 non-null object
Country           51290 non-null object
Region            51290 non-null object
Market            51290 non-null object
Product ID        51290 non-null object
Category          51290 non-null object
Sub-Category      51290 non-null object
Product Name      51290 non-null object
Sales             51290 non-null float64
Quantity          51290 non-null int64
Discount          51290 non-null float64
Profit            51290 non-null fl

In [191]:
superstore.nunique()

Row ID            51290
Order ID          25728
Order Date         1430
Ship Date          1464
Ship Mode             4
Customer ID       17415
Customer Name       796
Segment               3
Postal Code         631
City               3650
State              1102
Country             165
Region               23
Market                5
Product ID         3788
Category              3
Sub-Category         17
Product Name       3788
Sales             27200
Quantity             14
Discount             29
Profit            28234
Shipping Cost     16753
Order Priority        4
dtype: int64

In [190]:
superstore.Segment.value_counts()

Consumer       26518
Corporate      15429
Home Office     9343
Name: Segment, dtype: int64

### Plot a line chart of sales by segment

#### First, just do one series

In [14]:
# Total Sales per Segment per month, flattened back to ordinary columns
monthly_sales = (
    superstore
    .set_index('Order Date')
    .groupby('Segment')
    .resample('1M')['Sales']
    .sum()
    .reset_index()
)

# Re-group by Segment so we can iterate through (segment, rows) pairs
sales = monthly_sales.groupby('Segment')

In [15]:
# Get just the Consumer segment
group = sales.get_group('Consumer') 

In [16]:
data = [go.Scatter(x=group['Order Date'], y=group['Sales'], name='Consumer')]

In [17]:
py.iplot(data)

#### Now several series

In [18]:
data = [go.Scatter(x=group['Order Date'], y=group['Sales'], name=key) for key, group in sales]

In [19]:
# Chart title and axis titles
layout = {
    'title': 'Monthly sales by segment',
    'xaxis': {'title': 'Date'},
    'yaxis': {'title': 'Sales ($)'},
}

In [20]:
fig = dict(data=data, layout=layout)
py.iplot(fig)

### Now make the chart interactive - display one line at a time

Based on the plotly examples [here](https://plot.ly/python/dropdowns/)

The `updatemenus` object holds details of the dropdown box we'll use to filter the chart. First, we'll hard-code it - then we'll look at a more automated way.

In [51]:
superstore.Segment.value_counts()

Consumer       26518
Corporate      15429
Home Office     9343
Name: Segment, dtype: int64

In [175]:
# Hard-coded button menu. Each 'visible' mask has one entry per trace, in the
# same order as the traces in `data`.
updatemenus = [
    {
        'active': 3,  # Sets the initially active item (index into 'buttons')
        'type': 'buttons',
        'direction': 'right',
        'xanchor': 'left',
        'yanchor': 'top',
        'buttons': [
            {
                'label': 'Consumer',
                'method': 'update',
                'args': [{'visible': [True, False, False]}, {'title': 'Consumer'}],
            },
            {
                'label': 'Corporate',
                'method': 'update',
                'args': [{'visible': [False, True, False]}, {'title': 'Corporate'}],
            },
            {
                'label': 'Home Office',
                'method': 'update',
                'args': [{'visible': [False, False, True]}, {'title': 'Home Office'}],
            },
            {
                'label': 'All',
                'method': 'update',
                'args': [{'visible': [True, True, True]}, {'title': 'All Segments'}],
            },
        ],
    },
]


In [176]:
updatemenus

[{'active': 3,
  'type': 'buttons',
  'direction': 'right',
  'xanchor': 'left',
  'yanchor': 'top',
  'buttons': [{'label': 'Consumer',
    'method': 'update',
    'args': [{'visible': [True, False, False]}, {'title': 'Consumer'}]},
   {'label': 'Corporate',
    'method': 'update',
    'args': [{'visible': [False, True, False]}, {'title': 'Corporate'}]},
   {'label': 'Home Office',
    'method': 'update',
    'args': [{'visible': [False, False, True]}, {'title': 'Home Office'}]},
   {'label': 'All',
    'method': 'update',
    'args': [{'visible': [True, True, True]}, {'title': 'All Segments'}]}]}]

In [177]:
updatemenus_working = updatemenus.copy()

In [178]:
# Same titles as before, plus the button menu
layout = {
    'title': 'Monthly sales by segment',
    'xaxis': {'title': 'Date'},
    'yaxis': {'title': 'Sales ($)'},
    'updatemenus': updatemenus,
}

In [179]:
fig = dict(data=data, layout=layout)
py.iplot(fig)

### Obviously we should not be hard-coding the values, so here's a more general method:

In [197]:
# Menu skeleton: everything except the 'buttons' entry, which is filled in later
updatemenus = [
    {
        'active': 0,
        'type': 'buttons',
        'direction': 'right',
        'xanchor': 'left',
        'yanchor': 'top',
    },
]

In [198]:
# Now add buttons
def make_buttons(menu_items, include_all=True, all_title='All values'):
    """Build the list of button dicts for one ``updatemenus`` entry.

    Parameters
    ----------
    menu_items : sequence of str
        One label per button. Each button's 'visible' mask has one entry per
        item, True where the item equals that button's own label.
    include_all : bool, default True
        If True, the list starts with an "All" button whose mask is all True.
    all_title : str, default 'All values'
        Chart title applied by the "All" button.

    Returns
    -------
    list of dict
        Each dict has the keys 'label', 'method' (always 'update') and 'args'
        (``[{'visible': mask}, {'title': title}]``).
    """
    def _button(label, visible, title):
        # Every button uses the same 'update' method; only the mask and title vary.
        return dict(label=label,
                    method='update',
                    args=[{'visible': visible}, {'title': title}])

    buttons = [_button("All", [True] * len(menu_items), all_title)] if include_all else []

    buttons.extend(
        _button(item, [item == other for other in menu_items], item)
        for item in menu_items
    )
    return buttons

In [199]:
# Take the button order from the same groupby iteration that built the traces
# in `data`. Each button's 'visible' mask is matched to the traces by position,
# whereas `Series.unique()` returns values in order of first appearance, which
# is not guaranteed to match the trace order.
segments = [key for key, _ in sales]
b = make_buttons(segments)

In [200]:
updatemenus[0]['buttons'] = b

In [201]:
updatemenus 

[{'active': 0,
  'type': 'buttons',
  'direction': 'right',
  'xanchor': 'left',
  'yanchor': 'top',
  'buttons': [{'label': 'All',
    'method': 'update',
    'args': [{'visible': [True, True, True]}, {'title': 'All values'}]},
   {'label': 'Consumer',
    'method': 'update',
    'args': [{'visible': [True, False, False]}, {'title': 'Consumer'}]},
   {'label': 'Corporate',
    'method': 'update',
    'args': [{'visible': [False, True, False]}, {'title': 'Corporate'}]},
   {'label': 'Home Office',
    'method': 'update',
    'args': [{'visible': [False, False, True]}, {'title': 'Home Office'}]}]}]

In [202]:
# Titles plus the generated button menu
layout = {
    'title': 'Monthly sales by segment',
    'xaxis': {'title': 'Date'},
    'yaxis': {'title': 'Sales ($)'},
    'updatemenus': updatemenus,
}

In [203]:
fig = dict(data=data, layout=layout)
py.iplot(fig)

### Filtering by parent-child relationships

That example demonstrated the principles, but it is not really useful in practice. A much more common scenario is where you want to filter a large number of groups down to a smaller set, e.g. display all the sub-categories for a particular product category.

So we will modify our make_buttons function to take a dictionary of parent and child values for menu_items, e.g.

`menu_items = {'cats': ['fluffy', 'tiddles', 'whiskers'], 'dogs': ['rover', 'fido']}`

In [291]:
data = [go.Scatter(x=group['Order Date'], y=group['Sales'], name=key) for key, group in sales]

When creating the button items, we should refer back to the `data` object to ensure that we get all the categories in the right order: each button's `visible` mask is matched to the traces by position.

In [293]:
def get_lookups(df, parent_col, child_col):
    """Map each parent value to the list of distinct child values seen with it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    parent_col : str
        Name of the column whose values become the dictionary keys.
    child_col : str
        Name of the column whose values are collected under each key.

    Returns
    -------
    dict
        ``{parent_value: [child_value, ...]}``; each (parent, child) pair
        appears once, with children in order of first appearance in ``df``.
    """
    # Reduce to unique (parent, child) pairs before grouping
    pairs = df[[parent_col, child_col]].drop_duplicates()

    # Use child_col here: the column name was previously hard-coded to
    # 'Sub-Category', which broke the function for any other child column.
    return {key: list(group[child_col]) for key, group in pairs.groupby(parent_col)}

In [294]:
cats = get_lookups(superstore, 'Category', 'Sub-Category')

In [295]:
cats

{'Furniture': ['Chairs', 'Tables', 'Bookcases', 'Furnishings'],
 'Office Supplies': ['Art',
  'Appliances',
  'Storage',
  'Fasteners',
  'Binders',
  'Labels',
  'Paper',
  'Supplies',
  'Envelopes'],
 'Technology': ['Phones', 'Copiers', 'Machines', 'Accessories']}