# Interactive Visualisation using Plotly

The plotly Python library (plotly.py) is an interactive, open-source plotting library that supports over 40 unique chart types covering a wide range of statistical, financial, geographic, scientific, and 3-dimensional use-cases.

The main aims of this notebook are:<br />
I.   Plot various charts using plotly.<br />
II.  Add dropdowns and axis sliders to the charts.<br />
III. Save the visualisations as stand-alone interactive HTML files.<br />
IV.  Add multiple plots (subplots) to the same space.<br />

Part I is covered in this notebook. Parts II, III and IV are tackled in the second notebook.

In [1]:
import pandas as pd
import random
import numpy as np

from plotly import graph_objs as go
import plotly.figure_factory as ff
import ipywidgets as w
from plotly.subplots import make_subplots
from IPython.display import display
from plotly.offline import iplot, init_notebook_mode
# Switch plotly into offline notebook mode before any figure is drawn with iplot().
# NOTE(review): connected=True is understood to load plotly.js from a CDN rather than
# embedding it in the notebook, so rendering would need internet access — confirm.
init_notebook_mode(connected = True)


Let's assume that the dataframe holds the sales data of a small company with three products, namely CategoryA, CategoryB and CategoryC. We now plot the sales of the three categories with different charts using plotly.

In [2]:
# Read the sales workbook, then reduce the 'date' column to plain date values
# through the .dt.date accessor.
sales_path = "data//sales_data.xlsx"
df = pd.read_excel(sales_path)
df['date'] = df['date'].dt.date
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,date,category
0,2020-05-07,CategoryA
1,2020-05-07,CategoryB
2,2020-05-07,CategoryA
3,2020-05-07,CategoryA
4,2020-05-07,CategoryC


# I. Plot different charts using plotly

## 1. Pie and Donut chart

### 1.1 Pie Chart 

In [3]:
# Count the rows per category; after the rename the former 'date' column holds that count.
category_totals = df.groupby('category').count().reset_index()
plot_pie = category_totals.rename(columns={'date': 'category_count'})

# One slice per category, coloured in the order the categories appear in plot_pie.
pie_colours = ['#CDDC39', '#673AB7', '#F44336']
trace = go.Pie(
    labels=plot_pie['category'],
    values=plot_pie['category_count'],
    marker={'colors': pie_colours},
)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Pie chart showing the distribution of the three categories')
iplot(fig)

### 1.2 Donut chart 

In [4]:
# Per-category row counts, with the counted 'date' column renamed to 'category_count'.
plot_donut = (
    df.groupby('category')
    .count()
    .reset_index()
    .rename(columns={'date': 'category_count'})
)

# A pie trace with a non-zero `hole` is what turns the pie into a donut.
donut_marker = dict(colors=['#CDDC39', '#673AB7', '#F44336'])
trace = go.Pie(
    labels=plot_donut['category'],
    values=plot_donut['category_count'],
    hole=.4,
    marker=donut_marker,
)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Donut chart showing the distribution of the three categories')
iplot(fig)


## 2.  Bar chart

### 2.1 Simple Bar Chart

In [5]:
# Number of sales rows per category (the counted 'date' column becomes 'category_count').
per_category = df.groupby('category').count().reset_index()
plot_bar = per_category.rename(columns={'date': 'category_count'})

# One bar per category, bar height = number of rows.
trace = go.Bar(
    x=plot_bar['category'],
    y=plot_bar['category_count'],
)
fig = go.Figure(data=trace)
fig.update_layout(title_text='Bar chart showing the distribution of the three categories')
iplot(fig)



### 2.2 Grouped Bar Chart

In [6]:
# Rows per (date, category) pair; size() leaves the counts in a column named 0.
daily_counts = df.groupby(['date', 'category']).size().reset_index()
df_bar = daily_counts.rename(columns={0: 'category_count'})

# Wide layout: one row per date, one column per category, 0 where a pair is missing.
plot_bar_group = df_bar.pivot_table(
    index='date',
    columns='category',
    values='category_count',
    fill_value=0,
).reset_index()

# One bar trace per category column, all sharing the date axis.
group_traces = [
    go.Bar(x=plot_bar_group['date'], y=plot_bar_group[category], name=category)
    for category in ('CategoryA', 'CategoryB', 'CategoryC')
]

fig = go.Figure(data=group_traces)
fig.update_layout(
    barmode='group',
    title_text='Grouped Bar chart showing the distribution of the three categories over time',
)
iplot(fig)


### 2.3 Stacked Bar Chart  

In [7]:
# Long form: one row per (date, category) with its row count in 'category_count'.
df_bar = (
    df.groupby(['date', 'category'])
    .size()
    .reset_index()
    .rename(columns={0: 'category_count'})
)
# Wide form: dates down the rows, categories across the columns, gaps filled with 0.
pivoted = df_bar.pivot_table(index='date', columns='category', values='category_count', fill_value=0)
plot_bar_stack = pivoted.reset_index()

stack_order = ['CategoryA', 'CategoryB', 'CategoryC']
fig = go.Figure(data=[
    go.Bar(x=plot_bar_stack['date'], y=plot_bar_stack[label], name=label)
    for label in stack_order
])
# barmode='stack' piles the three category bars on top of each other per date.
fig.update_layout(
    barmode='stack',
    title_text='Stacked Bar chart showing the distribution of the three categories over time',
)
iplot(fig)

## 3. Line Chart 

### 3.1 Scatter Plot

In [8]:
# Rows per day; after count() the 'category' column holds that day's row count.
plot_markers = (
    df.groupby('date')
    .count()
    .reset_index()
    .rename(columns={'category': 'category_count'})
)
# mode='markers' draws the points only, without connecting lines.
trace = go.Scatter(
    x=plot_markers['date'],
    y=plot_markers['category_count'],
    mode='markers',
)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Scatter Plot showing the distribution of the three categories')
iplot(fig)

### 3.2 Line Plot 

In [9]:
# Daily row counts: group by date, count, and expose the count as 'category_count'.
rows_per_day = df.groupby('date').count().reset_index()
plot_line = rows_per_day.rename(columns={'category': 'category_count'})

# mode='lines' connects the daily values without drawing the individual points.
line_kwargs = dict(x=plot_line['date'], y=plot_line['category_count'], mode='lines')
trace = go.Scatter(**line_kwargs)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Line chart showing the distribution of the three categories')
iplot(fig)

### 3.3 Line Plot with Markers

In [10]:
# Count the rows recorded on each date and name the resulting column 'category_count'.
count_column = {'category': 'category_count'}
plot_linemarkers = df.groupby('date').count().reset_index().rename(columns=count_column)

# mode='lines+markers' draws both the connecting line and a marker at each date.
trace = go.Scatter(
    mode='lines+markers',
    x=plot_linemarkers['date'],
    y=plot_linemarkers['category_count'],
)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Line chart (with markers) showing the distribution of the three categories')
iplot(fig)

### 3.4 Bubble Chart

In [11]:
# One row per category with the number of sales rows in 'category_count'.
plot_bubble = (
    df.groupby('category')
    .count()
    .reset_index()
    .rename(columns={'date': 'category_count'})
)
# Show the frame as the cell output.
plot_bubble

Unnamed: 0,category,category_count
0,CategoryA,54
1,CategoryB,41
2,CategoryC,64


Now let's add the share of each category to this dataframe. The share column will be used to control the size of the bubble.

In [12]:
# Hand-picked share values, assigned positionally to the rows of plot_bubble.
plot_bubble['share'] = [50, 25, 100]
plot_bubble

Unnamed: 0,category,category_count,share
0,CategoryA,54,50
1,CategoryB,41,25
2,CategoryC,64,100


In [13]:

# Marker-only scatter: the 'share' column drives the bubble size, and each
# category gets its own fixed colour.
bubble_marker = dict(
    size=plot_bubble['share'],
    color=['blue', 'red', 'yellow'],
)
trace = go.Scatter(
    x=plot_bubble['category'],
    y=plot_bubble['category_count'],
    mode='markers',
    marker=bubble_marker,
)
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Bubble chart showing the distribution of the three categories with their shares')
iplot(fig)

## 4. Table 

In [14]:
# Rows per (date, category) pair; size() leaves the counts in a column named 0.
df_table = df.groupby(['date', 'category']).size().reset_index().rename(columns={0: 'category_count'})
# Wide layout: one row per date, one column per category, 0 where a pair is missing.
plot_table = df_table.pivot_table(
    index='date', columns='category', values='category_count', fill_value=0
).reset_index()

# Header and cells are both derived from the same column list, so they cannot drift
# apart and every category present in the data gets its own table column.
table_columns = list(plot_table.columns)
trace = go.Table(
    header=dict(values=table_columns),
    cells=dict(values=[plot_table[column] for column in table_columns]),
)

data = [trace]
fig = go.Figure(data=data)
fig.update_layout(title_text='Table showing the sales of each category per day')
iplot(fig)

## 5. Histogram and Distplots

### 5.1 Histogram 

In order to plot a histogram, let us consider a different set of values.

In [15]:

# Fifteen random integers, each drawn with random.randint(1, 30).
randomlist = [random.randint(1, 30) for _ in range(15)]

# A single histogram trace over the generated values.
data = [go.Histogram(x=randomlist)]
fig = go.Figure(data)
fig.update_layout(title_text='Histogram showing the distribution of the generated random numbers')
iplot(fig)

### 5.2 Distplot

The distplot figure factory displays a combination of statistical representations of numerical data, such as histogram, kernel density estimation or normal curve, and rug plot.

In [16]:

# 1000 samples from np.random.randn, plotted as a single labelled group.
x = np.random.randn(1000)
hist_data, group_labels = [x], ['distplot']

fig = ff.create_distplot(hist_data, group_labels)
fig.update_layout(title_text='Distplot showing the distribution of the generated random numbers')

iplot(fig)

## 6. Box Plot 

In [17]:
# Rows per (date, category) pair; size() leaves the counts in a column named 0.
df_box = (
    df.groupby(['date', 'category'])
    .size()
    .reset_index()
    .rename(columns={0: 'category_count'})
)
# Pivot to one column per category (0 where a pair is missing), then drop the date
# column so only the per-category counts remain.
box_wide = df_box.pivot_table(index='date', columns='category', values='category_count', fill_value=0)
plot_box = box_wide.reset_index().drop(columns=['date'])
plot_box

category,CategoryA,CategoryB,CategoryC
0,3,2,1
1,3,2,5
2,2,1,2
3,2,2,3
4,1,1,2
5,2,7,3
6,4,3,2
7,6,5,5
8,4,2,4
9,4,4,7


Combine the CategoryA, CategoryB and CategoryC values into a single list to plot the box plot.

In [18]:

# Flatten the per-date rows of plot_box into one list, row by row, so that every
# count from every category ends up in a single box.
item_count_list = plot_box.values.tolist()
flat_list = []
for row_counts in item_count_list:
    flat_list.extend(row_counts)

data = [go.Box(y=flat_list)]
fig = go.Figure(data)
fig.update_layout(title_text='BoxPlot showing the distribution of unit sales of each category')
iplot(fig)

## 7. Heatmap 

In [19]:
# Rows per (date, category) pair; size() leaves the counts in a column named 0.
df_heatmap = df.groupby(['date', 'category']).size().reset_index().rename(columns={0: 'category_count'})
# Wide layout: one row per date, one column per category, 0 where a pair is missing.
plot_heatmap = df_heatmap.pivot_table(
    index='date', columns='category', values='category_count', fill_value=0
).reset_index()

categories = ['CategoryA', 'CategoryB', 'CategoryC']
date_field = plot_heatmap['date'].unique()
# 2-D array of counts: rows follow the dates, columns follow `categories`.
category_count = plot_heatmap[categories].values

# go.Heatmap already fixes the trace type, so no explicit `type` argument is passed.
trace = go.Heatmap(x=categories, y=date_field, z=category_count, colorscale='Viridis')
data = [trace]
fig = go.Figure(data=data)
# Title added so the heatmap is labelled like every other chart in this notebook.
fig.update_layout(title_text='Heatmap showing the sales of each category per day')
iplot(fig)

## 8. World Map

We use the 2014 World GDP of different countries

In [20]:
# Download the 2014 GDP-by-country table from the plotly datasets repository.
gdp_csv_url = 'https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv'
df_worldmap = pd.read_csv(gdp_csv_url)
df_worldmap.head()

Unnamed: 0,COUNTRY,GDP (BILLIONS),CODE
0,Afghanistan,21.71,AFG
1,Albania,13.4,ALB
2,Algeria,227.8,DZA
3,American Samoa,0.75,ASM
4,Andorra,4.8,AND


In [21]:

# Choropleth located by the 'CODE' column, coloured by GDP, with the country
# name as the text of each location.
fig = go.Figure(data=go.Choropleth(
    locations = df_worldmap['CODE'],
    z = df_worldmap['GDP (BILLIONS)'],
    text = df_worldmap['COUNTRY'],
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix = '$',
    colorbar_title = 'GDP<br>Billions US$',
))

# Adjacent string literals are joined at compile time. A backslash continuation
# inside the literal would instead embed the next line's indentation in the link text.
source_note = (
    'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">'
    'CIA World Factbook</a>'
)

fig.update_layout(
    title_text='2014 Global GDP',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    # Source credit positioned in paper coordinates, without an arrow.
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text=source_note,
        showarrow = False
    )]
)

iplot(fig)