In [None]:
import pandas as pd
import numpy as np
from collections import Counter
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
import cufflinks as cf
import datapane as dp 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable Plotly rendering inside the notebook.
init_notebook_mode(connected=True)
# Put cufflinks in offline mode so Plotly is used locally.
cf.go_offline()

# Let pandas show up to 150 columns and 150 rows before truncating the display.
pd.options.display.max_columns = 150
pd.options.display.max_rows = 150

In [None]:
# Load the orders table from the working directory.
# Shape recorded by the original author: (3,421,083, 7).
orders_df = pd.read_csv(filepath_or_buffer='orders.csv')

In [None]:
# Preview the first five rows of the orders table.
orders_df.head(n=5)

In [None]:
# Count orders per 'order_dow' value; value_counts() returns them ordered by frequency.
dow_freq = dict(orders_df['order_dow'].value_counts())

dow = list(dow_freq)             # distinct day-of-week codes
count = list(dow_freq.values())  # number of orders for each code

# Bar chart with each bar labelled (outside the bar) with its own count.
fig = go.Figure(
    data=[
        go.Bar(
            x=dow,
            y=count,
            name='Order Count by Day of Week',
            marker=dict(color='RoyalBlue'),
            text=count,
            textposition='outside',
        )
    ]
)

# Centered, bold chart title.
fig.update_layout(
    title=dict(
        text="<b>Order Count by Day of Week</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.5,
        y=0.90,
        xanchor='center',
        yanchor='top',
    )
)

# Both axes get a red, mirrored border line.
fig.update_xaxes(
    color='teal',
    title=dict(
        text='Day of Week',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Count',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [None]:
# Upload the bar chart to Datapane.com so it stays interactive when embedded
# elsewhere (Medium, Notion, etc.).
report_blocks = (
    dp.Text('Order Count by Day of Week'),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)
report = dp.Report(*report_blocks)

# Write a local HTML copy (open=True), then publish with public visibility.
report.save("Order Count by Day of Week.html", open=True)
report.publish(
    name='Order Count by Day of Week',
    visibility=dp.Visibility.PUBLIC,
)

In [None]:
# Count orders per 'order_hour_of_day' value; value_counts() returns them ordered by frequency.
hod_freq = dict(orders_df['order_hour_of_day'].value_counts())

hod = list(hod_freq)             # distinct hour-of-day values
count = list(hod_freq.values())  # number of orders for each hour

# Bar chart with each bar labelled (outside the bar) with its own count.
fig = go.Figure(
    data=[
        go.Bar(
            x=hod,
            y=count,
            name='Order Count by Hour of the Day',
            marker=dict(color='RoyalBlue'),
            text=count,
            textposition='outside',
        )
    ]
)

# Centered, bold chart title.
fig.update_layout(
    title=dict(
        text="<b>Order Count by Hour of the Day</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.5,
        y=0.90,
        xanchor='center',
        yanchor='top',
    )
)

# nticks=48 raises the cap on automatic ticks for the hour axis.
fig.update_xaxes(
    color='teal',
    title=dict(
        text='Hour of the Day',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickmode='auto',
    nticks=48,
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Count',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [None]:
# Wrap the hour-of-day chart in a Datapane report: a text heading plus the plot.
report_blocks = (
    dp.Text('Order Count by Hour of the Day'),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)
report = dp.Report(*report_blocks)

# Write a local HTML copy (open=True), then publish with public visibility.
report.save("Order Count by Hour of the Day.html", open=True)
report.publish(
    name='Order Count by Hour of the Day',
    visibility=dp.Visibility.PUBLIC,
)

In [None]:
# For every user take the largest 'order_number', then count how many users
# share each maximum.
order_group = orders_df.groupby('user_id')
# Built-in groupby reduction instead of aggregate(np.max), which recent pandas deprecates.
on_count = order_group['order_number'].max()
on_count = dict(on_count.value_counts())
max_orders = list(on_count.keys())        # distinct per-user maximum order numbers
max_ordercount = list(on_count.values())  # number of users at each maximum

fig = go.Figure()

fig.add_trace(go.Bar(
    x= max_orders,
    y= max_ordercount,
    name='Maximum Order Count',
    marker_color='RoyalBlue',
    # Label each bar with its own frequency, not the stale global `count`
    # left behind by an earlier chart cell.
    text=max_ordercount)
             )

# Centered, bold chart title.
fig.update_layout(title_text="<b>Maximum Order Count per Customer</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.5,
                 title_y=0.90,
                 title_xanchor='center',
                 title_yanchor='top'
                 )

# Both axes get a red, mirrored border line.
fig.update_xaxes(
        color='teal',
        title_text='Maximum Order Count',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickmode='auto',
        linecolor='red',
        linewidth=3,
        mirror=True)

# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_yaxes(
        color='Teal',
        title_text='Frequency',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [None]:
# Wrap the maximum-order-count chart in a Datapane report: a text heading plus the plot.
report_blocks = (
    dp.Text('Maximum Order Count per Customer'),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)
report = dp.Report(*report_blocks)

# Write a local HTML copy (open=True), then publish with public visibility.
report.save("Maximum Order Count per Customer.html", open=True)
report.publish(
    name='Maximum Order Count per Customer',
    visibility=dp.Visibility.PUBLIC,
)

In [None]:
# Histogram of the 'days_since_prior_order' column, with at most 100 bins requested.
fig = go.Figure(
    data=[go.Histogram(x=orders_df['days_since_prior_order'], nbinsx=100)]
)

# Centered, bold chart title.
fig.update_layout(
    title=dict(
        text="<b>Distribution of 'Days Since Prior Order'</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.5,
        y=0.9,
        xanchor='center',
        yanchor='top',
    )
)

# nticks=60 raises the cap on automatic ticks for the days axis.
fig.update_xaxes(
    color='teal',
    title=dict(
        text='Days Since Prior Order',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickmode='auto',
    nticks=60,
    linecolor='red',
    linewidth=3,
    mirror=True,
)

fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Count',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    linecolor='red',
    linewidth=3,
    mirror=True,
)
# Render explicitly; the cell does not end in an expression that returns the figure.
fig.show()

In [None]:
# Wrap the histogram in a Datapane report: a text heading plus the plot.
# The heading now closes its inner quote so it matches the file and report names.
report = dp.Report(
  dp.Text("Distribution of 'Days Since Prior Order'"),
  dp.Plot(fig, caption="Interactive plot generated with `plotly`")
)

# Write a local HTML copy (open=True), then publish with public visibility.
report.save("Distribution of 'Days Since Prior Order'.html", open=True)
report.publish(name="Distribution of 'Days Since Prior Order'", visibility=dp.Visibility.PUBLIC)

In [None]:
# Number of orders in each 'eval_set' category.
orders_df.loc[:, 'eval_set'].value_counts()

In [None]:
# Two-column frame for the pie chart: 'index' holds the eval_set label and
# 'eval_set' holds the number of orders with that label.
# Both columns are named explicitly because a bare reset_index() produces
# 'eval_set'/'count' on pandas >= 2.0 instead of the 'index'/'eval_set'
# columns referenced below.
evalset_df = (
    orders_df['eval_set']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='eval_set')
)

# Donut chart (hole=.4) of each eval_set's share of the orders.
fig = px.pie(evalset_df, values='eval_set', names='index',title='Breakup of Orders Data: Prior/Train/Test',
            hole=.4)
# Put the percentage and label outside each slice.
fig.update_traces(textposition='outside', textinfo='percent+label')

# Replace the plain title with a centered, bold one.
# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_layout(title_text="<b>Breakup of Orders Data: Prior/Train/Test</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.5,
                 title_y=0.95,
                 title_xanchor='center',
                 title_yanchor='top'
                 )

In [None]:
# Wrap the pie chart in a Datapane report: a text heading plus the plot.
report = dp.Report(
  dp.Text("Breakup of Orders Data"),
  dp.Plot(fig, caption="Interactive plot generated with `plotly`")
)

# Write a local HTML copy (open=True), then publish with public visibility.
# The file and report names match the heading; a stray trailing apostrophe was removed.
report.save("Breakup of Orders Data.html", open=True)
report.publish(name="Breakup of Orders Data", visibility=dp.Visibility.PUBLIC)

In [None]:
# Long-format table: one row per (order_dow, order_hour_of_day) pair, with the
# number of non-null 'order_number' entries in that pair.
weekday_df = (
    orders_df
    .groupby(['order_dow', 'order_hour_of_day'])['order_number']
    .count()
    .reset_index()
)
weekday_df.head()

In [None]:
# Reshape the long-format counts into an hour-of-day (rows) x day-of-week (columns) grid.
# Keyword arguments are required: pandas >= 2.0 no longer accepts them positionally.
# NOTE: this overwrites `weekday_df`, so re-run the preceding groupby cell before
# re-running this one.
weekday_df = weekday_df.pivot(
    index='order_hour_of_day',
    columns='order_dow',
    values='order_number',
)
weekday_df.head()

In [None]:
# Heatmap of order counts: columns of weekday_df on x, its index on y, cell values as colour.
fig = go.Figure()
fig.add_trace(
    go.Heatmap(
        z=weekday_df.to_numpy().tolist(),
        x=weekday_df.columns.tolist(),
        y=weekday_df.index.tolist(),
        colorscale='Viridis',
    )
)

# Centered, bold chart title.
fig.update_layout(
    title=dict(
        text="<b>Day of the Week vs Hour of the Day vs Order Count</b>",
        font=dict(size=25, color='RoyalBlue', family='Titillium Web'),
        x=0.5,
        y=0.87,
        xanchor='center',
        yanchor='top',
    )
)

# Both axes get a red border line mirrored (with ticks) on the opposite side.
fig.update_xaxes(
    color='RoyalBlue',
    title=dict(
        text='Day of the Week',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror='allticks',
)

# Last expression of the cell: the returned figure is what the notebook displays.
fig.update_yaxes(
    color='RoyalBlue',
    title=dict(
        text='Hour of the Day',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    nticks=48,
    linecolor='red',
    linewidth=3,
    mirror='allticks',
)

In [None]:
# Wrap the heatmap in a Datapane report: a text heading plus the plot.
report_blocks = (
    dp.Text("Day of the Week vs Hour of the Day vs Order Count"),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)
report = dp.Report(*report_blocks)

# Write a local HTML copy (open=True), then publish with public visibility.
report.save("Day of the Week vs Hour of the Day vs Order Count.html", open=True)
report.publish(
    name="Day of the Week vs Hour of the Day vs Order Count",
    visibility=dp.Visibility.PUBLIC,
)