In [41]:
import pandas as pd
import numpy as np
from collections import Counter
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
import cufflinks as cf
import datapane as dp 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch Plotly and cufflinks into notebook/offline mode before any figure is built.
init_notebook_mode(connected=True)
cf.go_offline()  # Use Plotly locally

# Raise both pandas display limits to 150 so wide or long frames are shown in full.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 150)

In [42]:
# Load the six raw CSV files from the working directory, one DataFrame per file.
# The trailing comments record the (rows, columns) shape noted for each file.
(
    op_train_df,
    op_prior_df,
    orders_df,
    products_df,
    aisles_df,
    departments_df,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'order_products__train.csv',  # (1,384,617, 4)
        'order_products__prior.csv',  # (32,434,489, 4)
        'orders.csv',                 # (3,421,083, 7)
        'products.csv',               # (49,688, 4)
        'aisles.csv',                 # (134, 2)
        'departments.csv',            # (21, 2)
    )
)

In [43]:
# Build one denormalised table: every order line (prior + train) enriched with
# its product, aisle, department and order-level attributes.
#
# Each lookup join names its key explicitly and uses validate='many_to_one', so
# pandas raises a MergeError if the right-hand lookup table ever contains a
# duplicated key, instead of silently multiplying order lines.
frames = [op_prior_df, op_train_df]
# ignore_index=True gives the stacked frame a fresh 0..n-1 index rather than
# repeating the row labels of the two inputs.
big_df = pd.concat(frames, ignore_index=True)

# Previously this merge had no `on=` and joined on whatever column names the two
# frames happened to share; the key is now stated explicitly.
bigger_df = pd.merge(left=big_df, right=products_df, how='inner',
                     on='product_id', validate='many_to_one')

biggest_df = pd.merge(left=bigger_df, right=aisles_df,
                      on='aisle_id', validate='many_to_one')

giant_df = pd.merge(left=biggest_df, right=departments_df,
                    on='department_id', validate='many_to_one')

monolith_df = pd.merge(left=giant_df, right=orders_df, how='inner',
                       on='order_id', validate='many_to_one')

In [45]:
# Preview the first five rows of the fully merged table.
monolith_df.head(n=5)

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2,33120,1,1,Organic Egg Whites,86,16,eggs,dairy eggs,202279,prior,3,5,9,8.0
1,2,28985,2,1,Michigan Organic Kale,83,4,fresh vegetables,produce,202279,prior,3,5,9,8.0
2,2,17794,6,1,Carrots,83,4,fresh vegetables,produce,202279,prior,3,5,9,8.0
3,2,43668,9,0,Classic Blend Cole Slaw,123,4,packaged vegetables fruits,produce,202279,prior,3,5,9,8.0
4,2,9327,3,0,Garlic Powder,104,13,spices seasonings,pantry,202279,prior,3,5,9,8.0


In [46]:
# Overall re-order ratio: total of the `reordered` column divided by the number
# of order lines (about 0.59, i.e. ~59% of all ordered items are re-orders).
monolith_df['reordered'].sum() / len(monolith_df)

0.5900617242809434

In [47]:
# Sum of the `reordered` column per order, as a two-column frame
# (`order_id`, `reordered`).
orderid_grp = (
    monolith_df
    .groupby('order_id')['reordered']
    .sum()
    .reset_index()
)

In [49]:
# Boolean mask selecting orders whose per-order `reordered` total is zero,
# i.e. orders that contain no re-ordered item.
filt = orderid_grp['reordered'].eq(0)
zero_df = orderid_grp[filt]

In [50]:
# Fraction of orders that contain no re-ordered item (about 0.12, i.e. ~12%).
len(zero_df) / len(orderid_grp)

0.11868055873091014

In [52]:
# Mean of `reordered` (the re-order ratio) for each `order_dow` value.
dow_grp = (
    monolith_df
    .groupby('order_dow')['reordered']
    .mean()
    .reset_index()
)

In [54]:
# Bar chart: one bar per `order_dow` value, bar height = mean `reordered`.
fig = go.Figure(
    data=go.Bar(
        x=dow_grp['order_dow'].tolist(),
        y=dow_grp['reordered'].tolist(),
        marker_color='RoyalBlue',
    )
)

# Figure title and its placement.
fig.update_layout(
    title=dict(
        text="<b>Day of Week vs Re-order Ratio</b>",
        font=dict(size=25, color='green', family='Titillium Web'),
        x=0.57,
        y=0.90,
        xanchor='center',
        yanchor='top',
    ),
    yaxis=dict(categoryorder='total ascending'),
)

# X axis: label, grid and a red mirrored border line.
fig.update_xaxes(
    color='teal',
    title=dict(
        text='Day of Week',
        font=dict(family='Open Sans', size=20, color='maroon'),
        standoff=15,
    ),
    gridcolor='lightblue',
    tickmode='auto',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

# Y axis: same frame styling as the x axis. This call stays last so that the
# figure it returns is what the cell displays.
fig.update_yaxes(
    color='Teal',
    title=dict(
        text='Re-order Ratio',
        font=dict(family='Droid Sans', size=20, color='maroon'),
        standoff=15,
    ),
    tickfont=dict(family='Arial'),
    gridcolor='lightblue',
    linecolor='red',
    linewidth=3,
    mirror=True,
)

In [55]:
# Wrap the current `fig` in a datapane report, write it to a local HTML file
# (opened straight away) and publish it with PUBLIC visibility.
# The same title is used for the heading, the file name and the published name.
report_title = 'Day of Week vs Re-order Ratio'

report = dp.Report(
    dp.Text(report_title),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)

report.save(f"{report_title}.html", open=True)
report.publish(name=report_title, visibility=dp.Visibility.PUBLIC)

Publishing document and associated data - *please wait...*

Report successfully published at https://datapane.com/u/vijayv500/reports/day-of-week-vs-re-order-ratio/ - you can edit and add additional text online

In [56]:
# Mean of `reordered` (the re-order ratio) for each `order_hour_of_day` value.
hod_grp = (
    monolith_df
    .groupby('order_hour_of_day')['reordered']
    .mean()
    .reset_index()
)

In [58]:
# Bar chart of the re-order ratio per hour of the day:
# x = `order_hour_of_day`, y = mean `reordered` for that hour.
fig = go.Figure()

fig.add_trace(go.Bar(
    x= hod_grp['order_hour_of_day'].tolist(),
    y= hod_grp['reordered'].tolist(),
    marker_color='RoyalBlue'
             ))

# Figure title and its placement.
# NOTE(review): `categoryorder` is documented for category axes; the y values
# here are numeric ratios, so this setting presumably has no effect — confirm
# before removing.
fig.update_layout(title_text="<b>Hour of the Day vs Re-order Ratio</b>",
                 title_font_size=25,
                 title_font_color='green',
                 title_font_family='Titillium Web',
                 title_x=0.57,
                 title_y=0.90,
                 title_xanchor='center',
                 title_yanchor='top',
                 yaxis={'categoryorder':'total ascending'}
                 )

# X axis. The title used to read 'Day of Week' (carried over from the
# day-of-week chart) although the axis shows `order_hour_of_day`; it now
# matches the data and the figure title.
fig.update_xaxes(
        color='teal',
        title_text='Hour of the Day',
        title_font_family='Open Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        gridcolor='lightblue',
        tickmode='auto',
        nticks=48,
        linecolor='red',
        linewidth=3,
        mirror=True)

# Y axis. This call stays last so that the figure it returns is what the cell
# displays.
fig.update_yaxes(
        color='Teal',
        title_text='Re-order Ratio',
        title_font_family='Droid Sans',
        title_font_size=20,
        title_font_color='maroon',
        title_standoff = 15,
        tickfont_family='Arial',
        gridcolor='lightblue',
        linecolor='red',
        linewidth=3,
        mirror = True)

In [59]:
# Wrap the current `fig` in a datapane report, write it to a local HTML file
# (opened straight away) and publish it with PUBLIC visibility.
# The same title is used for the heading, the file name and the published name.
report_title = 'Hour of the Day vs Re-order Ratio'

report = dp.Report(
    dp.Text(report_title),
    dp.Plot(fig, caption="Interactive plot generated with `plotly`"),
)

report.save(f"{report_title}.html", open=True)
report.publish(name=report_title, visibility=dp.Visibility.PUBLIC)

Publishing document and associated data - *please wait...*

Report successfully published at https://datapane.com/u/vijayv500/reports/hour-of-the-day-vs-re-order-ratio/ - you can edit and add additional text online