In [29]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from matplotlib import pyplot as plt

plt.style.use('ggplot')
plotly.offline.init_notebook_mode(connected=True)

In [301]:
# Load the cleaned recipe data; the path is relative to the notebook's working directory.
df = pd.read_csv('../Data Cleaning/KS_Cleaned.csv')

### Recipes by Author Group

In [302]:
# Number of recipes per author group, most frequent group first.
df['author_group'].value_counts()

Community Member    442
KS Team              95
Contributor          67
Partner               2
Name: author_group, dtype: int64

In [303]:
# Recipe count per author group as a two-column frame: author_type, recipes.
#
# value_counts() already returns the counts in descending order, so no extra
# sort is needed. The index and the count column are named explicitly instead
# of renaming the auto-generated "index"/"author_group" columns: pandas >= 2.0
# labels those columns "author_group"/"count" after reset_index(), which would
# leave the author names in a column called "recipes".
df1 = (
    df['author_group']
    .value_counts()
    .rename_axis('author_type')
    .reset_index(name='recipes')
)

In [304]:
# Bar chart: number of recipes written by each author type.
# `go` (plotly.graph_objects) comes from the notebook's import cell, so it is
# not re-imported here.

# One colour per bar, in the same order as the rows of df1.
colors = ['rgb(27, 103, 224)', 'rgb(237, 169, 21)', 'rgb(68, 128, 19)', 'rgb(129, 50, 168)']

fig = go.Figure(data=[go.Bar(
    x=df1['author_type'],
    y=df1['recipes'],
    width=[0.6] * len(df1),      # same bar width for every author type
    text=df1['recipes'],         # print the count on each bar
    textposition='auto',
    marker_color=colors)])

fig.update_layout(
    xaxis_title='Author Type',
    yaxis_title='Recipes Written',
    title={
        'text': "Types of Authors in Kitchen Stories",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font={'family': 'Arial',
          'size': 13,
          'color': 'rgb(31,33,36)'},
    # Fully transparent plot and paper backgrounds.
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    bargap=0.05
)

fig.show()

### Recipes with Calorie Information by Author Group

In [305]:
# Per author type: how many recipes carry calorie information and how many do
# not, in absolute numbers and as whole-number percentages.
#
# Both counts are taken straight from `df` in a single groupby:
#   with_calories - non-null `calories` values in the group
#   recipes       - all rows in the group
# so this cell does not depend on the column layout of `df1`.
df2 = (
    df.groupby('author_group')
    .agg(with_calories=('calories', 'count'),
         recipes=('calories', 'size'))
    .rename_axis('author_type')
    .reset_index()
)

df2['without_calories'] = df2['recipes'] - df2['with_calories']

# Vectorised rounding; Series.round() rounds halves to even, the same rule the
# built-in round() applies, and the result is stored as integers.
df2['without_cal_p'] = (df2['without_calories'] / df2['recipes'] * 100).round().astype(int)
df2['with_cal_p'] = (df2['with_calories'] / df2['recipes'] * 100).round().astype(int)

df2

Unnamed: 0,author_type,with_calories,recipes,without_calories,without_cal_p,with_cal_p
0,Community Member,35,442,407,92,8
1,Contributor,67,67,0,0,100
2,KS Team,83,95,12,13,87
3,Partner,2,2,0,0,100


In [306]:
# Stacked percentage bars: for every author type, the share of recipes with
# calorie information stacked with the share without it. Each author type has
# its own colour pair (colors_with / colors_wout); the legend is hidden.
colors_with = ['rgb(27, 103, 224)', 'rgb(237, 169, 21)', 'rgb(68, 128, 19)', 'rgb(129, 50, 168)']
colors_wout = ['rgb(158, 190, 240)', 'rgb(252, 210, 119)', 'rgb(153, 189, 125)', 'rgb(202, 171, 217)']

fig = go.Figure()
for trace_name, pct_column, palette in (
    ('With calories', 'with_cal_p', colors_with),
    ('Without calories', 'without_cal_p', colors_wout),
):
    fig.add_trace(go.Bar(
        name=trace_name,
        x=df2['author_type'],
        y=df2[pct_column],
        text=df2[pct_column],      # percentage printed on the segment
        textposition='auto',
        marker_color=palette,
    ))

fig.update_layout(
    title=dict(
        text="Recipes with Calorie Information by Author",
        x=0.5,
        y=0.9,
        xanchor='center',
        yanchor='top',
    ),
    xaxis_title='Author Type',
    yaxis_title='Recipes Written (%)',
    barmode='stack',
    bargap=0.05,
    showlegend=False,
    font=dict(family='Arial', size=13, color='rgb(31,33,36)'),
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)

fig.show()

### Rating vs Difficulty Level

In [307]:
# How many recipes fall into each difficulty level
df['difficulty'].value_counts()

Easy      469
Medium    130
Hard        7
Name: difficulty, dtype: int64

In [308]:
# Average rating per difficulty level, ordered Easy -> Medium -> Hard.
#
# The order is requested by label. The previous positional reindex([0, 2, 1])
# only produced this order while the groupby returned exactly the three levels
# Easy/Hard/Medium in alphabetical order. A level missing from the data shows
# up as a NaN row here. The frame's index is now 0..2 in display order;
# the `difficulty` and `ave_rating` columns are unchanged.
#
# NOTE(review): ratings of 0.0 are included in the mean. If 0.0 stands for
# "not rated yet", these averages are biased downwards — confirm.
difficulty_order = ['Easy', 'Medium', 'Hard']

levels = (
    df.groupby('difficulty')['rating']
    .mean()
    .reindex(difficulty_order)
    .rename_axis('difficulty')
    .reset_index(name='ave_rating')
)
levels

Unnamed: 0,difficulty,ave_rating
0,Easy,2.634328
2,Medium,2.376923
1,Hard,3.357143


In [309]:
# Bar chart: average rating for each difficulty level (rows of `levels`).

# One colour per difficulty level, in the same order as the rows of `levels`.
colors_level = ['rgb(68, 128, 19)', 'rgb(237, 169, 21)', 'rgb(129, 50, 168)']

fig = go.Figure(data=[go.Bar(
    x=levels['difficulty'],
    y=levels['ave_rating'],
    # One width per bar of THIS chart (previously sized from df1, a frame
    # that belongs to the author-type chart).
    width=[0.6] * len(levels),
    text=levels['ave_rating'].round(1),
    textposition='auto',
    marker_color=colors_level)])

# Fix the y-axis to 0-5 so bars are read against that fixed scale.
fig.update_yaxes(range=[0, 5])

fig.update_layout(
    # Axis titles describe what is actually plotted (they previously carried
    # the labels of the author-type chart).
    xaxis_title='Difficulty Level',
    yaxis_title='Average Rating (out of 5)',
    title={
        'text': "Average Rating per Difficulty Level - Pretty Low?..",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font={'family': 'Arial',
          'size': 13,
          'color': 'rgb(31,33,36)'},
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    bargap=0.05
)

fig.show()

In [334]:
# One row per (difficulty, rating) pair with
#   count     - number of recipes that have this rating at this difficulty
#   ave_likes - mean `user_likes` of those recipes, rounded to a whole number
# ordered Easy -> Medium -> Hard and, within a level, by ascending rating.
#
# The order is computed from the values themselves. The previous hard-coded
# list of 17 row labels only matched the current data: any change in the set
# of (difficulty, rating) pairs would have produced NaN rows or dropped rows.
difficulty_rank = {'Easy': 0, 'Medium': 1, 'Hard': 2}

diff_levels = (
    df.groupby('difficulty')['rating']
    .value_counts()
    .to_frame(name='count')
    .reset_index()
)
diff_levels = (
    diff_levels
    # Temporary sort key; levels not listed in difficulty_rank sort last.
    .assign(difficulty_rank=diff_levels['difficulty'].map(difficulty_rank))
    .sort_values(['difficulty_rank', 'rating'])
    .drop(columns='difficulty_rank')
    .reset_index(drop=True)
)

# groupby already returns the groups sorted by (difficulty, rating).
diff_rating = (
    df.groupby(['difficulty', 'rating'])['user_likes']
    .mean()
    .round()
    .reset_index(name='ave_likes')
)

# Inner join on the two shared key columns; row order follows diff_levels.
diff_l_r = pd.merge(diff_levels, diff_rating, on=['difficulty', 'rating'])

diff_l_r

Unnamed: 0,difficulty,rating,count,ave_likes
0,Easy,0.0,200,36.0
1,Easy,2.0,1,3.0
2,Easy,2.5,1,12.0
3,Easy,3.0,2,324.0
4,Easy,3.5,7,2245.0
5,Easy,4.0,39,4891.0
6,Easy,4.5,101,11703.0
7,Easy,5.0,118,1360.0
8,Medium,0.0,62,35.0
9,Medium,3.0,1,10.0


In [353]:
# Bubble chart: for every (difficulty, rating) pair, the number of recipes (y)
# against the rating (x); colour encodes the difficulty level and bubble size
# the average number of likes.
#
# The frame and its column names are passed to plotly express (imported as
# `px` in the notebook's import cell) instead of bare arrays, so hover labels
# show the column names rather than generic x / y / color / size.
fig = px.scatter(
    diff_l_r,
    x='rating',
    y='count',
    color='difficulty',
    size='ave_likes',
)

fig.update_layout(
    xaxis_title='Average Rating (out of 5)',
    yaxis_title='Count',
    title={
        'text': "How are dishes rated depending on their difficulty?",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font={'family': 'Arial',
          'size': 13,
          'color': 'rgb(31,33,36)'},
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    legend_title='Difficulty Level'
)

fig.show()

In [356]:
# Row(s) with the highest average number of likes. Left as the cell's last
# expression so the notebook renders the frame as a table instead of plain text.
diff_l_r.loc[diff_l_r['ave_likes'] == diff_l_r['ave_likes'].max()]

   difficulty  rating  count  ave_likes
15       Hard     4.5      3    21586.0


In [357]:
# Row(s) with the highest rating present in the data, one per difficulty level
# that has it. Left as the cell's last expression so the notebook renders the
# frame as a table instead of plain text.
diff_l_r.loc[diff_l_r['rating'] == diff_l_r['rating'].max()]

   difficulty  rating  count  ave_likes
7        Easy     5.0    118     1360.0
13     Medium     5.0     26     1304.0
16       Hard     5.0      2       24.0
