In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=True)

In [2]:
df = pd.read_csv('../Data Cleaning/KS_Cleaned.csv')

### Recipes by Author Group

In [3]:
df['author_group'].value_counts()

Community Member    442
KS Team              95
Contributor          67
Partner               2
Name: author_group, dtype: int64

In [4]:
author_df = pd.DataFrame(df['author_group'].value_counts().sort_values(ascending=False).reset_index())
author_df = author_df.rename(columns={"index": "author_type", "author_group": "recipes"})

In [5]:
import plotly.graph_objects as go

colors = ['rgb(27, 103, 224)', 'rgb(237, 169, 21)', 'rgb(68, 128, 19)', 'rgb(129, 50, 168)']

fig = go.Figure(data = [go.Bar(
    y=author_df['recipes'],
    x=author_df['author_type'],
    width = [0.6]*len(author_df['recipes']),
    text = author_df['recipes'],
    textposition = 'auto',
    marker_color = colors)])

fig.update_layout(
    xaxis_title = 'Author Type',
    yaxis_title = 'Recipes Written',
    title={
        'text': "Types of Authors in Kitchen Stories",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    bargap = 0.05
)

fig.show()

### Recipes with Calorie Information by Author Group

In [6]:
author_df

Unnamed: 0,author_type,recipes
0,Community Member,442
1,KS Team,95
2,Contributor,67
3,Partner,2


In [7]:
df2 = pd.DataFrame(df.groupby('author_group')['calories'].count()).reset_index()
df2.rename(columns={'author_group':'author_type'}, inplace = True)
df2 = pd.merge(df2, author_df, on = 'author_type')
df2['without_calories'] = df2['recipes']-df2['calories']
df2.rename(columns={'calories':'with_calories'}, inplace = True)
df2['without_cal_p'] = df2['without_calories'] / df2['recipes']*100
df2['without_cal_p'] = df2.without_cal_p.apply(lambda x: round(x))
df2['with_cal_p'] = df2['with_calories'] / df2['recipes']*100
df2['with_cal_p'] = df2.with_cal_p.apply(lambda x: round(x))
df2

Unnamed: 0,author_type,with_calories,recipes,without_calories,without_cal_p,with_cal_p
0,Community Member,35,442,407,92,8
1,Contributor,67,67,0,0,100
2,KS Team,83,95,12,13,87
3,Partner,2,2,0,0,100


In [8]:
colors_with = ['rgb(27, 103, 224)', 'rgb(237, 169, 21)', 'rgb(68, 128, 19)', 'rgb(129, 50, 168)']
colors_wout = ['rgb(158, 190, 240)', 'rgb(252, 210, 119)', 'rgb(153, 189, 125)', 'rgb(202, 171, 217)']

fig = go.Figure(data = [
    go.Bar(name = 'With calories', x = df2['author_type'], y=df2['with_cal_p'], 
           marker_color = colors_with, text = df2['with_cal_p'], textposition = 'auto'),
    go.Bar(name = 'Without calories', x = df2['author_type'], y=df2['without_cal_p'], 
           marker_color = colors_wout, text = df2['without_cal_p'], textposition = 'auto')
    ])

fig.update_layout(
    barmode = 'stack',
    xaxis_title = 'Author Type',
    yaxis_title = 'Recipes Written (%)',
    title={
        'text': "Recipes with Calorie Information by Author",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    bargap = 0.05,
    showlegend = False
)

fig.show()

### Rating vs Difficulty Level

In [9]:
#Number of recipes by difficulty
df.difficulty.value_counts()

Easy      469
Medium    130
Hard        7
Name: difficulty, dtype: int64

In [10]:
#Average rating for difficulty levels
levels = df.groupby('difficulty')['rating'].agg('mean').to_frame(name = 'ave_rating').reset_index()
levels = levels.reindex([0,2,1])
levels

Unnamed: 0,difficulty,ave_rating
0,Easy,2.634328
2,Medium,2.376923
1,Hard,3.357143


In [12]:
colors_level = ['rgb(68, 128, 19)', 'rgb(237, 169, 21)', 'rgb(129, 50, 168)']

fig = go.Figure(data = [go.Bar(
    y=levels['ave_rating'],
    x=levels['difficulty'],
    width = [0.6]*len(author_df['recipes']),
    text = round(levels['ave_rating'],1),
    textposition = 'auto',
    marker_color = colors_level)])

fig.update_yaxes(range=[0,5])

fig.update_layout(
    xaxis_title = 'Author Type',
    yaxis_title = 'Recipes Written',
    title={
        'text': "Average Rating per Difficulty Level - Pretty Low?..",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 13,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    bargap = 0.05
)

fig.show()

In [13]:
diff_levels = df.groupby('difficulty')['rating'].value_counts().to_frame(name = 'count').\
                reset_index().sort_values(['difficulty', 'rating'])
diff_levels = diff_levels.reindex([0,6,7,5,4,3,2,1,11,16,15,14,12,13,9,8,10])

diff_rating = df.groupby(['difficulty', 'rating'])['user_likes'].agg('mean').to_frame(name = 'ave_likes').\
                reset_index().sort_values(['difficulty', 'rating'])

diff_rating['ave_likes'] = round(diff_rating['ave_likes'])

diff_l_r = pd.merge(diff_levels, diff_rating)

diff_l_r

Unnamed: 0,difficulty,rating,count,ave_likes
0,Easy,0.0,200,36.0
1,Easy,2.0,1,3.0
2,Easy,2.5,1,12.0
3,Easy,3.0,2,324.0
4,Easy,3.5,7,2245.0
5,Easy,4.0,39,4891.0
6,Easy,4.5,101,11703.0
7,Easy,5.0,118,1360.0
8,Medium,0.0,62,35.0
9,Medium,3.0,1,10.0


In [14]:
#This plot does not have number of likes associated with size!

import plotly.express as px

fig = px.scatter(x=diff_l_r['rating'], y=diff_l_r['count'], color = diff_l_r['difficulty'], 
                 size = [1.5]*len(diff_l_r['difficulty']))

fig.update_layout(
    xaxis_title = 'Average Rating (out of 5)',
    yaxis_title = 'Count',
    title={
        #'text': "How are dishes rated depending on their difficulty?",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    #legend_title = 'Difficulty Level',
    showlegend = False
)


fig.show()

In [15]:
import plotly.express as px

fig = px.scatter(x=diff_l_r['rating'], y=diff_l_r['count'], color = diff_l_r['difficulty'], 
                 size = diff_l_r['ave_likes'])

fig.update_layout(
    xaxis_title = 'Average Rating (out of 5)',
    yaxis_title = 'Count',
    title={
        #'text': "How are dishes rated depending on their difficulty?",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    legend_title = 'Difficulty Level'
)


fig.show()

In [16]:
print(diff_l_r[diff_l_r.ave_likes == diff_l_r.ave_likes.max()]) 

   difficulty  rating  count  ave_likes
15       Hard     4.5      3    21586.0


In [17]:
print(diff_l_r[diff_l_r.rating == diff_l_r.rating.max()]) 

   difficulty  rating  count  ave_likes
7        Easy     5.0    118     1360.0
13     Medium     5.0     26     1304.0
16       Hard     5.0      2       24.0


### Steps vs User Likes

In [18]:
print(df[df.user_likes == df.user_likes.max()]) 

     author                 author_type  bake_time  calories   carb carb_u  \
110  Verena  Founder at Kitchen Stories          0     779.0  347.0      g   

    difficulty dish_description      dish_name   fat  ... protein_u  rating  \
110       Easy              NaN  One-pot pasta  38.0  ...         g     4.5   

    rest_time reviews_for_rating servings  total_steps  user_likes  \
110         0              752.0        2          4.0      139881   

                                              utensils  author_group  \
110  ['cutting board', 'knife', 'cooking spoon', 'd...       KS Team   

     total_time  
110          25  

[1 rows x 27 columns]


In [19]:
color = ['rgb(237, 169, 21)']*len(df['total_steps'])

fig = go.Figure(data = [go.Bar(
    y=df['user_likes'],
    x=df['total_steps'],
    marker_color = color
)])

fig.update_layout(
    xaxis_title = 'Steps Required',
    yaxis_title = 'User Likes',
    title={
        #'text': "Most Liked Step Count",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    bargap = 0.05
)

fig.show()

In [20]:
fig = px.histogram(df, x="total_steps", y="user_likes", color_discrete_sequence = ['rgb(237, 169, 21)'],
                   marginal="violin")#, # or violin, rug)



fig.update_layout(
    xaxis_title = 'Steps Required',
    yaxis_title = 'User Likes',
    title={
        'text': "Most Liked Step Count",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
    bargap = 0.05
)

fig.show()

### Top 10 Ingredients

In [21]:
ingredient_df = pd.read_csv('../Data Cleaning/KS_Ingredients_Cleaned.csv')

In [22]:
ingredient_df.ingredient.value_counts()[16:50]

thyme                    49
eggs                     49
parmesan cheese          49
milk                     46
chicken breasts          46
basil                    45
soy sauce                42
carrots                  40
oil                      40
red onion                38
garlic powder            37
maes                     37
cherry maes              35
ground cumin             35
honey                    35
egg                      35
carrot                   34
potaes                   34
black pepper             33
chili flakes             32
white wine               31
heavy cream              31
rosemary                 31
bay leaves               30
vegetable broth          30
ma paste                 30
bacon                    29
chili powder             29
rice                     29
smoked paprika powder    28
red bell pepper          27
sesame oil               27
celery                   26
ma sauce                 26
Name: ingredient, dtype: int64

In [23]:
ingredient_count = ingredient_df.ingredient.value_counts()[0:15].to_frame(name = 'count').reset_index().\
                    rename(columns = {'index':'ingredient'}).sort_values('count', ascending = False)
ingredient_count

Unnamed: 0,ingredient,count
0,salt,433
1,pepper,292
2,garlic,260
3,olive oil,249
4,onion,156
5,butter,124
6,water,105
7,parsley,85
8,flour,78
9,sugar,77


In [24]:
color = ['rgb(237, 169, 21)']*len(ingredient_count['ingredient'])

fig = go.Figure(data=[go.Bar(
    y=ingredient_count['ingredient'],
    x=ingredient_count['count'],
    orientation = 'h',
    marker_color = color
)])

fig.update_layout(
    xaxis_title = 'Number of Recipes Where Required',
    yaxis_title = 'Ingredient',
    title={
        'text': "Make Sure You Have These Ingredients!",
        #'y':0.9,
        #'x':0.5,
        #'xanchor': 'center',
        'yanchor': 'top'},
    font = {'family': 'Arial',
           'size': 18,
           'color': 'rgb(31,33,36)'},
    plot_bgcolor = 'rgba(0,0,0,0)',
    paper_bgcolor = 'rgba(0,0,0,0)',
)

fig.show()