In [3]:
# Import our data processing library (note: you may have to install this!)
import pandas as pd
import altair as alt


# Load the sample dataset; the cleaned frame is previewed at the end of the cell.
data_raw = pd.read_csv("starbucks_drinkMenu_expanded.csv")

# Clean: keep only rows with a usable caffeine value and a well-formed fat value.
# na=True counts missing entries as matches, so rows without a value are dropped
# explicitly (the former `== False` comparison dropped them only as a side
# effect, and astype(int) below cannot convert NaN).
has_caffeine_value = ~data_raw['Caffeine (mg)'].str.contains('varies|Varies', na=True)
has_valid_fat = ~data_raw[' Total Fat (g)'].str.contains('3 2', na=True)

data = (
    data_raw[has_caffeine_value & has_valid_fat]
    .assign(**{
        # convert mg to g
        'Caffeine (mg)': lambda df: df['Caffeine (mg)'].astype(int) / 1000,
        # This column is held as text (it is filtered with .str above); make it
        # numeric so it can be aggregated as a quantity downstream.
        ' Total Fat (g)': lambda df: pd.to_numeric(df[' Total Fat (g)']),
    })
    .rename(columns={'Caffeine (mg)': 'Caffeine (g)'})
)

data.head()


Unnamed: 0,Beverage_category,Beverage,Beverage_prep,Calories,Total Fat (g),Trans Fat (g),Saturated Fat (g),Sodium (mg),Total Carbohydrates (g),Cholesterol (mg),Dietary Fibre (g),Sugars (g),Protein (g),Vitamin A (% DV),Vitamin C (% DV),Calcium (% DV),Iron (% DV),Caffeine (g)
0,Coffee,Brewed Coffee,Short,3,0.1,0.0,0.0,0,5,0,0,0,0.3,0%,0%,0%,0%,0.175
1,Coffee,Brewed Coffee,Tall,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0%,0%,0%,0%,0.26
2,Coffee,Brewed Coffee,Grande,5,0.1,0.0,0.0,0,10,0,0,0,1.0,0%,0%,0%,0%,0.33
3,Coffee,Brewed Coffee,Venti,5,0.1,0.0,0.0,0,10,0,0,0,1.0,0%,0%,2%,0%,0.41
4,Classic Espresso Drinks,Caffè Latte,Short Nonfat Milk,70,0.1,0.1,0.0,5,75,10,0,9,6.0,10%,0%,20%,0%,0.075


In [6]:
print('sorted nutrition')

# Fold the chosen nutrition columns into long form inside the chart spec:
# 'column' holds the source column name and 'nutrition' holds its value.
folded_columns = [' Total Fat (g)', 'Trans Fat (g) ', ' Total Carbohydrates (g) ', 'Caffeine (g)']
data_t = alt.Chart(data).transform_fold(folded_columns, as_=['column', 'nutrition'])

# Multi-selection keyed on the folded column name. The condition colors a mark
# with the set3 scheme when it matches the selection and light gray otherwise.
selection = alt.selection(type="multi", fields=['column'])
color = alt.condition(
    selection,
    alt.Color('column:N', scale=alt.Scale(scheme='set3')),
    alt.value('lightgray'),
)

# Strip of rectangles, one row per nutrition column, that hosts the selection.
make_selector = (
    data_t
    .mark_rect()
    .encode(y='column:N', color=color)
    .add_selection(selection)
)

# Mean value per beverage, filtered by the selection and sorted along x by
# the mean of 'nutrition'.
beverage_order = alt.EncodingSortField(field='nutrition', op='mean')
tops = (
    data_t
    .transform_filter(selection)
    .mark_bar()
    .encode(
        x=alt.X('Beverage:N', sort=beverage_order),
        y='mean(nutrition):Q',
        color=alt.Color('column:N', scale=alt.Scale(scheme='set3')),
    )
)

make_selector | tops

sorted nutrition


In [7]:
print('nutrition by category')

# Linked views: a multi-selection on Beverage_category, made in the overview
# chart, filters the detail chart.
selection_cat = alt.selection(type="multi", fields=['Beverage_category'])

# Selector strip for the folded nutrition columns. Its color condition is bound
# to selection_col, the selection this chart registers, so the strip is
# self-contained. It is not part of the layout displayed at the end of this cell.
selection_col = alt.selection_multi(fields=['column'])
color_col = alt.condition(
    selection_col,
    alt.Color('column:N', scale=alt.Scale(scheme='set3')),
    alt.value('lightgray'),
)
make_selector_col = data_t.mark_rect().encode(y='column:N', color=color_col).add_selection(selection_col)

# Shared starting point for the detail view; its size is overridden below.
base = data_t.properties(width=700, height=250)

# Overview: mean nutrition per beverage category; hosts the category selection.
# Uses the same set3 scheme as the detail chart so a column keeps one color
# across both views.
overview = data_t.mark_bar().encode(
    y='mean(nutrition):Q',
    x='Beverage_category:N',
    color=alt.Color('column:N', scale=alt.Scale(scheme='set3'))
).add_selection(selection_cat).properties(height=250, width=350)

# Detail: mean nutrition per beverage, filtered by the category selection.
detail = base.mark_bar().encode(
    y='mean(nutrition):Q',
    x='Beverage:N',
    color=alt.Color('column:N', scale=alt.Scale(scheme='set3'))
).transform_filter(selection_cat).properties(height=250, width=350)

overview | detail

nutrition by category


In [8]:
print('nutrition by Beverage_prep')

# Linked views: a multi-selection on Beverage_prep, made in the overview chart,
# filters the detail chart.
selection_prep = alt.selection(type="multi", fields=['Beverage_prep'])

# Shared starting point for the detail view; its size is overridden below.
base = data_t.properties(width=700, height=250)

# Overview: mean nutrition per preparation; hosts the preparation selection.
# Uses the same set3 scheme as the detail chart so a column keeps one color
# across both views.
overview = data_t.mark_bar().encode(
    y='mean(nutrition):Q',
    x='Beverage_prep:N',
    color=alt.Color('column:N', scale=alt.Scale(scheme='set3'))
).add_selection(selection_prep).properties(height=250, width=350)

# Detail: mean nutrition per beverage, filtered by the preparation selection.
# Built from `base`, matching the category view, instead of leaving `base` unused.
detail = base.mark_bar().encode(
    y='mean(nutrition):Q',
    x='Beverage:N',
    color=alt.Color('column:N', scale=alt.Scale(scheme='set3'))
).transform_filter(selection_prep).properties(height=250, width=350)

overview | detail

nutrition by Beverage_prep
