In [None]:
import pandas as pd
import numpy as np
import csv
import zipfile

from matplotlib import pyplot as plt
%matplotlib inline
# Read the tab-separated food table directly out of the zip archive.
filepath = "../resources/food.csv.zip"
# Context managers close both the archive and the member handle as soon as
# the frame has been read; previously the ZipFile was left open.
with zipfile.ZipFile(filepath) as zf:
    with zf.open('food.csv') as food_csv:
        # dtype=object keeps every column as raw strings, which is why later
        # cells cast the nutrient columns with .astype(float) before comparing.
        food = pd.read_csv(food_csv, parse_dates=True, dtype=object, delimiter="\t", quoting=csv.QUOTE_NONE, encoding='utf-8')
# Show all columns but only a handful of rows when a frame is displayed.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows',10)
food

In [None]:
## Andi ##

# Build a list of all candy tags plus every 2- and 3-tag combination of them,
# then keep the foods whose 'categories_tags' value is exactly one of those strings.
# Known limitations: the exact-match lookup is crude, and a food that carries candy
# tags together with non-candy tags (e.g. 'desserts, water') falls through the cracks.
# TODO: replace with a per-tag membership test, i.e. split 'categories_tags' on ','
# and keep the rows that share at least one tag with candy_tags.

from itertools import combinations

candy_tags = ["en:sugary-snacks", "en:desserts", "en:candies", "en:chocolates", "en:pies", "en:puddings", 
 "en:biscuits-and-cakes", "en:confectioneries", "en:chocolate-chip-cookies", "en:chocolate-block", 
 "en:jelly-crystals", "en:carrot-cake", "en:chocolate-covered-muesli-bar"]
# Copy the list instead of aliasing it: with `augmented_tags = candy_tags` the
# in-place `+=` below also grew candy_tags, so the size-3 pass combined the
# already-joined pairs as well and produced >100k strings.
augmented_tags = list(candy_tags)

for i in range(2, 4):
    # Joined with ',' (no spaces) so a string can equal a whole 'categories_tags' value.
    augmented_tags += [",".join(comb) for comb in combinations(candy_tags, i)]

# Exact match of the whole 'categories_tags' string against the generated list.
tagged_candies = food[food['categories_tags'].isin(augmented_tags)]

tagged_candies

In [None]:
# Complement of the candy selection: every row whose 'categories_tags' value
# is not one of the strings in augmented_tags.
not_candy_mask = ~food['categories_tags'].isin(augmented_tags)
food_not_tagged_as_candy = food.loc[not_candy_mask]

food_not_tagged_as_candy

In [None]:
# Candies whose carbohydrate value lies strictly between 47 and 57.
# The column holds strings (loaded with dtype=object), hence the float cast.
candies_around_50_carbs = tagged_candies[
    (tagged_candies['carbohydrates_100g'].astype(float) > 47)
    & (tagged_candies['carbohydrates_100g'].astype(float) < 57)
]

candies_around_50_carbs

#candies_below = tagged_candies[tagged_candies['carbohydrates_100g'].astype(float) < 20]
#candies_below
#candies_above = tagged_candies[tagged_candies['carbohydrates_100g'].astype(float) > 65]
#candies_above

In [None]:
# Candies whose sugar value lies strictly between 45 and 55.
candies_around_50_sugar = tagged_candies[tagged_candies['sugars_100g'].astype(float) > 45]
# Narrow the sugar-filtered frame itself. The second step used to filter
# candies_around_50_carbs (copy-paste slip), which threw away the > 45 bound
# and silently restricted the result to the 47-57 carbohydrate range.
candies_around_50_sugar = candies_around_50_sugar[candies_around_50_sugar['sugars_100g'].astype(float) < 55]

# NOTE: only the last expression of a cell is rendered, so this line shows
# nothing while candies_above is displayed below.
candies_around_50_sugar

#candies_below = tagged_candies[tagged_candies['sugars_100g'].astype(float) < 30]
#candies_below
# Candies with a sugar value above 65.
candies_above = tagged_candies[tagged_candies['sugars_100g'].astype(float) > 65]
candies_above

In [None]:
# Candies with a fat value below 5.
# The same `< 5` filter was previously applied twice in a row; the second pass
# was a no-op, so a single filter yields the identical frame.
# NOTE(review): the variable name suggests a range ("around x") — a lower
# bound may have been intended for the first filter; confirm with the author.
candies_around_x_fats = tagged_candies[tagged_candies['fat_100g'].astype(float) < 5]

#candies_around_x_fats = candies_around_x_fats[candies_around_x_fats['carbohydrates_100g'].astype(float) > 30]


candies_around_x_fats

In [None]:
# Candies with a sugar value strictly between 22 and 30 that also have a
# carbohydrate value above 30. All three conditions are combined in one mask.
candies_around_x_sugar = tagged_candies[
    (tagged_candies['sugars_100g'].astype(float) > 22)
    & (tagged_candies['sugars_100g'].astype(float) < 30)
    & (tagged_candies['carbohydrates_100g'].astype(float) > 30)
]


candies_around_x_sugar

In [None]:
# Candies whose carbohydrate value lies strictly between 22 and 30.
candies_around_x_carbs = tagged_candies[
    (tagged_candies['carbohydrates_100g'].astype(float) > 22)
    & (tagged_candies['carbohydrates_100g'].astype(float) < 30)
]

candies_around_x_carbs

In [None]:
import os

import plotly.plotly as py
import plotly.graph_objs as go

# Credentials are read from the environment instead of being hardcoded in the
# notebook. The key that used to be committed here should be considered
# leaked and be regenerated in the plotly account settings.
# Sign-in is skipped when the variables are unset; it is only needed for the
# (currently commented-out) py.iplot upload at the end of the cell.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    py.sign_in(plotly_username, plotly_api_key)

# Mean of each nutrient column for candy-tagged foods vs. all other foods.
# Columns hold strings (loaded with dtype=object), hence the float casts.
candy_sugar = tagged_candies["sugars_100g"].astype(float).mean()
others_sugar = food_not_tagged_as_candy["sugars_100g"].astype(float).mean()

candy_fat = tagged_candies["fat_100g"].astype(float).mean()
others_fat = food_not_tagged_as_candy["fat_100g"].astype(float).mean()

# Divided by 1000 so the bar is on a scale comparable to the other columns
# (labelled "energy (MJ)" below).
candy_energy = tagged_candies["energy_100g"].astype(float).mean() / 1000
others_energy = food_not_tagged_as_candy["energy_100g"].astype(float).mean() / 1000

candy_carbs = tagged_candies["carbohydrates_100g"].astype(float).mean()
others_carbs = food_not_tagged_as_candy["carbohydrates_100g"].astype(float).mean()

candy_salt = tagged_candies["salt_100g"].astype(float).mean()
others_salt = food_not_tagged_as_candy["salt_100g"].astype(float).mean()

candy_nutrition_score = tagged_candies["nutrition-score-fr_100g"].astype(float).mean()
others_nutrition_score = food_not_tagged_as_candy["nutrition-score-fr_100g"].astype(float).mean()

# Bar labels and the two value series, in matching order.
x_names = ["sugar", "fat", "energy (MJ)", "carbohydrates", "salt", "nutrition score"]
y_values_candy = [candy_sugar, candy_fat, candy_energy, candy_carbs, candy_salt, candy_nutrition_score]
y_values_others = [others_sugar, others_fat, others_energy, others_carbs, others_salt, others_nutrition_score]

trace1 = go.Bar(
    x=x_names,
    y=y_values_candy,
    name='Candy'
)
trace2 = go.Bar(
    x=x_names,
    y=y_values_others,
    name='Other Foods'
)

data = [trace1, trace2]
# Grouped bars: candy and other foods side by side for each nutrient.
layout = go.Layout(
    #xaxis=dict(tickangle=-180),
    barmode='group',
    height=700,
    margin=go.Margin(
        l=50,
        r=50,
        b=150,
        t=100,
        pad=4
    ),
)

fig = go.Figure(data=data, layout=layout)
# comment out figure-creation to save API access volume
#py.iplot(fig, filename='candy_others_comparison_bars', bbox_inches='tight')

In [None]:
# Histogram of the salt values of the candy-tagged foods.
layout = go.Layout(
    title='Salt (in g) in Candy',
    xaxis={'title': 'salt in 100g (steps of 5)'},
    yaxis={'title': 'Number of candy in interval'},
    bargap=0.01,
)
trace1 = go.Histogram(x=tagged_candies["salt_100g"].astype(float))
data = [trace1]

fig = go.Figure(data=data, layout=layout)
# Rendering through plotly is left disabled.
#py.iplot(fig, filename='candy_salt_histo')

In [None]:
# Histogram of the sugar values of the candy-tagged foods.
layout = go.Layout(
    title='Sugar (in g) in Candy',
    xaxis={'title': 'sugar in 100g (steps of 5)'},
    yaxis={'title': 'Number of candy in interval'},
    bargap=0.01,
)
trace1 = go.Histogram(x=tagged_candies["sugars_100g"].astype(float))
data = [trace1]

fig = go.Figure(data=data, layout=layout)
# Rendering through plotly is left disabled.
#py.iplot(fig, filename='candy_sugar_histo')

In [None]:
# Histogram of the fat values of the candy-tagged foods.
layout = go.Layout(
    title='Fat (in g) in Candy',
    xaxis={'title': 'fat in 100g'},
    yaxis={'title': 'Number of candy in interval'},
    bargap=0.01,
)
trace1 = go.Histogram(x=tagged_candies["fat_100g"].astype(float))
data = [trace1]

fig = go.Figure(data=data, layout=layout)
# Rendering through plotly is left disabled.
#py.iplot(fig, filename='candy_fat_histo')

In [None]:
# Histogram of the carbohydrate values of the candy-tagged foods.
layout = go.Layout(
    title='Carbohydrates (in g) in Candy',
    xaxis={'title': 'carbohydrates in 100g (steps of 5)'},
    yaxis={'title': 'Number of candy in interval'},
    bargap=0.01,
)
trace1 = go.Histogram(x=tagged_candies["carbohydrates_100g"].astype(float))
data = [trace1]

fig = go.Figure(data=data, layout=layout)
# Rendering through plotly is left disabled.
#py.iplot(fig, filename='candy_carbs_histo')

In [None]:
# Histogram of the carbohydrate values of the foods not tagged as candy.
layout = go.Layout(
    title='Carbohydrates in Others',
    xaxis={'title': 'carbohydrates in 100g'},
    yaxis={'title': 'Number of items in interval'},
    bargap=0.01,
)
trace1 = go.Histogram(x=food_not_tagged_as_candy["carbohydrates_100g"].astype(float))
data = [trace1]

fig = go.Figure(data=data, layout=layout)
# Rendering through plotly is left disabled.
#py.iplot(fig, filename='others_carbs_histo')