# get urls

In [1]:
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from pathlib import Path
import pandas as pd

In [None]:
# Each entry is a (URL template, page count) pair; the pair is unpacked into
# template_to_urls(template, max) by the scraping cells below.
base_url_alton = (
    "https://www.foodnetwork.com/profiles/talent/alton-brown/recipes/recentlyaired-/p/{}",
    54,
)
base_url_show = (
    "https://www.foodnetwork.com/shows/good-eats/recipes/recentlyaired-/p/{}",
    46,
)

In [4]:
def template_to_urls(template, max):
    """Expand a paginated URL template into the list of page URLs.

    ``template`` is formatted once for every page number from 1 up to and
    including ``max``; the results are returned in page order.
    """
    urls = []
    for page in range(1, max + 1):
        urls.append(template.format(page))
    return urls
    
def get_soup(url, timeout=30):
    """Download ``url`` and return its body parsed with the lxml parser.

    Parameters:
        url: address of the page to fetch.
        timeout: seconds passed to ``requests.get``. Without a timeout a
            stalled connection would block the scraping loop indefinitely.

    Raises:
        ValueError: if the response status is not OK.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        raise ValueError("{} could not be retrieved.".format(url))
    return BeautifulSoup(response.text, "lxml")

def soup_to_reviews(soup):
    """Map each recipe URL on a listing page to its raw rating texts.

    Returns a dict ``{recipe_url: (stars_title, review_count_text)}``. Either
    tuple member is ``None`` when the corresponding rating element is not
    found inside the recipe's text block.
    """
    listing = soup.find(attrs={'class': "l-List"})
    recipe_reviews = {}
    for item in listing.find_all(attrs={'class': "m-MediaBlock__m-TextWrap"}):
        # hrefs are scheme-relative, so the scheme is prepended here.
        recipe_url = "https:" + item.a.get("href")
        stars_tag = item.find(attrs={'class': "gig-rating-stars"})
        count_tag = item.find(attrs={'class': "gig-rating-ratingsum"})
        stars_title = stars_tag.get('title') if stars_tag else None
        count_text = count_tag.text if count_tag else None
        recipe_reviews[recipe_url] = (stars_title, count_text)
    return recipe_reviews

def soup_to_recipes(soup):
    """Return the absolute recipe URLs found on a listing page, in page order."""
    listing = soup.find(attrs={'class': "l-List"})
    recipe_urls = []
    for item in listing.find_all(attrs={'class': "m-MediaBlock__m-TextWrap"}):
        # hrefs are scheme-relative, so the scheme is prepended here.
        recipe_urls.append("https:" + item.a.get("href"))
    return recipe_urls

In [6]:
# Scrape every listing page of the Alton Brown profile and merge the
# per-page {recipe_url: (stars, review_count)} dicts into recipe_urls1.
recipe_urls1 = {}
alton_pages = template_to_urls(*base_url_alton)
for url in tqdm(alton_pages):
    new_urls = soup_to_reviews(get_soup(url))
    page_size = len(new_urls)
    # Report pages that do not hold 15 recipes (presumably the full page size).
    if page_size != 15:
        print(page_size, url)
    recipe_urls1.update(new_urls)

print(len(recipe_urls1), '>', len(set(recipe_urls1)))


  0%|                                                                                           | 0/54 [00:00<?, ?it/s]
  2%|█▌                                                                                 | 1/54 [00:00<00:16,  3.17it/s]
  4%|███                                                                                | 2/54 [00:00<00:16,  3.16it/s]
  6%|████▌                                                                              | 3/54 [00:01<00:27,  1.84it/s]
  7%|██████▏                                                                            | 4/54 [00:02<00:25,  1.96it/s]
  9%|███████▋                                                                           | 5/54 [00:02<00:21,  2.26it/s]
 11%|█████████▏                                                                         | 6/54 [00:02<00:19,  2.47it/s]
 13%|██████████▊                                                                        | 7/54 [00:03<00:17,  2.75it/s]
 15%|████████████▎                     

585 > 585


In [None]:
# Same scrape as for the profile listing, but over the Good Eats show
# listing; results accumulate in recipe_urls2.
recipe_urls2 = {}
for url in tqdm(template_to_urls(*base_url_show)):
    page_soup = get_soup(url)
    new_urls = soup_to_reviews(page_soup)
    # Report pages that do not hold 15 recipes (presumably the full page size).
    if not len(new_urls) == 15:
        print(len(new_urls), url)
    for recipe_url, rating in new_urls.items():
        recipe_urls2[recipe_url] = rating

print(len(recipe_urls2), '>', len(set(recipe_urls2)))


  0%|                                                                                           | 0/46 [00:00<?, ?it/s]
  2%|█▊                                                                                 | 1/46 [00:01<01:23,  1.86s/it]
  4%|███▌                                                                               | 2/46 [00:03<01:24,  1.92s/it]
  7%|█████▍                                                                             | 3/46 [00:05<01:12,  1.68s/it]
  9%|███████▏                                                                           | 4/46 [00:06<01:02,  1.50s/it]
 11%|█████████                                                                          | 5/46 [00:07<00:57,  1.40s/it]
 13%|██████████▊                                                                        | 6/46 [00:08<00:51,  1.29s/it]
 15%|████████████▋                                                                      | 7/46 [00:10<00:56,  1.44s/it]
 17%|██████████████▍                   

In [70]:
def _parse_rating(rating):
    """Turn a scraped ``(stars_title, review_count_text)`` pair into numbers.

    Each text is reduced to its first space-separated token; a missing
    (falsy) text becomes ``None``. Returns ``(stars, reviews)``.
    """
    stars_text, reviews_text = rating
    reviews = int(reviews_text.split(' ')[0]) if reviews_text else None
    stars = float(stars_text.split(' ')[0]) if stars_text else None
    return stars, reviews


# Collect one row per recipe (keyed by the last URL path segment) and build
# the frame once at the end. Growing a DataFrame with DataFrame.append inside
# the loop is quadratic and the method no longer exists in pandas 2.x.
review_rows = {}
for url, v in recipe_urls1.items():
    name = url.split('/')[-1]
    stars, reviews = _parse_rating(v)
    assert name not in review_rows
    review_rows[name] = {'reviews': reviews, 'stars': stars, 'url': url}
for url, v in recipe_urls2.items():
    name = url.split('/')[-1]
    stars, reviews = _parse_rating(v)
    if name in review_rows:
        # Recipe already seen in the first listing: both listings must agree.
        known = review_rows[name]
        assert known['url'] == url
        if stars: assert known['stars'] == stars
        if reviews: assert known['reviews'] == reviews
        continue
    review_rows[name] = {'reviews': reviews, 'stars': stars, 'url': url}

df_reviews = pd.DataFrame(
    list(review_rows.values()),
    index=list(review_rows),
    columns=['reviews', 'stars', 'url'],
).astype({'reviews': float, 'stars': float})  # missing values become NaN
df_reviews.sort_values(['stars', 'reviews'], ascending=False).head(10)

Unnamed: 0,reviews,stars,url
good-eats-roast-turkey-recipe-1950271,5419.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
the-chewy-recipe-1909046,1137.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
who-loves-ya-baby-back-recipe-1937448,1052.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
instant-pancake-mix-recipe-1938544,712.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
pan-seared-rib-eye-recipe-2131274,655.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
french-toast-recipe-1942216,639.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
homemade-soft-pretzels-recipe-1948242,626.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
southern-biscuits-recipe-2041990,616.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
hot-spinach-and-artichoke-dip-recipe-1912620,556.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...
shepherds-pie-recipe2-1942900,530.0,5.0,https://www.foodnetwork.com/recipes/alton-brow...


In [11]:
# NOTE(review): sorted_recipe_urls1 / sorted_recipe_urls2 are not assigned in
# any cell visible here; presumably they come from a cell that was removed.
# Confirm their origin, otherwise this cell fails on a fresh kernel.
sorted_recipe_urls = set(sorted_recipe_urls1) | set(sorted_recipe_urls2)
print(len(sorted_recipe_urls))

In [11]:
# Persist the URL set one URL per line. The lines are sorted so the file is
# identical from run to run (set iteration order is not stable), and the
# encoding is explicit so the round-trip does not depend on the platform.
Path('sorted_recipe_urls.txt').write_text('\n'.join(sorted(sorted_recipe_urls)), encoding='utf8')
assert sorted_recipe_urls == set(Path('sorted_recipe_urls.txt').read_text(encoding='utf8').split('\n'))

True

In [78]:
# Compare the saved URL set (a) with the URLs scraped into df_reviews (b):
# sizes of a, b, a-only, b-only and their union.
a = sorted_recipe_urls
b = set(df_reviews.url.values)
only_in_a = a - b
only_in_b = b - a
len(a), len(b), len(only_in_a), len(only_in_b), len(a | b)

(675, 683, 99, 107, 782)

In [75]:
# URLs present in df_reviews but missing from the saved URL set.
set(df_reviews.url.values).difference(sorted_recipe_urls)

{'https://www.foodnetwork.com/recipes/alton-brown/acid-jellies-recipe-1941929',
 'https://www.foodnetwork.com/recipes/alton-brown/alton-browns-buffalo-wings-recipe-1972721',
 'https://www.foodnetwork.com/recipes/alton-brown/angel-food-cake-recipe-1938726',
 'https://www.foodnetwork.com/recipes/alton-brown/avocado-buttercream-frosting-recipe-1945000',
 'https://www.foodnetwork.com/recipes/alton-brown/baked-greens-chips-recipe-1961387',
 'https://www.foodnetwork.com/recipes/alton-brown/baked-macaroni-and-cheese-1-3644279',
 'https://www.foodnetwork.com/recipes/alton-brown/barbecue-pork-butt-recipe-2047110',
 'https://www.foodnetwork.com/recipes/alton-brown/barley-and-lamb-stew-recipe-1945927',
 'https://www.foodnetwork.com/recipes/alton-brown/basic-popovers-recipe-1973913',
 'https://www.foodnetwork.com/recipes/alton-brown/beef-paillard-recipe-1915465',
 'https://www.foodnetwork.com/recipes/alton-brown/best-ever-green-bean-casserole-recipe-1950575',
 'https://www.foodnetwork.com/recipes/

In [76]:
# URLs present in the saved URL set but missing from df_reviews.
sorted_recipe_urls.difference(df_reviews.url.values)

{'https://www.foodnetwork.com/recipes/alton-brown/abs-martini-recipe-1945331',
 'https://www.foodnetwork.com/recipes/alton-brown/all-american-beef-taco-recipe-2014469',
 'https://www.foodnetwork.com/recipes/alton-brown/alton-brown-ma-maes-congealed-christmas-salad-recipe-1965065',
 'https://www.foodnetwork.com/recipes/alton-brown/ants-in-trees-recipe-1923962',
 'https://www.foodnetwork.com/recipes/alton-brown/atomic-apples-recipe-1922083',
 'https://www.foodnetwork.com/recipes/alton-brown/backyard-baby-back-ribs-recipe-1947134',
 'https://www.foodnetwork.com/recipes/alton-brown/bacon-vinaigrette-with-grilled-radicchio-recipe-1939636',
 'https://www.foodnetwork.com/recipes/alton-brown/baked-macaroni-and-cheese-1939524',
 'https://www.foodnetwork.com/recipes/alton-brown/baklava-recipe-1943974',
 'https://www.foodnetwork.com/recipes/alton-brown/beer-bread-recipe-1949292',
 'https://www.foodnetwork.com/recipes/alton-brown/beet-slaw-recipe-1940942',
 'https://www.foodnetwork.com/recipes/alt

# get recipes

In [1]:
import requests
from tqdm import tqdm
from pathlib import Path

In [61]:
# Reload the URL list saved earlier: one URL per line, collected into a set.
url_list_text = Path('sorted_recipe_urls.txt').read_text()
sorted_recipe_urls = set(url_list_text.split('\n'))

In [3]:
def download_urls(urls, timeout=30):
    """Download each URL to a local ``.html`` file mirroring its URL path.

    The target path is the URL path (everything after the host) plus
    ``.html``, relative to the current working directory. URLs whose file
    already exists with non-blank content are skipped. A one-line summary of
    skipped / failed / downloaded counts is printed at the end.

    Parameters:
        urls: iterable of absolute URLs.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block the whole batch.
    """
    skipped = []
    error = []
    downloaded = []
    for url in tqdm(urls):
        filename = Path('/'.join(url.split('//')[1].split('/')[1:]) + '.html')
        if filename.exists() and filename.read_text(encoding='utf8').strip():
            skipped.append(url)
            continue

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # A network failure on one URL is recorded as an error instead
            # of aborting the remaining downloads.
            error.append(url)
            continue
        if not response.ok:
            error.append(url)
            continue

        # Create the directory only once there is something to write.
        filename.parent.mkdir(parents=True, exist_ok=True)
        filename.write_text(response.text, encoding='utf8')
        downloaded.append(url)
    print(f'skipped: {len(skipped)}  error: {len(error)}  downloaded: {len(downloaded)}  TOTAL: {len(skipped + error + downloaded)}')


# Fetch every recipe page in sorted_recipe_urls; pages already saved on disk
# with non-blank content are counted as skipped rather than re-downloaded.
download_urls(sorted_recipe_urls)

100%|████████████████████████████████████████████████████████████████████████████████| 675/675 [01:09<00:00,  9.77it/s]


skipped: 675  error: 0  downloaded: 0  TOTAL: 675


# process recipes

In [65]:
from bs4 import BeautifulSoup
from tqdm import tqdm
from pathlib import Path
import json

In [66]:
# All downloaded recipe pages under recipes/, searched recursively, in
# sorted path order; the first five are shown as a sanity check.
recipes_dir = Path('recipes')
recipe_files = sorted(recipes_dir.rglob('*.html'))
recipe_files[:5]

[WindowsPath('recipes/alton-brown/10-minute-apple-sauce-recipe-1950796.html'),
 WindowsPath('recipes/alton-brown/10-minute-apple-sauce-recipe2-1938289.html'),
 WindowsPath('recipes/alton-brown/4-pepper-deviled-eggs-recipe-1917393.html'),
 WindowsPath('recipes/alton-brown/40-cloves-and-a-chicken-recipe-1910661.html'),
 WindowsPath('recipes/alton-brown/abs-b-and-bs-recipe-1908804.html')]

In [67]:
def file_to_soup(filename):
    """Read a UTF-8 HTML file and return it parsed with the lxml parser."""
    markup = Path(filename).read_text(encoding='utf8')
    return BeautifulSoup(markup, 'lxml')


def soup_to_data(soup):
    """Extract the recipe fields from a parsed recipe page into a dict.

    Returns ``None`` when the page has no element with class ``o-Recipe``.
    Otherwise the returned dict contains:

    * ``Title``, ``Author``, ``Rating_stars``, ``Rating_num`` from the summary;
    * one entry per label/value item of the recipe info lists;
    * one entry per source label in the footer video promo, when present;
    * ``Categories``: the footer tag texts joined with ``;``, when present;
    * ``Ingredients`` (and ``Ingredients.<n>.<section>`` for each sub-section
      heading): lists of ingredient texts, when an ingredients body exists;
    * ``Directions``: list of the stripped method step texts.

    Raises:
        AssertionError: if a label would overwrite a key already collected.
        AttributeError: if an element accessed unconditionally (summary,
            title, author link, info block, footer, method body) is missing.
    """
    ret = {}
    recipe = soup.find(attrs={'class': "o-Recipe"})
    if not recipe:
        return None
    # summary
    recipe_summary = recipe.find(attrs={'class': "m-RecipeSummary"})
    ret['Title'] = recipe_summary.find(attrs={'class': "o-AssetTitle__a-HeadlineText"}).text
    ret['Author'] = recipe_summary.find(attrs={'class': "o-Attribution__m-TextWrap"}).a.text
    # NOTE(review): the two rating class strings below end with a space, unlike
    # the selectors used for the listing pages. This looks like it may prevent
    # a match (leaving both ratings None) -- confirm before relying on them.
    ret['Rating_stars'] = recipe_summary.find(attrs={'class': "gig-rating-stars "})
    # `x and x.attr` keeps None when the element was not found.
    ret['Rating_stars'] = ret['Rating_stars'] and ret['Rating_stars'].get('title')
    ret['Rating_num'] = recipe_summary.find(attrs={'class': "gig-rating-ratingsum "})
    ret['Rating_num'] = ret['Rating_num'] and ret['Rating_num'].text
    recipe_info = recipe_summary.find(attrs={'class': "o-RecipeInfo"})
    for ul in recipe_info.find_all('ul'):
        for li in ul.find_all('li'):
            # First span is the label; any further spans are value parts,
            # joined with newlines.
            span1, *span2 = li.find_all('span')
            k = span1.text.strip(':').strip()
            v = '\n'.join(s.text.strip() for s in span2)
            # Never silently overwrite a field collected earlier.
            assert k not in ret
            ret[k] = v
    # footer
    recipe_footer = recipe.find(attrs={'class': "recipe-body-footer"})
    recipe_sources = recipe_footer.find(attrs={'class': "o-VideoPromo"})
    if recipe_sources:
        for recipe_source in recipe_sources.find_all(attrs={'class': "m-MediaBlock__a-Source"}):
            span1, *span2 = recipe_source.find_all('span')
            k = span1.text.strip(':').strip()
            # Store the plural label "Episodes" under the singular key "Episode".
            if k == "Episodes":
                k = k[:-1]
            v = '\n'.join(s.text.strip() for s in span2)
            assert k not in ret
            ret[k] = v
    recipe_tags = recipe_footer.find(attrs={'class': "o-Capsule__m-TagList m-TagList"})
    if recipe_tags:
        ret['Categories'] = ';'.join([tag.text for tag in recipe_tags.find_all('a')])
    # body / ingredients
    ingredients = recipe.find(attrs={'class': "o-Ingredients__m-Body"})
    if ingredients:
        # <p> elements are ingredient lines; each <h6> starts a new named
        # section whose lines are collected under its own numbered key.
        ingredient_title = "Ingredients"
        section_count = 0
        ret[ingredient_title] = []
        for ingredient in ingredients.find_all(['p', 'h6']):
            if ingredient.name == 'p':
                ret[ingredient_title] += [ingredient.text]
            else:
                section = ingredient.text.strip().strip(':')
                section_count += 1
                ingredient_title = f"Ingredients.{section_count}.{section}"
                assert ingredient_title not in ret
                ret[ingredient_title] = []
    # body / method
    method = recipe.find(attrs={'class': "o-Method__m-Body"})
    ret['Directions'] = [li.text.strip() for li in method.find_all('li')]
    return ret

# soup = file_to_soup(recipe_files[22])
# soup_to_data(soup)

In [69]:
# Parse every downloaded page. Each recipe is keyed by the last '-'-separated
# token of the file name (before the first '.'); underscores are appended
# until the key is unused, so clashing keys never overwrite each other.
data = {}
for filename in tqdm(recipe_files):
    base_name = filename.name.split('.')[0]
    k = base_name.split('-')[-1]
    while k in data:
        k += '_'
    parsed = soup_to_data(file_to_soup(filename))
    data[k] = parsed
    if parsed is None:
        print(f"{k} couldn't parse as recipe")

 66%|████████████████████████████████████████████████████▊                           | 446/675 [01:57<00:58,  3.92it/s]

2047369 couldn't parse as recipe


100%|████████████████████████████████████████████████████████████████████████████████| 675/675 [02:54<00:00,  4.65it/s]


In [70]:
# Save the parsed recipes as JSON and check that reading the file back
# reproduces the in-memory data exactly.
json_path = Path('recipe_data.json')
with json_path.open(mode='w') as f:
    json.dump(data, f)
with json_path.open() as f:
    reloaded = json.load(f)
assert data == reloaded

# data

In [5]:
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import json

In [6]:
# Load the parsed recipes saved by the processing step.
data = json.loads(Path('recipe_data.json').read_text())

In [73]:
# Build one row per recipe. List-valued fields are reduced to counts
# (n_Ingredients over every "Ingredients*" key, n_Directions); recipes that
# failed to parse contribute an empty row.
# Rows are collected first and the frame is built once: DataFrame.append in a
# loop is quadratic and, like the pd.np alias, no longer exists in pandas 2.x.
NAN = float('nan')

rows = []
row_ids = []
d2 = {}
for name, d in tqdm(data.items(), leave=False):
    row_ids.append(name.split('-')[-1])
    if not d:
        rows.append({})
        continue
    d2 = d.copy()
    if "Ingredients" in d2:
        ing_len = 0
        for k in list(d2):
            if k.startswith("Ingredients"):
                ing_len += len(d2.pop(k))
        d2["n_Ingredients"] = ing_len
    if "Directions" in d2:
        d2["n_Directions"] = len(d2.pop("Directions"))
    rows.append(d2)
df = pd.DataFrame(rows, index=row_ids)

# Columns of the last parsed recipe come first (in its key order), followed
# by any remaining columns.
new_col_order = [k for k in d2.keys() if k in df.columns] + [c for c in df.columns if c not in (d2.keys())]
df = df[new_col_order]

df.index.name = "foodnetwork_id"
df['Author'] = df['Author'].str.replace("Recipe courtesy of ", "", regex=False).astype('category')
df['Rating_stars'] = df['Rating_stars'].replace('pending rating', NAN).astype(float)
df['Rating_num'] = df['Rating_num'].astype(float)  # Int
df['Level'] = df['Level'].astype('category')
df['Show'] = df['Show'].astype('category')
df['n_Ingredients'] = df['n_Ingredients'].astype(float)  # Int
df['n_Directions'] = df['n_Directions'].astype(float)  # Int
df['Nutrition Info'] = df['Nutrition Info'].str.strip().replace('', NAN)
# "Total" may carry a second line; keep the first line as the duration and
# the second as a free-text note.
total = df.Total.str.split('\n', expand=True)
df['Total'] = total[0]
df['note_Total'] = total[1].dropna()
for c in 'Total Cook Inactive Prep Active'.split():
    df[f't_{c}'] = pd.to_timedelta(df.pop(c))
# Drop columns that ended up with no values at all.
df = df.dropna(axis=1, how='all')

df.head()



Unnamed: 0_level_0,Title,Author,Level,Yield,Show,Episode,Categories,n_Ingredients,n_Directions,note_Total,t_Total,t_Cook,t_Inactive,t_Prep,t_Active
foodnetwork_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1950796,10 Minute Apple Sauce,Alton Brown,Easy,1 quart,Good Eats,Apple Family Values,Easy Side Dish Recipes;Easy;Side Dish;Sauce Re...,7.0,2.0,,NaT,NaT,NaT,NaT,NaT
1938289,10 Minute Apple Sauce,Alton Brown,,1 quart,Good Eats,Cable in the Classroom,Sauce Recipes;Apple;Fruit;Side Dish;Fall;Puree...,7.0,2.0,,NaT,NaT,NaT,NaT,NaT
1917393,4-Pepper Deviled Eggs,Alton Brown,Easy,12 deviled eggs,Good Eats,Major Pepper,American;Deviled Egg;Easter;Holiday;Egg Recipe...,10.0,2.0,,02:00:00,00:10:00,01:30:00,00:20:00,NaT
1910661,40 Cloves and a Chicken,Alton Brown,Intermediate,6 servings,Good Eats,In The Bulb of the Night (Garlic),Healthy;Chicken;Poultry;Main Dish;Low Sodium;L...,5.0,3.0,,01:40:00,01:30:00,NaT,00:10:00,NaT
1908804,Ab's B and B's,Alton Brown,,4 servings,Good Eats,American Pickle,Pickle Recipes;Celery;Vegetable;Cucumber;Glute...,10.0,4.0,,NaT,NaT,NaT,NaT,NaT


In [74]:
# Inspect the column dtypes produced by the conversions above.
df.dtypes

Title                     object
Author                  category
Level                   category
Yield                     object
Show                    category
Episode                   object
Categories                object
n_Ingredients            float64
n_Directions             float64
note_Total                object
t_Total          timedelta64[ns]
t_Cook           timedelta64[ns]
t_Inactive       timedelta64[ns]
t_Prep           timedelta64[ns]
t_Active         timedelta64[ns]
dtype: object

In [75]:
# Persist the frame and verify that the pickle round-trip is lossless.
pickle_path = 'recipe_df.pickle'
df.to_pickle(pickle_path)
restored = pd.read_pickle(pickle_path)
assert restored.equals(df)

# use DF

In [41]:
import pandas as pd
import pint
from pprint import pprint

In [42]:
# Load the recipe frame saved by the previous section and preview it.
# Unpickling can execute arbitrary code, so only load pickle files that this
# notebook produced itself.
df = pd.read_pickle('recipe_df.pickle')
df.head()

Unnamed: 0_level_0,Title,Author,Level,Yield,Show,Episode,Categories,n_Ingredients,n_Directions,note_Total,t_Total,t_Cook,t_Inactive,t_Prep,t_Active
foodnetwork_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1950796,10 Minute Apple Sauce,Alton Brown,Easy,1 quart,Good Eats,Apple Family Values,Easy Side Dish Recipes;Easy;Side Dish;Sauce Re...,7.0,2.0,,NaT,NaT,NaT,NaT,NaT
1938289,10 Minute Apple Sauce,Alton Brown,,1 quart,Good Eats,Cable in the Classroom,Sauce Recipes;Apple;Fruit;Side Dish;Fall;Puree...,7.0,2.0,,NaT,NaT,NaT,NaT,NaT
1917393,4-Pepper Deviled Eggs,Alton Brown,Easy,12 deviled eggs,Good Eats,Major Pepper,American;Deviled Egg;Easter;Holiday;Egg Recipe...,10.0,2.0,,02:00:00,00:10:00,01:30:00,00:20:00,NaT
1910661,40 Cloves and a Chicken,Alton Brown,Intermediate,6 servings,Good Eats,In The Bulb of the Night (Garlic),Healthy;Chicken;Poultry;Main Dish;Low Sodium;L...,5.0,3.0,,01:40:00,01:30:00,NaT,00:10:00,NaT
1908804,Ab's B and B's,Alton Brown,,4 servings,Good Eats,American Pickle,Pickle Recipes;Celery;Vegetable;Cucumber;Glute...,10.0,4.0,,NaT,NaT,NaT,NaT,NaT


In [3]:
# Flatten the ';'-separated category strings into one Series holding a
# category per entry, then show the ten most frequent categories.
# A nested comprehension replaces sum(lists, []), which copies the growing
# list on every addition.
category_lists = df.Categories.str.split(';').dropna()
df_categories = pd.Series([cat for cats in category_lists for cat in cats])
df_categories.value_counts().head(10)

Gluten Free      271
Vegetable        209
American         200
Easy             182
Fruit            177
Dessert          163
Low Sodium       146
Main Dish        131
Dairy Recipes    118
Side Dish        117
dtype: int64

In [4]:
# Horizontal bar chart of the 25 most frequent categories; the order is
# reversed so the most frequent category is drawn at the top.
top_categories = pd.Series(df_categories).value_counts().head(25)
top_categories.iloc[::-1].plot.barh(figsize=(6, 6))

<matplotlib.axes._subplots.AxesSubplot at 0x170b36d34a8>

In [72]:
# First ten recipes whose categories mention "Crowd"; recipes without any
# categories are treated as non-matching.
crowd_mask = df.Categories.str.contains("Crowd", na=False)
df.loc[crowd_mask, ['Title', 'Episode', 'Yield', 't_Total']].head(10)

Unnamed: 0_level_0,Title,Episode,Yield,t_Total
foodnetwork_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1965065,Alton Brown Ma Mae's Congealed Christmas Salad,,10 portions,NaT
1950306,Ambrosia,Puff the Magic Mallow,6 servings,02:30:00
1939636,Bacon Vinaigrette with Grilled Radicchio,Scrap Iron Chef,8 servings,00:20:00
1943974,Baklava,Switched on Baklava,about 28 pieces,10:00:00
1944590,Blackberry Grunt,Cobbled Together,6 to 8 servings,01:30:00
1950476,Blueberry Soda,Kinda Blue (Blueberry),"about 3 cups blueberry syrup, enough for 12 dr...",01:12:00
1949033,Buttermilk Pound Cake,American Classics V: A Pound of Cake,10 to 12 servings,01:40:00
2013561,Butternut Dumplings with Brown Butter and Sage,Squash Court,6 to 8 servings,01:45:00
1951938,Chocolate Mousse,The Art of Darkness,6 to 8 servings,01:40:00
1955790,Chocolate Mousse,,6 to 8 servings,NaT


In [40]:
# Conversion table: destination unit -> source unit names that convert_unit
# searches for (as substrings) in the ingredient text.
c_dict = {'g': ['ounce', 'oz', 'pound']}
_UNIT_REGISTRY = None


def _get_unit_registry():
    """Return one shared pint registry, creating it on first use."""
    global _UNIT_REGISTRY
    if _UNIT_REGISTRY is None:
        registry = pint.UnitRegistry()
        registry.default_format = '.1f'
        _UNIT_REGISTRY = registry
    return _UNIT_REGISTRY


def convert_unit(ingredient, convert_dict):
    """Convert the leading quantity of an ingredient line to another unit.

    ``convert_dict`` maps a destination unit to the source unit names to look
    for. For the first source unit found as a substring of ``ingredient``,
    the text up to the end of the word containing it is parsed as a quantity,
    converted to the destination unit and re-joined with the rest of the
    line. The line is returned unchanged when no unit is found or when the
    leading text cannot be parsed or converted.

    NOTE(review): matching is by substring, so e.g. 'oz' also hits 'dozen';
    such hits presumably fall through to the unchanged-line path -- confirm.
    NOTE(review): mixed fractions such as '2 1/2 pounds' may be read by pint
    as a product rather than a sum -- verify before trusting the numbers.
    """
    s = ingredient
    for dst_unit, src_units in convert_dict.items():
        for unit in src_units:
            start = s.find(unit)
            if start == -1:
                continue
            pos = s.find(' ', start)
            if pos == -1:
                # The unit is the last word: the whole line is the quantity.
                pos = len(s)
            before, after = s[:pos], s[pos:]
            # The registry is shared: building one per call is expensive.
            ureg = _get_unit_registry()
            try:
                return str(ureg.Quantity(before).to(dst_unit)) + after
            except (pint.DimensionalityError, pint.UndefinedUnitError):
                # Not a convertible quantity after all; leave the line alone.
                return s
    return s

def convert_units(ingredients, convert_dict):
    """Apply ``convert_unit`` to every ingredient line, preserving order."""
    converted = []
    for ingredient in ingredients:
        converted.append(convert_unit(ingredient, convert_dict))
    return converted

# Show the converted ingredient lists of the first ten recipes.
# NOTE(review): `data` is not loaded in this section; presumably it is still
# in memory from the earlier JSON-loading cell -- confirm on a fresh kernel.
first_keys = list(data)[:10]
for k in first_keys:
    converted = convert_units(data[k]['Ingredients'], c_dict)
    pprint(converted)

['3 Golden Delicious apples, peeled, cored, and quartered',
 '3 Fuji apples, peeled, cored, and quartered',
 '1 cup unfiltered apple juice',
 '2 tablespoons cognac or brandy',
 '2 tablespoons butter',
 '3 tablespoons honey',
 '1/2 teaspoon ground cinnamon']
['3 Golden Delicious apples, peeled, cored, and quartered',
 '3 Fuji apples, peeled, cored, and quartered',
 '1 cup unfiltered apple juice',
 '2 tablespoons cognac or brandy',
 '2 tablespoons butter',
 '3 tablespoons honey',
 '1/2 teaspoon ground cinnamon']
['6 hard boiled eggs, cooled and peeled',
 '1 teaspoon whole pink peppercorns, divided',
 '1/2 teaspoon whole white peppercorns',
 '1/2 teaspoon whole black peppercorns',
 '1/2 teaspoon whole green peppercorns',
 '1/2 teaspoon caper liquid',
 '1/4 cup mayonnaise',
 '1 teaspoon Dijon mustard',
 '1/4 teaspoon kosher salt',
 'Pinch sugar']
['1 whole chicken (broiler/fryer) cut into 8 pieces',
 '1/2 cup plus 2 tablespoons olive oil',
 '10 sprigs fresh thyme',
 '40 peeled cloves garli

In [71]:
def print_recipe(recipe):
    """Pretty-print one parsed recipe dict.

    Fields whose value is ``None`` are skipped. ``Categories`` is split on
    ``;`` into a list. Every ingredient list -- the main ``Ingredients`` key
    and the sectioned ``Ingredients.<n>.<section>`` keys -- is passed through
    ``convert_units`` with ``c_dict``. List values are printed under an
    underlined heading; all other values as an aligned ``key: value`` line.
    """
    for k, v in recipe.items():
        if v is None:
            continue
        if k == "Categories":
            v = v.split(';')
        elif k.startswith("Ingredients"):
            # Sectioned ingredient lists need the same unit conversion as
            # the main list.
            v = convert_units(v, c_dict)
        if isinstance(v, list):
            print()
            print(k)
            print("=" * len(k))
            pprint(v)
        else:
            print(f'{k+":":16} {v}')
# Example: pretty-print the parsed recipe stored under key '1939636'.
print_recipe(data['1939636'])

Title:           Bacon Vinaigrette with Grilled Radicchio
Author:          Recipe courtesy of Alton Brown
Level:           Easy
Total:           20 min
Prep:            10 min
Cook:            10 min
Yield:           8 servings
Show:            Good Eats
Episode:         Scrap Iron Chef

Categories
['American',
 'Salad Dressing Recipes',
 'Bacon Recipes',
 'Meat',
 'Lettuce Recipes',
 'Vegetable',
 'Side Dish',
 'Appetizer',
 'Grilling',
 'Recipes for a Crowd']

Ingredients
['1/4 cup extra-virgin olive oil',
 '1/4 cup cider vinegar',
 '2 tablespoons bacon drippings',
 '1 tablespoon dark brown sugar',
 '1 tablespoon prepared mustard',
 '1 teaspoon salt',
 '1 teaspoon freshly ground pepper',
 '2 heads radicchio, quartered']

Directions
['In a small non- reactive bowl combine all ingredients except radicchio. '
 'Whisk until emulsion is formed and reserve at room temperature.',
 'Place quartered radicchio onto a hot grill or grill pan. Cook on each side '
 'for approximately 2 minutes per