In [1]:
import pandas as pd
import requests
from fake_useragent import UserAgent
from bs4 import BeautifulSoup

In [2]:
ua = UserAgent()
random_agent = ua.random
headers = {'User-Agent': random_agent}

In [3]:
url = 'https://www.allrecipes.com/recipes-a-z-6735880'

In [4]:
r = requests.get(url=url, headers=headers)
soup = BeautifulSoup(r.content, 'html.parser')

In [5]:
food_category = soup.find_all('ul', class_='loc mntl-link-list')

food_category_urls = []

for ul in food_category:
    for a in ul.find_all('a'):
        url = a.get('href')
        food_category_urls.append(url)

In [6]:
print(f'Number of categories from a to z: {len(food_category_urls)}')

Number of categories from a to z: 378


In [7]:
food_recipe_urls = []

for food_category_url in food_category_urls:
    r = requests.get(url=food_category_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    recipes = soup.find_all('div', id=lambda x: x and x.startswith('tax-sc__recirc-list_'))

    for recipe in recipes:
        links = recipe.find_all('a', href=True)
        recipe_urls = [link['href'] for link in links]
        food_recipe_urls.extend(recipe_urls)

In [8]:
print(f'Number of food recipes: {len(food_recipe_urls)}')

Number of food recipes: 18096


In [9]:
data = {'food': [], 'ingredients': [], 'steps': [], 'images': []}

for food_recipe_url in food_recipe_urls:
    r = requests.get(url=food_recipe_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    # Extract food name
    food = soup.find('h1', class_="article-heading type--lion")
    food_name = ''
    if food:
        food_name = food.text.strip()
        data['food'].append(food_name)
    else:
        print(f"Food name not valid in {food_recipe_url}")
        break

    # Extract ingredients
    ingredient_list = soup.find('ul', class_='mntl-structured-ingredients__list')
    ingredients = []
    if ingredient_list:
        for item in ingredient_list.find_all('li', class_='mntl-structured-ingredients__list-item'):
            # Extract quantity, unit, and ingredient name from <p> tag within <li>
            quantity_span = item.find('span', attrs={'data-ingredient-quantity': 'true'})
            unit_span = item.find('span', attrs={'data-ingredient-unit': 'true'})
            name_span = item.find('span', attrs={'data-ingredient-name': 'true'})
    
            # Extract text content from each span (if exists)
            quantity = quantity_span.text.strip() if quantity_span else None
            unit = unit_span.text.strip() if unit_span else None
            name = name_span.text.strip() if name_span else None
    
            # Print or use the extracted information
            if quantity and unit and name:
                ingredients.append(f"{quantity} {unit} {name}")
            elif quantity and name:
                ingredients.append(f"{quantity} {name}")
            else:
                ingredients.append(f"{name}")
    data['ingredients'].append('\n'.join(ingredients))

    # Extract steps/instructions
    steps_content = soup.find('div', class_='comp recipe__steps mntl-recipe-steps mntl-block')
    i=1
    steps = []
    if steps_content:
        steps_list = steps_content.find_all('li', class_='mntl-sc-block-group--LI')
        
        for step in steps_list:
            step_text = step.find('p', class_='mntl-sc-block-html')
            if step_text:
                step_text = step_text.get_text(strip=True)
    
            if step_text:
                steps.append(f"Step {i}: {step_text}")
                i+=1
    data['steps'].append('\n'.join(steps))

    # Extract iamges
    food_images = soup.find('div', class_='comp photo-dialog__page')
    images_urls = []
    if food_images:
        images = food_images.find_all('img', class_='universal-image__image')
        for img in images:
            if 'data-src' in img.attrs:
                image_url = img['data-src']
                images_urls.append(image_url)
    else:
        print(f"Food images not valid in {food_recipe_url}")
    data['images'].append('\n'.join(images_urls))

Food images not valid in https://www.allrecipes.com/article/i-tried-a-tiktok-trick-for-cleaning-my-air-fryer/
Food images not valid in https://www.allrecipes.com/article/air-fry-meat-fish-frozen/
Food images not valid in https://www.allrecipes.com/article/how-to-convert-recipes-for-an-air-fryer/
Food images not valid in https://www.allrecipes.com/article/how-healthy-is-air-fryer-cooking/
Food images not valid in https://www.allrecipes.com/article/2-ingredient-cake-makes-cheap-and-easy-dessert/
Food images not valid in https://www.allrecipes.com/article/fratelli-beretta-possible-source-salmonella-outbreak-italian-meats/
Food images not valid in https://www.allrecipes.com/most-saved-appetizers-of-all-time-8625395
Food images not valid in https://www.allrecipes.com/article/difference-between-applesauce-and-apple-butter/
Food images not valid in https://www.allrecipes.com/article/bialys-vs-bagels/
Food images not valid in https://www.allrecipes.com/article/we-tried-two-ingredient-dough-thr

Food images not valid in https://www.allrecipes.com/gallery/teriyaki-salmon-recipes/
Food images not valid in https://www.allrecipes.com/gallery/win-your-chili-cook-off-championship-chili-recipes/
Food images not valid in https://www.allrecipes.com/copycat-panda-express-chow-mein-recipe-7556785
Food images not valid in https://www.allrecipes.com/gallery/taiwanese-recipes/
Food images not valid in https://www.allrecipes.com/article/easiest-chocolate-cake-from-scratch/
Food images not valid in https://www.allrecipes.com/article/the-easiest-most-impressive-chocolate-cake/
Food images not valid in https://www.allrecipes.com/article/chocolate-chip-cookies-20-ways/
Food images not valid in https://www.allrecipes.com/recipe/259551/the-best-sugar-free-fudge/
Food images not valid in https://www.allrecipes.com/gallery/one-bowl-desserts/
Food images not valid in https://www.allrecipes.com/article/how-to-make-chowder/
Food images not valid in https://www.allrecipes.com/article/cuisine-of-the-nort

Food images not valid in https://www.allrecipes.com/recipe/266313/fresh-peach-empanadas/
Food images not valid in https://www.allrecipes.com/gallery/energy-bite-recipes/
Food images not valid in https://www.allrecipes.com/article/how-make-whole-wheat-english-muffins/
Food images not valid in https://www.allrecipes.com/gallery/top-chicken-fajitas/
Food images not valid in https://www.allrecipes.com/gallery/best-falafel-recipes/
Food images not valid in https://www.allrecipes.com/article/retiree-food-budget-planning/
Food images not valid in https://www.allrecipes.com/article/perfect-flan/
Food images not valid in https://www.allrecipes.com/gallery/mexican-flan-recipes/
Food images not valid in https://www.allrecipes.com/article/difference-between-skirt-hanger-flank-flat-iron-steak/
Food images not valid in https://www.allrecipes.com/article/how-to-decorate-focaccia-bread-like-work-of-art/
Food images not valid in https://www.allrecipes.com/gallery/grilled-bread-recipes/
Food images not 

Food images not valid in https://www.allrecipes.com/gallery/recipes-with-orange-marmalade/
Food images not valid in https://www.allrecipes.com/gallery/vintage-jello-dessert-recipes/
Food images not valid in https://www.allrecipes.com/article/layered-jello-yogurt-dessert-cups/
Food images not valid in https://www.allrecipes.com/article/vintage-grandmother-jell-o-recipes/
Food images not valid in https://www.allrecipes.com/recipe/18208/congeal-salad/
Food images not valid in https://www.allrecipes.com/article/how-to-make-jell-o-shots/
Food images not valid in https://www.allrecipes.com/recipe/275057/sex-on-the-beach-jell-o-shots/
Food images not valid in https://www.allrecipes.com/article/we-tried-boozy-rainbow-jello-salad/
Food images not valid in https://www.allrecipes.com/article/chef-johns-korean-bbq-short-ribs-and-teriyaki-marinade/
Food images not valid in https://www.allrecipes.com/article/keto-dinner-ideas/
Food images not valid in https://www.allrecipes.com/gallery/keto-desserts

Food images not valid in https://www.allrecipes.com/gallery/seven-mediterranean-diet-menus/
Food images not valid in https://www.allrecipes.com/gallery/best-slow-cooker-recipes-for-the-mediterranean-diet/
Food images not valid in https://www.allrecipes.com/gallery/budget-friendly-mediterranean-diet-recipes/
Food images not valid in https://www.allrecipes.com/what-is-birria-8598668
Food images not valid in https://www.allrecipes.com/how-to-make-the-best-conchas-7371151
Food images not valid in https://www.allrecipes.com/article/what-is-mincemeat-pie/
Food images not valid in https://www.allrecipes.com/gallery/best-monkey-bread-recipes/
Food images not valid in https://www.allrecipes.com/gallery/pull-apart-breads-to-share/
Food images not valid in https://www.allrecipes.com/article/how-to-make-monkey-bread/
Food images not valid in https://www.allrecipes.com/gallery/chocolate-mousse-cake-recipes/
Food images not valid in https://www.allrecipes.com/top-10-muffin-recipes-7370124
Food image

Food images not valid in https://www.allrecipes.com/article/how-to-grill-baby-back-ribs/
Food images not valid in https://www.allrecipes.com/article/cook-pork-shoulder-roast/
Food images not valid in https://www.allrecipes.com/gallery/the-worlds-best-pork-shoulder-recipes/
Food images not valid in https://www.allrecipes.com/recipe/276222/easy-pot-pie-with-biscuits/
Food images not valid in https://www.allrecipes.com/article/top-tips-home-cooks-how-make-best-pot-roast/
Food images not valid in https://www.allrecipes.com/gallery/slow-cooker-pot-roast-recipes/
Food images not valid in https://www.allrecipes.com/gallery/eye-of-round-steak-recipes/
Food images not valid in https://www.allrecipes.com/gallery/best-pot-roast-recipes/
Food images not valid in https://www.allrecipes.com/article/best-potatoes-for-potato-salad/
Food images not valid in https://www.allrecipes.com/best-potato-salad-recipe-review-7852966
Food images not valid in https://www.allrecipes.com/article/perfect-potato-salad

Food images not valid in https://www.allrecipes.com/gallery/easy-scones-recipes/
Food images not valid in https://www.allrecipes.com/recipe/268815/scrumpets/
Food images not valid in https://www.allrecipes.com/gallery/cheese-scone-recipes/
Food images not valid in https://www.allrecipes.com/gallery/buttermilk-scone-recipes/
Food images not valid in https://www.allrecipes.com/gallery/apple-scone-recipes/
Food images not valid in https://www.allrecipes.com/article/how-to-make-savory-pumpkin-scones/
Food images not valid in https://www.allrecipes.com/article/how-to-cook-brisket/
Food images not valid in https://www.allrecipes.com/article/what-is-a-matzo-ball/
Food images not valid in https://www.allrecipes.com/gallery/passover-seder-menu/
Food images not valid in https://www.allrecipes.com/gallery/passover-kugel-recipes/
Food images not valid in https://www.allrecipes.com/gallery/best-shepherds-pie-recipes/
Food images not valid in https://www.allrecipes.com/article/how-to-make-shepherds-

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [10]:
df = pd.DataFrame(data)
df

Unnamed: 0,food,ingredients,steps,images
0,4 Ingredient Air Fryer Pepper Poppers,"1 bell pepper, any color\n8 ounces cream chees...",Step 1: Preheat an air fryer to 390 degrees F ...,https://www.allrecipes.com/thmb/DqLx4iEB4tGbps...
1,Air Fryer Bell Pepper Poppers,9 mini pepper mini bell peppers\n5.2 ounce Gou...,Step 1: Preheat the air fryer to 400 degrees F...,https://www.allrecipes.com/thmb/Gc0cSAo6L83fPT...
2,Air Fryer Cinnamon Roll Bites,1 (12.4 ounce) cinnamon rolls\n1/4 cup white s...,Step 1: Preheat an air fryer to 325 degrees F ...,https://www.allrecipes.com/thmb/oqVIA1oHBilFH4...
3,Air Fryer Ham and Cheese Wraps,4 (7-inch flour tortillas\n1/4 cup spicy musta...,Step 1: Place tortillas on a clean work surfac...,https://www.allrecipes.com/thmb/oXVObTvi9ZjGQ0...
4,Air Fryer Buffalo Cauliflower,½ cup Buffalo wing sauce (such as Frank's)\n1 ...,"Step 1: Combine hot sauce, butter, and honey i...",https://imagesvc.meredithcorp.io/v3/mm/image?u...
...,...,...,...,...
15056,Banh Mi Sliders,½ pound ground beef\n½ pound ground pork\n½ cu...,"Step 1: Gently mix beef, pork, oats, bread, eg...",https://www.allrecipes.com/thmb/vaJHsbFn7wQdlf...
15057,Buffalo Tempeh Sliders,"3 (8 ounce) packages tempeh, cut into 2-inch s...",Step 1: Place tempeh in a large bowl and cover...,https://imagesvc.meredithcorp.io/v3/mm/image?u...
15058,Bar Chicks - Blackened Chicken Sliders,2 cups white wine\n2 tablespoons soy sauce\n1 ...,"Step 1: Mix white wine, soy sauce, 1 tablespoo...",https://www.allrecipes.com/thmb/tur4V3rPsxUkAl...
15059,Asian Turkey Sliders,cooking spray\n1 pound lean ground turkey\n1 ½...,Step 1: Preheat oven to 375 degrees F (190 deg...,https://imagesvc.meredithcorp.io/v3/mm/image?u...


In [11]:
total_count_sum = df['images'].str.split('\n').apply(lambda x: len(x)).sum()
print("\nSum of counts:", total_count_sum)


Sum of counts: 93087


In [12]:
df[df['images'] == ''].shape[0]

574

In [13]:
df = df.drop_duplicates(subset=['images'],  keep=False)
df

Unnamed: 0,food,ingredients,steps,images
5,Air Fryer Honey-Mustard Chicken Thighs,4 boneless skinless chicken thighs\n1/2 teaspo...,Step 1: Preheat the air fryer to 390 degrees F...,https://www.allrecipes.com/thmb/-CYgGC7jTLUw5_...
6,Air Fryer Hearts of Palm Sticks,1/4 cup all-purpose flour\n1/4 teaspoon salt\n...,"Step 1: Stir flour, salt, and pepper together ...",https://www.allrecipes.com/thmb/Kef01dPFOlBSmG...
9,Air Fryer Spanakopita,"2 (10- ounces) pkg. spinach, thawed and squeez...","Step 1: For filling, stir together spinach, fe...",https://www.allrecipes.com/thmb/FuRqOs7lAsE79g...
10,Air Fryer Pecan Crusted Trout,2/3 cup chopped pecans\n1 teaspoon dried rosem...,Step 1: Preheat air fryer to 400 degrees F (20...,https://www.allrecipes.com/thmb/yGwefCpl2pUVNp...
11,Air Fryer Blooming Onion,1 cup milk\n1 large egg\n1 cup flour\n3/4 teas...,Step 1: Preheat air fryer to 375 degrees F (19...,https://www.allrecipes.com/thmb/9hzWCm81o4EbdU...
...,...,...,...,...
15056,Banh Mi Sliders,½ pound ground beef\n½ pound ground pork\n½ cu...,"Step 1: Gently mix beef, pork, oats, bread, eg...",https://www.allrecipes.com/thmb/vaJHsbFn7wQdlf...
15057,Buffalo Tempeh Sliders,"3 (8 ounce) packages tempeh, cut into 2-inch s...",Step 1: Place tempeh in a large bowl and cover...,https://imagesvc.meredithcorp.io/v3/mm/image?u...
15058,Bar Chicks - Blackened Chicken Sliders,2 cups white wine\n2 tablespoons soy sauce\n1 ...,"Step 1: Mix white wine, soy sauce, 1 tablespoo...",https://www.allrecipes.com/thmb/tur4V3rPsxUkAl...
15059,Asian Turkey Sliders,cooking spray\n1 pound lean ground turkey\n1 ½...,Step 1: Preheat oven to 375 degrees F (190 deg...,https://imagesvc.meredithcorp.io/v3/mm/image?u...


In [14]:
df = df.drop_duplicates(subset='food', keep=False).dropna()
df

Unnamed: 0,food,ingredients,steps,images
5,Air Fryer Honey-Mustard Chicken Thighs,4 boneless skinless chicken thighs\n1/2 teaspo...,Step 1: Preheat the air fryer to 390 degrees F...,https://www.allrecipes.com/thmb/-CYgGC7jTLUw5_...
6,Air Fryer Hearts of Palm Sticks,1/4 cup all-purpose flour\n1/4 teaspoon salt\n...,"Step 1: Stir flour, salt, and pepper together ...",https://www.allrecipes.com/thmb/Kef01dPFOlBSmG...
9,Air Fryer Spanakopita,"2 (10- ounces) pkg. spinach, thawed and squeez...","Step 1: For filling, stir together spinach, fe...",https://www.allrecipes.com/thmb/FuRqOs7lAsE79g...
10,Air Fryer Pecan Crusted Trout,2/3 cup chopped pecans\n1 teaspoon dried rosem...,Step 1: Preheat air fryer to 400 degrees F (20...,https://www.allrecipes.com/thmb/yGwefCpl2pUVNp...
11,Air Fryer Blooming Onion,1 cup milk\n1 large egg\n1 cup flour\n3/4 teas...,Step 1: Preheat air fryer to 375 degrees F (19...,https://www.allrecipes.com/thmb/9hzWCm81o4EbdU...
...,...,...,...,...
15056,Banh Mi Sliders,½ pound ground beef\n½ pound ground pork\n½ cu...,"Step 1: Gently mix beef, pork, oats, bread, eg...",https://www.allrecipes.com/thmb/vaJHsbFn7wQdlf...
15057,Buffalo Tempeh Sliders,"3 (8 ounce) packages tempeh, cut into 2-inch s...",Step 1: Place tempeh in a large bowl and cover...,https://imagesvc.meredithcorp.io/v3/mm/image?u...
15058,Bar Chicks - Blackened Chicken Sliders,2 cups white wine\n2 tablespoons soy sauce\n1 ...,"Step 1: Mix white wine, soy sauce, 1 tablespoo...",https://www.allrecipes.com/thmb/tur4V3rPsxUkAl...
15059,Asian Turkey Sliders,cooking spray\n1 pound lean ground turkey\n1 ½...,Step 1: Preheat oven to 375 degrees F (190 deg...,https://imagesvc.meredithcorp.io/v3/mm/image?u...


In [15]:
# df.to_excel('./data/allrecipes_data.xlsx', index=False) 
df = pd.read_excel('./data/allrecipes_data.xlsx')

In [35]:
# # if more than one image is desired for each recipe
# df['images'] = df['images'].str.split('\n')
# df = df.explode('images')
# df

In [24]:
output_folder = "./data/images/allrecipes_images"

In [25]:
# Create an output directory if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for index, row in df.iterrows():
    url = row['images'].split('\n')[0]
    try:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            image_content = response.content
            image_name = f'image_{index}.jpg'

            with open(os.path.join(output_folder, image_name), 'wb') as image_file:
                image_file.write(image_content)
            print(f'Successfully downloaded {image_name}')
        else:
            print(f'Failed to download image from {url}')
    except:
        print(f'An error occurred while downloading image from {url}')