In [11]:
import pandas as pd
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import json
import time

In [2]:
# Open a single requests-html session and use it to fetch the Epicurious
# search page; the response is kept in `s` for link extraction

SEARCH_URL = "https://www.epicurious.com/search/"

session = HTMLSession()
s = session.get(SEARCH_URL)

In [9]:
# Extracting hyperlinks from the page and keeping only the recipe links.
# A new list is built rather than calling links.remove() inside a loop over
# the same list: removing an item during iteration shifts the remaining
# items left, so the loop skips the element that follows every removal and
# irrelevant links can survive the filter.
links = [link for link in s.html.links if '/recipes/food/' in link]

In [10]:
# Display the links that remain after filtering
links

['/recipes/food/views/chili-crisp-bucatini',
 '/recipes/food/views/mango-pudding',
 '/recipes/food/views/ba-syn-cheesy-pork-tomatillo-skillet',
 '/recipes/food/views/graham-cracker-crust',
 '/recipes/food/views/tomato-and-egg-shakshuka',
 '/recipes/food/views/spicy-celery-tofu-and-glass-noodle-salad',
 '/recipes/food/views/monkey-bread-recipe',
 '/recipes/food/views/chess-pie',
 '/recipes/food/views/apple-pie-cookies-recipe',
 '/recipes/food/views/miso-oats-with-egg-and-avocado']

In [16]:
# Initializing empty lists for retrieved data.
# All five lists are appended to together, so position k in each list
# describes the same recipe.

recipe_names = []
recipe_ingredients = []
recipe_times = []
recipe_ratings = []
recipe_instructions = []

# Pause between consecutive page requests, in seconds
COOLDOWN_SECONDS = 5

# Looping through hyperlinks retrieved from the search page

for link in links:
    recipe = session.get(f"https://www.epicurious.com{link}")
    soup = BeautifulSoup(recipe.content, 'lxml')

    # Retrieving LD+JSON data. Script tags without a single text payload
    # have .string == None, which json.loads cannot parse, so they are skipped.
    data = [
        json.loads(x.string)
        for x in soup.find_all("script", type="application/ld+json")
        if x.string
    ]

    # Without any LD+JSON block there is nothing to extract; skip this page
    # instead of failing on data[0] and losing everything scraped so far.
    if not data:
        print(f"No LD+JSON data found for {link}, skipping.")
        time.sleep(COOLDOWN_SECONDS)
        continue

    # Recipe fields are read from the first LD+JSON block on the page
    recipe_data = data[0]

    # Retrieving recipe information
    recipe_name = recipe_data['headline']
    recipe_ingredient = recipe_data['recipeIngredient']

    # Some recipes don't have totalTime and recipe rating. Only the lookup
    # errors that mean "field missing" are caught, so KeyboardInterrupt and
    # unrelated bugs are no longer silently swallowed by a bare except.
    try:
        recipe_time = recipe_data['totalTime']
    except KeyError:
        print("No recipe time found.")
        recipe_time = 0
    try:
        recipe_rating = recipe_data['aggregateRating']['ratingValue']
    except (KeyError, TypeError):
        # TypeError covers an 'aggregateRating' entry that is present but
        # not subscriptable by key (e.g. null in the JSON)
        print("No rating found.")
        recipe_rating = 0

    # One text entry per instruction step, in page order
    recipe_instruction = [step['text'] for step in recipe_data['recipeInstructions']]

    # Appending data to recipe list
    recipe_names.append(recipe_name)
    recipe_ingredients.append(recipe_ingredient)
    recipe_times.append(recipe_time)
    recipe_ratings.append(recipe_rating)
    recipe_instructions.append(recipe_instruction)

    # Cooldown period

    time.sleep(COOLDOWN_SECONDS)

No rating found.
No recipe time found.
No rating found.
No rating found.
No recipe time found.
No rating found.
No recipe time found.
No rating found.
No recipe time found.
No rating found.


In [23]:
# Assemble the collected lists into a column-name -> values mapping and
# build the pandas DataFrame from it

column_labels = [
    'Recipe Name',
    'Recipe Ingredients',
    'Recipe Time',
    'Recipe Rating',
    'Recipe Instructions',
]
column_values = [
    recipe_names,
    recipe_ingredients,
    recipe_times,
    recipe_ratings,
    recipe_instructions,
]
formatted_data = dict(zip(column_labels, column_values))
df = pd.DataFrame(formatted_data)

In [24]:
# Display the DataFrame built from the collected recipe data
df

Unnamed: 0,Recipe Name,Recipe Ingredients,Recipe Time,Recipe Rating,Recipe Instructions
0,Chili Crisp Bucatini,"[4 ounces pancetta, cut into small cubes, 3 la...",30 minutes,0.0,[In a large skillet or sauté pan over medium-l...
1,Mango Pudding,[1 pound cubed mango (from about 3 ripe mangoe...,0,0.0,[Set aside one-quarter of the cubed mango and ...
2,Cheesy Pork and Tomatillo Skillet,"[1 lb. ground pork, 1 tsp. Diamond Crystal or ...",35 minutes,0.0,[Place 1 lb. ground pork in a medium bowl and ...
3,Graham Cracker Crust,[1½ cups (5 oz.) finely ground graham crackers...,10 minutes,4.0,[Combine 1½ cups (5 oz.) finely ground graham ...
4,Tomato and Egg “Shakshuka”,"[2 Tbsp. red chile flakes, 2 Tbsp. Sichuan pep...",0,0.0,"[In a heatproof bowl, add the chile flakes, Si..."
5,"Spicy Celery, Tofu, and Glass Noodle Salad","[2 Tbsp. red chile flakes, 2 Tbsp. Sichuan pep...",0,0.0,"[In a heatproof bowl, add the chile flakes, Si..."
6,Homemade Monkey Bread,"[1½ tsp. instant yeast, 1½ tsp. fine sea salt,...",5 hours,5.0,"[Pulse 1½ tsp. instant yeast, 1½ tsp. fine sea..."
7,Chess Pie,"[1 disk Our Favorite Pie Crust, chilled, 4 lar...",1 hour plus cooling,5.0,[Place rack in middle of oven and preheat oven...
8,Apple Pie Cookies,"[¼ cup granulated sugar, 1 tsp. Diamond Crysta...",2 hours 30 minutes,4.5,"[Pulse ¼ cup granulated sugar, 1 tsp. Diamond ..."
9,Miso Oats With Egg and Avocado,"[1 cup old-fashioned rolled oats, 4 cups veget...",0,0.0,"[Place the oats in a saucepan, add the stock a..."
