In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm
import time
from bs4 import BeautifulSoup
import asyncio
import aiohttp
from tqdm.asyncio import tqdm_asyncio
import nest_asyncio
import os



In [2]:
ingredients = [
    "rating", "almond", "apple", "apricot", "artichoke", "arugula", "asian pear", "asparagus", "avocado",
    "bacon", "banana", "barley", "basil", "beef", "beet", "bell pepper",
    "berry", "blackberry", "blue cheese", "blueberry", "bok choy", "broccoli", "broccoli rabe", "brussel sprout",
    "butter", "buttermilk", "butternut squash", "cabbage", "capers", "carrot", "cashew",
    "cauliflower", "celery", "cheese", "cherry", "chestnut", "chickpea", "chile", "chile pepper", "chili",
    "chive", "chocolate", "cilantro", "cinnamon", "clove", "coconut", "cod", "collard greens",
    "corn", "cornmeal", "cottage cheese", "crab", "cranberry", "cream cheese", "cucumber", "cumin", "currant", "curry",
    "date", "dill", "egg", "eggplant", "endive", "feta", "fig", "fish", "fontina",
    "garlic", "ginger", "goat cheese", "gouda", "grape", "grapefruit", "green bean",
    "green onion/scallion", "ground beef", "ground lamb", "guava", "hazelnut", "honey", "honeydew", "horseradish",
    "hot pepper", "jalapeño", "jam or jelly", "kale", "lamb",
    "lentil", "lettuce", "lima bean", "lime", "lobster",
    "macadamia nut", "mango", "maple syrup", "melon", "mint", "molasses", "monterey jack", "mozzarella", "mushroom",
    "mussel", "mustard", "mustard greens", "nutmeg", "oat", "okra", "olive", "onion", "orange",
    "oregano", "orzo", "oyster", "parmesan", "parsley", "parsnip", "pea", "peach", "peanut", "peanut butter",
    "pear", "pecan", "pepper", "persimmon", "pine nut", "pineapple", "pistachio", "plantain", "plum", "pomegranate",
    "pork", "potato", "prune", "pumpkin", "quince", "quinoa", "radicchio", "radish", "raisin", "raspberry",
    "rhubarb", "rice", "ricotta", "root vegetable", "rosemary", "rye",
    "sage", "salmon", "sardine", "sausage", "scallop", "sesame",
    "sesame oil", "shallot", "shellfish", "shrimp", "snapper", "soy", "spinach", "squash", "squid", "strawberry",
    "sugar snap pea", "sweet potato/yam", "tilapia", "tofu", "tomatillo", "tomato", "tree nut", "turnip",
    "vanilla", "veal", "vinegar", "wasabi", "watercress", "watermelon", "wild rice", "yogurt", "yuca",
    "zucchini", "marshmallow"
]

In [3]:
ingredients=pd.Series(ingredients)

In [7]:
ingredients.to_csv("data/ingredients.csv", index=False, header=False)

In [33]:
api_key = "8nQnIXeIWHj5iIe1tvb10mf275UZa5Hic1KLVAvV"
nutrition_url = "https://api.nal.usda.gov/fdc/v1/foods/search"

In [128]:
ingredients=ingredients[1:]

In [116]:
DV=pd.read_csv("data/DailyValuesNutrient.csv")


## 🍎 Сбор данных о пищевой ценности ингредиентов через **USDA FoodData Central API**

🔹 Используем **USDA FoodData Central API** для запроса по каждому ингредиенту из списка.

🔹 Для каждого ингредиента:

- Получаем **его питательные вещества (`foodNutrients`)** через API.
- Находим **`nutrientId`** нутриентов и сопоставляем с таблицей **дневных норм потребления (DV)**.
- Рассчитываем **% от дневной нормы:**

$$
\%DV = \frac{\text{nutrient value}}{\text{daily value}} \times 100
$$

- Сохраняем данные в **CSV**

In [130]:
nutrition_data = []

for ingr in tqdm(ingredients):
    if "/" in ingr:
        ingr_req = ingr.split("/")
    else:
        ingr_req = [ingr]

    for ingredient in ingr_req:
        response = requests.get(nutrition_url, params={
            "query": ingredient,
            "api_key": api_key,
            "pageSize": 1
        })
        if response.status_code == 200:
            nutrition_info = response.json()
            if nutrition_info['totalHits'] == 0:
                continue
            nutrients = nutrition_info["foods"][0]["foodNutrients"]
            nutrient_dict = {"ingredient": ingredient}
            for nutrient in nutrients:
                for name,dailyValue,id in DV.itertuples(index=False):
                    if nutrient['nutrientId'] == id:
                        nutrient_dict[name] = round((nutrient['value']/dailyValue)*100)
                        break
            nutrition_data.append(nutrient_dict)

  0%|          | 0/180 [00:00<?, ?it/s]

100%|██████████| 180/180 [03:47<00:00,  1.27s/it]


In [138]:
nutrition_df = pd.DataFrame(nutrition_data)
nutrition_df.to_csv("data/ingredient_nutrition_dv.csv", index=False)

In [3]:
recipes=pd.read_csv('data/epi_r.csv', usecols=[0])
recipes

Unnamed: 0,title
0,"Lentil, Apple, and Turkey Wrap"
1,Boudin Blanc Terrine with Red Onion Confit
2,Potato and Fennel Soup Hodge
3,Mahi-Mahi in Tomato Olive Sauce
4,Spinach Noodle Casserole
...,...
20047,Parmesan Puffs
20048,Artichoke and Parmesan Risotto
20049,Turkey Cream Puff Pie
20050,Snapper on Angel Hair with Citrus Cream


In [4]:
recipes=recipes['title']

In [22]:
headers = {
    "User-Agent": "Mozilla/5.0"
}

recipes_data = []

In [19]:
for query in tqdm(recipes[0:1]):
    search_url = f"https://www.epicurious.com/search?q={query.replace(' ', '+')}"
    response = requests.get(search_url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch: {query}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Найти первую ссылку на рецепт
    first_result = soup.find('div', class_='ClampContent-hilPkr fvKowN')
    if not first_result:
        continue

    a_tag = first_result.find('a', href=True)
    if not a_tag:
        continue

    url = "https://www.epicurious.com" + a_tag['href']
    title = a_tag.get_text(strip=True)

    # Переход на страницу рецепта
    recipe_resp = requests.get(url, headers=headers)
    if recipe_resp.status_code != 200:
        rating = "N/A"
        description = "N/A"
    else:
        recipe_soup = BeautifulSoup(recipe_resp.text, 'html.parser')

        # Рейтинг
        rating_tag = recipe_soup.find("p", class_="RatingRating-btVmKd")
        rating = rating_tag.get_text(strip=True) if rating_tag else "N/A"

        # Краткое описание
        tip_block = recipe_soup.find("div", {"data-testid": "cn-wrapper"})
        if tip_block:
            desc_p = tip_block.find("p")
            description = desc_p.get_text(strip=True) if desc_p else "N/A"
        else:
            description = "N/A"


    recipes_data.append({
        "searched_query": query,
        "title": title,
        "rating": rating,
        "description": description,
        "url": url
    })

    time.sleep(0.5)  # минимальная пауза на всякий случай

# Сохранение в DataFrame


100%|██████████| 1/1 [00:03<00:00,  3.27s/it]


In [21]:
df = pd.DataFrame(recipes_data)
df

Unnamed: 0,searched_query,title,rating,description,url
0,"Lentil, Apple, and Turkey Wrap","Lentil, Apple, and Turkey Wrap",2.5,"Cooking time for lentils can vary, depending o...",https://www.epicurious.com/recipes/food/views/...


In [None]:
df.to_csv("data/recipes_with_urls.csv", index=False)

In [26]:
nest_asyncio.apply()

headers = {"User-Agent": "Mozilla/5.0"}
missed = []

def simplify_query(query):
    return query.replace(",", "").replace("and", "").lower().strip()

async def fetch_recipe(session, query):
    simplified_query = simplify_query(query)

    try:
        search_url = f"https://www.epicurious.com/search?q={simplified_query.replace(' ', '+')}"
        async with session.get(search_url, headers=headers) as resp:
            if resp.status != 200:
                print(f"[✗] Failed to fetch search for: {query}")
                missed.append(query)
                return None

            html = await resp.text()
            soup = BeautifulSoup(html, 'html.parser')

            # Найти первую ссылку
            first_result = soup.find('div', class_='ClampContent-hilPkr fvKowN')
            if not first_result:
                print(f"[!] No result found for: {query}")
                missed.append(query)
                return None

            a_tag = first_result.find('a', href=True)
            if not a_tag:
                print(f"[!] No link in result for: {query}")
                missed.append(query)
                return None

            url = "https://www.epicurious.com" + a_tag['href']
            title = a_tag.get_text(strip=True)

        # Step 2: Перейти на страницу рецепта
        async with session.get(url, headers=headers) as recipe_resp:
            if recipe_resp.status != 200:
                print(f"[!] Failed to fetch recipe page: {url}")
                return {
                    "searched_query": query,
                    "title": title,
                    "rating": "N/A",
                    "description": "N/A",
                    "url": url
                }

            html = await recipe_resp.text()
            recipe_soup = BeautifulSoup(html, 'html.parser')

            # Рейтинг
            rating_tag = recipe_soup.find("p", class_="RatingRating-btVmKd")
            rating = rating_tag.get_text(strip=True) if rating_tag else "N/A"

            # Описание
            tip_block = recipe_soup.find("div", {"data-testid": "cn-wrapper"})
            if tip_block:
                desc_p = tip_block.find("p")
                description = desc_p.get_text(strip=True) if desc_p else "N/A"
            else:
                description = "N/A"

        return {
            "searched_query": query,
            "title": title,
            "rating": rating,
            "description": description,
            "url": url
        }

    except Exception as e:
        print(f"[ERROR] {query}: {e}")
        missed.append(query)
        return None

async def scrape_all(recipes):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_recipe(session, query) for query in recipes]
        results = await tqdm_asyncio.gather(*tasks)
        return [r for r in results if r]

def run_scraper(recipes, output_file="data/recipes_with_urls.csv"):
    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(scrape_all(recipes))
    df = pd.DataFrame(results)

    # Дозапись в CSV, если файл уже существует
    if os.path.exists(output_file):
        old_df = pd.read_csv(output_file)
        df = pd.concat([old_df, df], ignore_index=True)

    df.to_csv(output_file, index=False)
    print(f"[✓] Сохранено в {output_file} ({len(df)} рецептов всего)")

    # Сохраняем пропущенные
    if missed:
        with open("missed_queries.txt", "w", encoding="utf-8") as f:
            f.write("\n".join(missed))
        print(f"[!] Пропущено {len(missed)} рецептов. Сохранено в missed_queries.txt")

In [None]:
run_scraper(recipes, "data/recipes_with_urls2.csv")

In [4]:
with open("missed_queries2.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [5]:
run_scraper(missed_queries[1000:], "data/recipes_with_urls2.csv")

  3%|▎         | 387/13520 [01:09<46:44,  4.68it/s]  

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


  3%|▎         | 472/13520 [01:20<16:52, 12.89it/s]  

[✗] Failed to fetch search for: Harissa Shrimp And Summer Vegetable Sauté
[✗] Failed to fetch search for: Dark Chocolate Avocado Brownies
[✗] Failed to fetch search for: Classic Turkey Stock


  4%|▎         | 480/13520 [01:20<14:28, 15.01it/s]

[✗] Failed to fetch search for: Cointreau Spritz


  4%|▎         | 482/13520 [01:21<18:58, 11.46it/s]

[✗] Failed to fetch search for: Pappardelle with Chicken and Mushroom Ragù


  4%|▎         | 484/13520 [01:21<17:42, 12.27it/s]

[✗] Failed to fetch search for: Teriyaki Black Cod with Sticky Rice Cakes and Seared Baby Bok Choy


  4%|▎         | 486/13520 [01:21<19:55, 10.90it/s]

[✗] Failed to fetch search for: Grilled Chicken Breasts with Spicy Yellow Pepper Purée


  4%|▎         | 489/13520 [01:22<33:14,  6.53it/s]

[✗] Failed to fetch search for: Smoked-Sable Tartare with Beets and Watercress


  4%|▎         | 492/13520 [01:22<31:47,  6.83it/s]

[✗] Failed to fetch search for: Sweet Potato Noodle Stir-Fry with Choy Sum and Shiitake Mushrooms


  4%|▍         | 515/13520 [01:26<30:22,  7.13it/s]  

[✗] Failed to fetch search for: Chinese-Hawaiian "Barbecued" Ribs


  5%|▍         | 636/13520 [01:43<26:15,  8.18it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


  6%|▌         | 770/13520 [02:03<24:15,  8.76it/s]  

[✗] Failed to fetch search for: Sweet-Potato Soup with Nutmeg and Maple Syrup


  6%|▌         | 772/13520 [02:03<24:00,  8.85it/s]

[✗] Failed to fetch search for: Laxmi's Fresh Coconut Milk


  6%|▌         | 786/13520 [02:06<28:08,  7.54it/s]

[✗] Failed to fetch search for: Corn and Bell Pepper Salad
[✗] Failed to fetch search for: Pink Grapefruit Tart with Edamame Ice Cream and Black Sesame Seeds


  7%|▋         | 942/13520 [02:30<56:19,  3.72it/s]  

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


  8%|▊         | 1024/13520 [02:41<31:55,  6.52it/s] 

[✗] Failed to fetch search for: Blue Cheese Canapes with Pecans and Grapes


  8%|▊         | 1027/13520 [02:41<32:29,  6.41it/s]

[✗] Failed to fetch search for: Roasted Racks of Lamb with Artichokes, Red Onions, and Garlic Cloves
[✗] Failed to fetch search for: Lamb Bacon
[✗] Failed to fetch search for: Yellow Rice Pilaf


  8%|▊         | 1032/13520 [02:41<18:49, 11.06it/s]

[✗] Failed to fetch search for: Garlic Croutes
[✗] Failed to fetch search for: Pickled Peppers with Shallots and Thyme
[✗] Failed to fetch search for: Corn Bread and Chestnut Stuffing


  8%|▊         | 1034/13520 [02:42<22:12,  9.37it/s]

[✗] Failed to fetch search for: Cranberry and Tart-Cherry Compote
[✗] Failed to fetch search for: Corn and Herb-Bread Sausage Stuffing
[✗] Failed to fetch search for: Duck Prosciutto Breadsticks with Ricotta and Dried Figs
[✗] Failed to fetch search for: White Chocolate Frosting


  8%|▊         | 1045/13520 [02:42<12:09, 17.10it/s]

[✗] Failed to fetch search for: Citrus, Celery, and Shaved Fennel Salad


  8%|▊         | 1116/13520 [02:53<26:05,  7.92it/s]  

[✗] Failed to fetch search for: Spicy Roasted Vegetable Soup with Toasted Tortillas


  8%|▊         | 1143/13520 [02:57<22:27,  9.18it/s]  

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 10%|▉         | 1290/13520 [03:18<19:50, 10.27it/s]  

[✗] Failed to fetch search for: Broiled Grouper Fillets with Romesco Sauce


 10%|▉         | 1309/13520 [03:21<20:56,  9.72it/s]  

[✗] Failed to fetch search for: Cream Cheese Hamantaschen


 10%|▉         | 1314/13520 [03:22<17:49, 11.41it/s]

[✗] Failed to fetch search for: Peach Baked Alaskas with Spiced Peaches and Raspberries
[✗] Failed to fetch search for: Berry and Ricotta Slice


 10%|▉         | 1338/13520 [03:28<1:46:04,  1.91it/s]

[✗] Failed to fetch search for: Margarita
[✗] Failed to fetch search for: Green-Peppercorn Cornmeal Crackers


 10%|▉         | 1345/13520 [03:29<36:40,  5.53it/s]  

[✗] Failed to fetch search for: Lemon Pudding Cake with Cranberry Syrup


 11%|█         | 1447/13520 [03:42<26:42,  7.53it/s]  

[✗] Failed to fetch search for: Sweet Potato Soup with Buttered Pecans
[✗] Failed to fetch search for: Tuscan White Bean Salad with Spinach, Olives, and Sun-Dried Tomatoes


 11%|█         | 1449/13520 [03:42<25:26,  7.91it/s]

[✗] Failed to fetch search for: Spice-Rubbed Duck Legs Braised with Green Olives and Carrots


 11%|█         | 1454/13520 [03:43<21:16,  9.45it/s]

[✗] Failed to fetch search for: Tangerine Granita with Vanilla Bean Cream


 11%|█         | 1458/13520 [03:43<20:08,  9.98it/s]

[✗] Failed to fetch search for: Mulled Wine Melange
[✗] Failed to fetch search for: Cargamanto and Green-Bean Salad with Chimichurri Dressing


 11%|█         | 1508/13520 [03:50<21:25,  9.35it/s]  

[✗] Failed to fetch search for: Currant and Molasses Spice Cookies
[✗] Failed to fetch search for: Butternut Squash and Sweet Potato Purée


 12%|█▏        | 1619/13520 [04:08<25:47,  7.69it/s]  

[✗] Failed to fetch search for: Apple Bok Choy Salad


 12%|█▏        | 1622/13520 [04:08<24:19,  8.15it/s]

[✗] Failed to fetch search for: Peel-and-Eat Spiced Shrimp with Chipotle Remoulade
[✗] Failed to fetch search for: Garlicky Broccoli Rabe


 14%|█▍        | 1951/13520 [05:00<31:17,  6.16it/s]  

[ERROR] Sausage and Leek Soup: 
[ERROR] The Ultimate Bolognese Sauce: 
[ERROR] Avgolemono: 
[ERROR] Garlic and Sage Marinated Antipasto: 
[ERROR] Pan-Roasted Pork Chops with Cranberries and Red Swiss Chard: 
[ERROR] Peanut Butter–Sriracha Toast: 
[ERROR] Fried Green Plantains: 
[ERROR] Honey-Roasted Chicken with Lemon and Tarragon: 
[ERROR] Pork Pot Stickers: 
[ERROR] Sake-Marinated Sea Bass with Coconut-Curry Sauce: 
[ERROR] Ricotta and Red Chard Cannelloni: 
[ERROR] Chorizo-Lemon Butter: 
[ERROR] Cassoulet: 
[ERROR] Shrimp Uggie: 
[ERROR] Chocolate-Dipped Orange Peel: 
[ERROR] Pureed Celery Root, Parsnips and Potatoes: 
[ERROR] Roasted Butternut Squash with Lime Juice: 
[ERROR] Strawberry Yogurt Ice Pops: 
[ERROR] Coconut Tuile Cones with Passion-Fruit Ice Cream: 
[ERROR] Pastrami-Style Grilled Turkey Breast: 
[ERROR] Smash: 
[ERROR] Lamb with White Poppy Seeds: 
[ERROR] Rhubarb Compote with Quick Coconut Sorbet: 
[ERROR] Bourbon Punch with Pink Grapefruit and Mint: 
[ERROR] Chicken 

 25%|██▌       | 3425/13520 [05:00<00:08, 1243.08it/s]

[ERROR] Grill-Roasted Rack of Lamb in Red Mole: 
[ERROR] Veal Scallops with Parsley, Lemon and Garlic: 
[ERROR] Heirloom Tomato Salad with Mozzarella and Basil: 
[ERROR] Crudites and Grilled Sausage with Sweet and Hot Chutneys: 
[ERROR] Tiny Baked Potatoes with Chervil Cream: 
[ERROR] Peanut Noodles with Shrimp: 
[ERROR] Sangria: 
[ERROR] Red Bell Pepper Corn Bread: 
[ERROR] Herb-Rubbed Turkey with Roasted-Garlic Gravy: 
[ERROR] Artichokes, Capers, Olives, Lemon Zest, and Italian Tuna on Pasta Shells: 
[ERROR] Tomato-Anchovy Pasta: 
[ERROR] Steamed Mussels with Sausages and Fennel: 
[ERROR] Sausage, Cranberry, and Pecan Stuffing: 
[ERROR] Ricotta-Filled Ravioli (Ravioli di Ricotta): 
[ERROR] Tiny Choux Puffs: 
[ERROR] Peach, Strawberry, and Banana Bruschetta: 
[ERROR] Mascarpone Tart with Honey, Oranges, and Pistachios: 
[ERROR] Lemongrass-Shallot Sambal: 
[ERROR] Polenta Budino With Plum Marmellata: 
[ERROR] Kentucky Bourbon Burgoo: 
[ERROR] Pan-Fried Pork Chops: 
[ERROR] Pepper, Rose

 97%|█████████▋| 13145/13520 [05:05<00:00, 2802.43it/s]

[ERROR] Blueberry Banana Shake: 
[ERROR] Delicata Squash and Roasted Mushrooms with Thyme: 
[ERROR] Grilled Lobster Salad with Avocado and Papaya: 
[ERROR] Baked Ziti with Mushrooms, Peppers, and Parmesan: 
[ERROR] Aunt Holly's Banana Bread: 
[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


100%|██████████| 13520/13520 [05:41<00:00, 39.57it/s]  


[✓] Сохранено в data/recipes_with_urls2.csv (11106 рецептов всего)
[!] Пропущено 11228 рецептов. Сохранено в missed_queries.txt


In [8]:
with open("missed_queries3.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [9]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

  3%|▎         | 291/11228 [00:53<20:28,  8.90it/s] 

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 17%|█▋        | 1869/11228 [04:59<38:47,  4.02it/s]  

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 17%|█▋        | 1871/11228 [05:00<58:38,  2.66it/s]

[ERROR] Beef Stock: 
[ERROR] Cabbage and White Bean Soup: 
[ERROR] Mustard and Cheese Crackers: 
[ERROR] Roast Fresh Ham: 


 17%|█▋        | 1890/11228 [05:01<19:27,  8.00it/s]

[ERROR] Wild Mushroom Lasagne: 
[ERROR] Golden Egg White Omelets with Spinach and Cheese: 


 17%|█▋        | 1894/11228 [05:01<17:41,  8.80it/s]

[ERROR] Mozzarella Arrabiata Salsa: 
[ERROR] Pineapple Ice-Cream Cake with Coconut Topping: 
[ERROR] Tomato Toast with Peaches, Mozzarella, and Mint: 
[ERROR] Raspberry Coulis: 
[ERROR] Chicken Parmesan: 
[ERROR] Chocolate-Dipped Cherries: 
[ERROR] Cabbage Salad with Mustard Vinaigrette: 
[ERROR] Asparagus with Horseradish Butter: 
[ERROR] Turkey Giblet Stock: 
[ERROR] Na'ama's Fattoush: 
[ERROR] South American Corn Packets: 
[ERROR] Seared Scallops with Lemon and Dill: 
[ERROR] Green Goddess Dressing: 
[ERROR] Smoked-Paprika Pork Rib Roast with Sherry Raisin Vinaigrette: 
[ERROR] Blue Cheese Bread: 
[ERROR] Gingerbread with Vanilla Ice Cream and Exotic Caramel Sauce: 
[ERROR] Spicy Curried Mussels: 
[ERROR] Ginger-Tomato Chutney: 
[ERROR] Watercress, Pear and Walnut Salad with Poppy Seed Dressing: 
[ERROR] Stir-Fried Tofu and Vine-Ripened Tomatoes: 
[ERROR] Yams with Crispy Skins and Brown-Butter Vinaigrette: 
[ERROR] Sage and Onion "Roast": 
[ERROR] Roasted Carrots with Cumin Yogurt:

 17%|█▋        | 1910/11228 [05:03<14:33, 10.67it/s]

[ERROR] Baby Eggplant, Olive, and Herb-Cheese Frittata: 
[ERROR] Cold Tomato-Thyme Soup with Grilled Garlic Croutons: 
[ERROR] Pickled Cucumbers: 
[ERROR] Little Quinoa Patties: 
[ERROR] Roasted Butternut Squash Soup: 
[ERROR] Gingerbread Trifle With Candied Kumquats and Wine-Poached Cranberries: 
[ERROR] Salsa Verde: 
[ERROR] Hot Fudge Sauce: 
[ERROR] Grilled Ground Lamb Kebabs with Fresh Hot-Pepper Paste: 
[ERROR] Lemon-Herb Roast Chicken: 
[ERROR] Shredded Brussels Sprouts with Maple Hickory Nuts: 
[ERROR] Standing Rib Roast, Spinach-Porcini Stuffing, Irish Whiskey Gravy, and Horseradish Cream: 
[ERROR] Spinach and Chick Peas with Bacon: 
[ERROR] Apricot, Date, and Pistachio Haroseth: 
[ERROR] Chocolate Walnut Biscotti: 
[ERROR] Tex-Mex Roasted Lamb Ribs With Cheese Grits: 
[ERROR] Pan-Seared Salmon Over Red Cabbage and Onions with Merlot Gastrique: 
[ERROR] Red Cabbage Salad with Green Apple, Lingonberry Preserves, and Toasted Walnuts: 
[ERROR] To Zest Citrus Fruits: 
[ERROR] Mexic

 96%|█████████▌| 10806/11228 [05:03<00:00, 5550.46it/s]

[ERROR] Soba Salad with Carrot and Zucchini: 
[ERROR] Lamb Chops with Lemon: 
[ERROR] Pink Grapefruit, Grape, Raspberry, and Olive Oil Salad with Minted Yogurt: 
[ERROR] Orange-Soy-Braised Pork Ribs: 
[ERROR] Louisiana Deviled Crab Cakes: 


 98%|█████████▊| 11022/11228 [05:24<00:00, 280.01it/s] 

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe
[ERROR] Steamed Mussels with Orange, Fennel, and Garlic: 
[ERROR] Prune Armagnac Sorbet: 
[ERROR] Linguine with Tomatoes, Olives, Feta, and Parsley: 
[ERROR] Sauteed Turkey Cutlets with Cranberry Orange Glaze: 
[ERROR] Lemon Cream Tart: 


100%|██████████| 11228/11228 [05:56<00:00, 31.50it/s] 

[ERROR] Cucumber and Radish Salad: 





[✓] Сохранено в data/recipes_with_urls2.csv (13434 рецептов всего)
[!] Пропущено 8900 рецептов. Сохранено в missed_queries.txt


In [11]:
with open("missed_queries4.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [13]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

  3%|▎         | 270/8900 [00:52<16:01,  8.97it/s] 

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


  4%|▍         | 381/8900 [01:11<32:58,  4.31it/s]

[✗] Failed to fetch search for: Classic Date Bars


  4%|▍         | 387/8900 [01:11<26:28,  5.36it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


  5%|▍         | 433/8900 [01:20<17:57,  7.86it/s]  

[✗] Failed to fetch search for: BA's Best Carrot Cake


  5%|▌         | 486/8900 [01:28<29:11,  4.80it/s]

[✗] Failed to fetch search for: Chicken Wings Five Ways
[✗] Failed to fetch search for: Southwestern Club Sandwich


 10%|█         | 925/8900 [02:39<41:04,  3.24it/s]  

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 11%|█         | 996/8900 [02:49<20:06,  6.55it/s]

[ERROR] Pizza Crust: Cannot connect to host www.epicurious.com:443 ssl:default [None]


 16%|█▌        | 1398/8900 [03:54<16:33,  7.55it/s]  

[ERROR] Fried Cardoons: Cannot connect to host www.epicurious.com:443 ssl:default [Connect call failed ('13.227.146.62', 443)]


 20%|██        | 1807/8900 [04:58<14:13,  8.31it/s]  

[ERROR] Raisin-Nut Spice Cookies: 
[ERROR] Jalebi: 
[ERROR] Fig and Raspberry Galette: 
[ERROR] Vanilla Strawberry Cooler: 
[ERROR] Tortilla Soup with Crisp Tortillas and Avocado Relish: 
[ERROR] Custom-Made Ice Cream Sandwich: 
[ERROR] Frenchy's Pasties: 
[ERROR] Praline Cheesecake with Hazelnut Crust: 
[ERROR] Mint Chutney: 
[ERROR] Grilled Halloumi With Watercress: 
[ERROR] Sauteed Fennel, Capers and Arugula: 
[ERROR] Lemon-Nutmeg Ice Cream: 
[ERROR] Niçoise Toasts: 
[ERROR] Mango and Red Pepper Barbeque Sauce: 
[ERROR] Snap Beans with Mustard and Country Ham: 
[ERROR] Linguine with Butternut Squash, Spinach, and Mussels: 
[ERROR] Provencal Oven-Roasted Tomato Sauce: 
[ERROR] Thai-Style Chicken and Vegetable Stir-Fry: 
[ERROR] Spicy Oven-Roasted Potatoes: 
[ERROR] Marinated Feta With Roasted Lemon: 
[ERROR] Grilled Cheddar, Tomato and Bacon Sandwiches: 
[ERROR] Maple-Oatmeal Sandwich Bread: 
[ERROR] Ham Steaks With Curry Sauce (Jambon Le Tout Paris): 
[ERROR] Hot White Russian: 
[ER

 20%|██        | 1809/8900 [05:03<1:30:00,  1.31it/s]

[ERROR] 3-Ingredient Sausage Dinner With Lentils and Fennel: 
[ERROR] Fluffy Baked Eggs with Roasted-Vegetable Hash: 
[ERROR] South Indian Vegetable Curry: 
[ERROR] Mozzarella Pesto Spread: 
[ERROR] Sweet Potato Soup: 
[ERROR] Root Vegetable "Lasagna" with Mushroom Broth: 
[ERROR] Sauteed Chicken with Parsnip, Apple, and Sherry Pan Sauce: 
[ERROR] Oven-Roasted Spring Vegetables with Salsa Verde: 
[ERROR] Fish Masala: 
[ERROR] Grilled Pork Chops with Bourbon-Mustard Glaze: 
[ERROR] Sherried Mushroom Empañadas: 
[ERROR] Parmesan, Rosemary, and Walnut Shortbread: 
[ERROR] Burgers with Artichokes, Gorgonzola, and Tomatoes: 
[ERROR] Veal Scaloppine with Mushroom Cream Sauce: 
[ERROR] Fresh Fennel Pan Gravy: 
[ERROR] Lumpia Wrappers: 
[ERROR] Archbishop: 
[ERROR] Butter Beans with Butter, Mint, and Lime: 
[ERROR] Coconut Milk Ice Cream with Ginger and Lime: 
[ERROR] Buttermilk Ice Cream with Spiced Fruit Compote: 
[ERROR] Pasta Shells with Escarole, Sausage, and Cheese: 
[ERROR] Mushroom Ris

 26%|██▌       | 2311/8900 [05:05<01:11, 92.63it/s]  

[ERROR] Canal House Teriyaki Sauce: 
[ERROR] Cilantro-Lime Tartar Sauce: 
[ERROR] Orzo with Spicy Sauce Schlussel-Leeds: 
[ERROR] Crown Roast of Lamb: 
[ERROR] Tomato Salsa: 
[ERROR] Roast Parsnips: 
[ERROR] Roasted Clams with Pancetta and Red Bell Pepper Coulis: 
[ERROR] Grilled Mackerel with Spicy Tomato Jam: 
[ERROR] Tofu Scramble with Yukon Gold and Sweet Potato Home Fries: 
[ERROR] Red Cabbage, Blue Cheese, and Walnut Empañadas: 
[ERROR] French 75 Cocktail II: 
[ERROR] Egg Fried Rice: 
[ERROR] Swordfish with Herb Sauce: 
[ERROR] Chocolate-Peanut Butter Cake with Cream Cheese and Butterfinger Frosting: 
[ERROR] Strawberry-Watermelon Smoothie with Ginger: 
[ERROR] Café de Paris: 
[ERROR] Chickpea Slather: 
[ERROR] Charred Rosemary–Infused Vodka: 
[ERROR] Citrus-and-Clove-Marinated Shrimp: 
[ERROR] Bobby's Dry-Rubbed Rib-Eye Steaks with Mesa Barbecue Sauce: 
[ERROR] Coconut-Marinated Short Rib Kebabs: 
[ERROR] Two-Bean Salad with Balsamic Vinaigrette: 
[ERROR] German-Style Fried Pota

 94%|█████████▍| 8375/8900 [05:05<00:00, 2169.63it/s]

[ERROR] Quince Sambal: 


 96%|█████████▋| 8567/8900 [05:21<00:01, 256.93it/s] 

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe
[ERROR] Braised Pork Loin Cutlets with Thyme-Lemon Sauce: 
[ERROR] Marmalade Chicken: 
[ERROR] Puerto Rican Crab: 
[ERROR] Arugula Salad: 


 99%|█████████▉| 8829/8900 [05:41<00:00, 86.80it/s] 

[ERROR] Milk-Braised Pork: 


100%|██████████| 8900/8900 [05:53<00:00, 25.18it/s]

[ERROR] Cheddar Chutney Toasts: 





[✓] Сохранено в data/recipes_with_urls2.csv (15822 рецептов всего)
[!] Пропущено 6512 рецептов. Сохранено в missed_queries.txt


In [17]:
with open("missed_queries5.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [18]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

  9%|▉         | 584/6512 [01:37<15:39,  6.31it/s] 

[✗] Failed to fetch search for: Tomatillo Salsa


  9%|▉         | 595/6512 [01:38<10:44,  9.19it/s]

[✗] Failed to fetch search for: Tomatillo Salsa


 11%|█         | 697/6512 [01:52<12:39,  7.66it/s]

[✗] Failed to fetch search for: Falafel


 14%|█▍        | 931/6512 [02:29<12:44,  7.30it/s]

[✗] Failed to fetch search for: Roasted Apricot Sorbet


 21%|██        | 1358/6512 [03:32<06:32, 13.13it/s]

[✗] Failed to fetch search for: Matzo Crumble


 28%|██▊       | 1802/6512 [04:41<16:39,  4.71it/s]

[✗] Failed to fetch search for: Almond Cookies


 28%|██▊       | 1851/6512 [04:48<08:47,  8.84it/s]

[ERROR] Salmon in Saffron Mussel Sauce: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Rib Eye Roast: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>


 28%|██▊       | 1854/6512 [04:49<08:37,  9.00it/s]

[ERROR] Blue Lake Green Beans with Lemon and Thyme: Server disconnected
[ERROR] Watercress, Radish, and Endive Salad with Mustard Seed Vinaigrette: Server disconnected
[ERROR] Couscous with Golden Raisins, Pine Nuts, and Green Onions: Server disconnected
[ERROR] Herbed Bulgur: Server disconnected
[ERROR] Beer-Marinated Tri-Tip with Blue Cheese, Wild Mushrooms, and Onions: Server disconnected
[ERROR] Chilled Beet Soup With Buttermilk, Cucumbers, and Dill (Chlodnik): Server disconnected
[ERROR] Tiramisu Affogato: Server disconnected
[ERROR] Pear and Almond Frangipane with Apricot Sauce: Server disconnected
[ERROR] Louisiana Deviled Crab Cakes: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>


 29%|██▉       | 1873/6512 [04:49<03:32, 21.82it/s]

[ERROR] Garlic Lime Chicken Breasts: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Crunchy Veg Bowl With Warm Peanut Sauce: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Mussels in Romesco Sauce: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>


 29%|██▉       | 1878/6512 [04:50<04:02, 19.12it/s]

[ERROR] Roast Chicken with Pesto and Potatoes: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Fried Chicken Salad: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Toasted Pumpkin Seeds: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Malt-Beer-Brined Turkey with Malt Glaze: Server disconnected
[ERROR] Maiden's Blush No.ii: Server disconnected
[ERROR] Sauteed Radishes and Sugar Snap Peas with Dill: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Leland Palmer: Response payload is not completed: <TransferEncodingError: 400, message='Not enough data to satisfy transfer length header.'>
[ERROR] Pumpkin Plum Tart: Response payload is not completed: <Tr

 30%|██▉       | 1936/6512 [05:00<20:08,  3.79it/s]

[ERROR] Chicken Salad with Grapes and Walnuts: 
[ERROR] B & B Peach Conserve: 
[ERROR] Mango and Red Onion Salsa: 
[ERROR] Sunday Supper Macaroni and Cheese: 
[ERROR] Strawberry Daiquiri: 
[ERROR] Southwestern Christmas Salad: 
[ERROR] Butternut Squash and Sage Soup with Sage Breadcrumbs: 
[ERROR] Grilled Flank Steak with Sauteed Beet Greens and Creamy Horseradish Beets: 
[ERROR] Cranberry-Ribbon Apple Pie: 
[ERROR] Olive and Eggplant Spread: 
[ERROR] Tomato, Eggplant and Black Olive Sauce with Rosemary: 
[ERROR] Tomato and Tapenade Tartlets: 
[ERROR] Braised Chicken with Green Peppers and Tomatoes: 
[ERROR] Crab Cake BLT: 
[ERROR] Ginger-Curry Aïoli: 
[ERROR] Pasta with Shrimp and Artichokes: 
[ERROR] Slivered Endive Salad: 
[ERROR] Kaffir Lime Mousse with Honeydew Water: 
[ERROR] Poached Eggs with Mushroom, Tamarillo, and Sage: 
[ERROR] Mango in Ginger-Mint Syrup: 
[ERROR] Hoffman House (or Astoria): 
[ERROR] Southwest-Style Salisbury Steaks: 
[ERROR] Lemon Herb Chicken Burgers with 

 30%|██▉       | 1937/6512 [05:01<26:10,  2.91it/s]

[ERROR] Sweetened Whipped Cream: 
[ERROR] Flaky Pie Crust: 
[ERROR] Cranberry, Tangerine, and Crystallized-Ginger Relish: 
[ERROR] Wilted Greens with Garlic and Anchovies: 
[ERROR] Pecan Praline: 
[ERROR] Grasshopper Hot Chocolate: 
[ERROR] Frisée Salad with Cucumber and Radishes: 
[ERROR] Crab and Roasted Red Bell Pepper Soup: 
[ERROR] Cumin-Crusted Lamb with Apricots: 
[ERROR] Gingerbread: 
[ERROR] Open-Face Ham, Cheddar, and Apple Butter Sandwiches: 
[ERROR] Jasmine Whipped Cream: 
[ERROR] Turkey Breast Braciola: 
[ERROR] Boneless Leg of Lamb Stuffed with Swiss Chard and Feta: 
[ERROR] Pineapple-Mint Vodka: 
[ERROR] Lobster Gelees with Fresh Tarragon Oil: 
[ERROR] Ishbel's Yorkshire Pudding: 
[ERROR] Stabilized Whole-Milk Yogurt: 
[ERROR] Herbed Chicken Burgers: 
[ERROR] Mango Chutney: 
[ERROR] Chorizo-Lemon Butter: 
[ERROR] Asian Noodle Salad with Eggplant, Sugar Snap Peas, and Lime Dressing: 
[ERROR] Savory Semolina: 
[ERROR] Beef Stock: 
[ERROR] Red Rice Salad with Pecans, Fennel

 95%|█████████▍| 6159/6512 [05:01<00:00, 2699.83it/s]

[ERROR] Grilled Seafood Salad Niçoise: 
[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe
[ERROR] Sticky Toffee Pudding with Blood Orange, Tangerine, and Whipped Crème Fraîche: 


 97%|█████████▋| 6286/6512 [05:14<00:01, 219.21it/s] 

[ERROR] Caramelized-Shallot Mashed Potatoes: 
[ERROR] Goan Curried Clams: 
[ERROR] Mint Julep Pineapple: 
[ERROR] Roast Beef, Red Pepper, Spinach, and Feta Sandwiches with Tahini Dressing: 
[ERROR] Potato, Apple, and Prune Stuffing: 
[ERROR] Garlic-Miso Pork Chops with Orange Bell Pepper and Arugula: 
[ERROR] Banana-Oatmeal Bars with Chocolate Chunks: 
[ERROR] Shrimp Toast with Pickled Ginger: 


100%|██████████| 6512/6512 [05:47<00:00, 18.74it/s] 


[✓] Сохранено в data/recipes_with_urls2.csv (18073 рецептов всего)
[!] Пропущено 4261 рецептов. Сохранено в missed_queries.txt


In [20]:
with open("missed_queries6.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [21]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

  9%|▉         | 394/4261 [01:10<09:41,  6.65it/s]

[✗] Failed to fetch search for: Scallops, Okra, and Tomatoes in Coconut Curry Sauce


 15%|█▌        | 652/4261 [01:51<06:27,  9.31it/s]

[✗] Failed to fetch search for: Pecan Praline


 17%|█▋        | 738/4261 [02:08<59:10,  1.01s/it]

[✗] Failed to fetch search for: Tagliatelle with Shredded Beets, Sour Cream, and Parsley


 20%|██        | 860/4261 [02:25<10:38,  5.33it/s]

[✗] Failed to fetch search for: Grilled Fresh Sardines with Fennel and Preserved Lemon


 23%|██▎       | 974/4261 [02:43<05:31,  9.91it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 27%|██▋       | 1153/4261 [03:10<03:08, 16.52it/s]

[✗] Failed to fetch search for: Chicken with Herbed Vegetable Sauce


 27%|██▋       | 1158/4261 [03:10<03:32, 14.58it/s]

[✗] Failed to fetch search for: Black Pepper Cornmeal Crisps
[✗] Failed to fetch search for: Tuna, Pickle, and Chopped-Vegetable Pita Sandwiches
[✗] Failed to fetch search for: Root Vegetable "Lasagna" with Mushroom Broth
[✗] Failed to fetch search for: Trout Grenobloise
[✗] Failed to fetch search for: Potato, Caramelized Onion, and Goat Cheese Gratin


 27%|██▋       | 1163/4261 [03:11<03:36, 14.30it/s]

[✗] Failed to fetch search for: Breakfast Polenta with Sausage, Onion and Peppers


 27%|██▋       | 1170/4261 [03:12<07:59,  6.44it/s]

[✗] Failed to fetch search for: Chicken Legs with Achiote Garlic Sauce


 28%|██▊       | 1181/4261 [03:14<06:49,  7.53it/s]

[✗] Failed to fetch search for: White Barbecue Sauce


 32%|███▏      | 1382/4261 [03:55<11:09,  4.30it/s]  

[✗] Failed to fetch search for: Oven-Roasted Spring Vegetables with Salsa Verde
[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 33%|███▎      | 1408/4261 [03:57<07:12,  6.60it/s]

[✗] Failed to fetch search for: Steak Salad with Pickled Vegetables


 35%|███▍      | 1484/4261 [04:09<07:53,  5.87it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 37%|███▋      | 1579/4261 [04:20<05:06,  8.76it/s]

[!] No result found for: Pumpkin-Raisin Bars


 43%|████▎     | 1846/4261 [05:06<06:02,  6.66it/s]

[ERROR] Rhubarb and Pistachios over Thick Yogurt: 
[ERROR] Black Cod with Olives and Potatoes in Parchment: 
[ERROR] Mixed Greens with Sheep's-Milk Cheese and Honey Vinaigrette: 
[ERROR] Todd English's Backyard New England Clam Bake: 
[ERROR] T-Bone Steak with Thyme and Garlic Butter: 
[ERROR] Apple and Persimmon Tarte Tatin: 
[ERROR] Green Beans with Ginger and Cashews: 
[ERROR] Cilantro-Tomato Salsa: 
[ERROR] Chermoula With Red Chile: 
[ERROR] Potato-Green Chile Gratin: 
[ERROR] Pigs in Sleeping Bags: 
[ERROR] Roast Beef Stock: 
[ERROR] Rosemary Chicken with Broccoli Rabe: 
[ERROR] Steamed Radishes with Lemon Dill Butter: 
[ERROR] Cornmeal-Crusted Trout with Warm Tomato and Tarragon Salsa: 
[ERROR] Orange, Fennel and Garlic Marinated Olives: 
[ERROR] Goat Cheese Crostini with Blood Orange and Black Pepper Marmalade: 
[ERROR] Grilled Beef Tenderloin with Roasted Garlic Sauce and Leek-Tomato Quinoa: 
[ERROR] Chocolate Gateaux: 
[ERROR] Cranberry-Ginger Chutney: 
[ERROR] Artichoke Brusc

 44%|████▍     | 1870/4261 [05:07<04:40,  8.53it/s]

[ERROR] Almond Cookies: 
[ERROR] Grilled Porterhouse Steak with Paprika-Parmesan Butter: 
[ERROR] Broiled Peaches with Cookie-Crumb Topping: 
[ERROR] Garlic-Rosemary Roast Chicken: 
[ERROR] Potato, Green Cabbage, and Leek Soup with Lemon Crème Fraîche: 
[ERROR] Panfried Black Bean Coriander Cakes: 
[ERROR] Nectarine and Almond Crisp: 
[ERROR] Pan-Roasted Halibut with Herbed Corona Beans: 
[ERROR] Queso Fundido: 
[ERROR] Grilled Quail Salad with Honey-Dijon Dressing: 
[ERROR] New Chicken Parmesan: 
[ERROR] Angel-Hair Pasta with Fresh Tomato Sauce: 
[ERROR] Horseradish Mashed Potatoes: 
[ERROR] Korean-Style Tuna Tartare: 
[ERROR] Salted Caramel Ice Cream: 
[ERROR] Tricolor Salad with Honey-Cumin Dressing: 
[ERROR] Charred Spring Onion and Sesame-Chile Butter: 
[ERROR] Duck Fat-Potato Galette with Caraway and Sweet Onions: 
[ERROR] Lemony Strawberry-Rhubarb Cobbler: 
[ERROR] To Make Lobster Stock: 
[ERROR] Key Lime Pie with Almond Crumb Crust: 
[ERROR] Artichoke Fennel Sauce with Prosciut

 91%|█████████ | 3878/4261 [05:08<00:00, 438.56it/s]

[ERROR] Malt-Beer-Brined Turkey with Malt Glaze: 
[ERROR] Pesce Per Due: 
[ERROR] Pear and Fig Pie with Hazelnut Crust: 
[ERROR] Imperial Hotel Fizz: 
[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe
[ERROR] Sugar Snap Peas and Pasta: 
[ERROR] Pine Nut Cardamom Scones: 
[ERROR] Chicken Salad with Rosemary, Almonds, and Green Onions: 


 97%|█████████▋| 4121/4261 [05:27<00:01, 75.25it/s] 

[ERROR] Guinness BBQ Sauce: 
[ERROR] Roasted Pears with Hazelnut Syrup and Candied Hazelnuts: 


100%|██████████| 4261/4261 [05:46<00:00, 12.28it/s]


[✓] Сохранено в data/recipes_with_urls2.csv (20328 рецептов всего)
[!] Пропущено 2006 рецептов. Сохранено в missed_queries.txt


In [23]:
with open("missed_queries7.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [24]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

 19%|█▉        | 389/2006 [01:19<04:47,  5.62it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 23%|██▎       | 456/2006 [01:33<04:48,  5.37it/s]

[✗] Failed to fetch search for: Oatmeal, Fig, and Walnut Bars


 26%|██▌       | 518/2006 [01:39<03:06,  7.98it/s]

[✗] Failed to fetch search for: Slow-Roasted Pork with Lime Mojo


 28%|██▊       | 554/2006 [01:41<02:13, 10.89it/s]

[!] Failed to fetch recipe page: https://www.epicurious.com/simple-syrup-368889-recipe


 57%|█████▋    | 1136/2006 [03:26<02:33,  5.68it/s]

[✗] Failed to fetch search for: Lamb and Polenta "Lasagne"


 77%|███████▋  | 1544/2006 [04:25<01:33,  4.93it/s]

[✗] Failed to fetch search for: Watercress and Radish Salad


 96%|█████████▌| 1916/2006 [05:03<00:04, 19.53it/s]

[ERROR] Plums with Prosciutto, Goat Cheese, Baby Arugula, and Champagne Vinegar: 


100%|██████████| 2006/2006 [05:06<00:00,  6.54it/s]


[✓] Сохранено в data/recipes_with_urls2.csv (22329 рецептов всего)
[!] Пропущено 5 рецептов. Сохранено в missed_queries.txt


In [27]:
with open("missed_queries8.txt", "r", encoding="utf-8") as f:
    missed_queries = [line.strip() for line in f if line.strip()]

In [28]:
run_scraper(missed_queries, "data/recipes_with_urls2.csv")

100%|██████████| 5/5 [00:04<00:00,  1.20it/s]

[✓] Сохранено в data/recipes_with_urls2.csv (22334 рецептов всего)





In [29]:
df = pd.read_csv("data/recipes_with_urls2.csv")

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22334 entries, 0 to 22333
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   searched_query  22334 non-null  object 
 1   title           22334 non-null  object 
 2   rating          18413 non-null  float64
 3   description     1776 non-null   object 
 4   url             22334 non-null  object 
dtypes: float64(1), object(4)
memory usage: 872.6+ KB


In [31]:
df_unique = df.drop_duplicates(subset=["searched_query","title", "url"], keep="first")

In [32]:
df_unique.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18605 entries, 0 to 22333
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   searched_query  18605 non-null  object 
 1   title           18605 non-null  object 
 2   rating          15380 non-null  float64
 3   description     1501 non-null   object 
 4   url             18605 non-null  object 
dtypes: float64(1), object(4)
memory usage: 872.1+ KB


In [33]:
df_unique.to_csv("data/recipes_with_urls_cleaned.csv", index=False)

print(f"[✓] Удалено {len(df) - len(df_unique)} дубликатов")

[✓] Удалено 3729 дубликатов


In [None]:
for query in tqdm(recipes[0:1]):
    search_url = f"https://www.epicurious.com/search?q={query.replace(' ', '+')}"
    response = requests.get(search_url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch: {query}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Найти первую ссылку на рецепт
    first_result = soup.find('div', class_='ClampContent-hilPkr fvKowN')
    if not first_result:
        continue

    a_tag = first_result.find('a', href=True)
    if not a_tag:
        continue

    url = "https://www.epicurious.com" + a_tag['href']
    title = a_tag.get_text(strip=True)

    # Переход на страницу рецепта
    recipe_resp = requests.get(url, headers=headers)
    if recipe_resp.status_code != 200:
        rating = "N/A"
        description = "N/A"
    else:
        recipe_soup = BeautifulSoup(recipe_resp.text, 'html.parser')

        # Рейтинг
        rating_tag = recipe_soup.find("p", class_="RatingRating-btVmKd")
        rating = rating_tag.get_text(strip=True) if rating_tag else "N/A"

        # Краткое описание
        tip_block = recipe_soup.find("div", {"data-testid": "cn-wrapper"})
        if tip_block:
            desc_p = tip_block.find("p")
            description = desc_p.get_text(strip=True) if desc_p else "N/A"
        else:
            description = "N/A"


    recipes_data.append({
        "searched_query": query,
        "title": title,
        "rating": rating,
        "description": description,
        "url": url
    })

    time.sleep(0.5)  # минимальная пауза на всякий случай

# Сохранение в DataFrame


100%|██████████| 1/1 [00:03<00:00,  3.27s/it]
