In [None]:
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [None]:
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd

def clean_json_string(json_string):
    """
    Clean JSON string by removing invalid control characters, non-breaking spaces,
    and zero-width spaces.
    """
    cleaned_string = json_string.replace('\xa0', ' ').replace('\u200b', '')
    return cleaned_string

def _format_recipe_steps(recipe_instructions):
    '''
    Turn a JSON-LD "recipeInstructions" value into numbered step strings.

    Accepts a list of dicts carrying a 'text' key, a list of plain strings,
    or a single dict/string. Entries without usable text are dropped, and the
    remaining steps are numbered consecutively starting at 1.
    '''
    if not isinstance(recipe_instructions, list):
        recipe_instructions = [recipe_instructions]

    texts = []
    for step in recipe_instructions:
        if isinstance(step, str):
            text = step
        elif isinstance(step, dict):
            text = step.get('text')
        else:
            text = None
        if text:
            texts.append(text)

    return [f'{number}. {text}' for number, text in enumerate(texts, start=1)]


def _fetch_recipe(session, food_id, name, timeout):
    '''
    Download one recipe page and extract its JSON-LD recipe data.

    Returns the result dict ('name', 'title', 'ingredients', 'recipe'), or
    None (after printing the reason) when the page cannot be fetched or
    parsed, or lacks ingredients / instructions.
    '''
    recipe_url = f'https://www.10000recipe.com/recipe/{food_id}'
    try:
        recipe_response = session.get(recipe_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"HTTP request error for recipe {food_id}: {e}")
        return None
    if recipe_response.status_code != 200:
        print("HTTP response error:", recipe_response.status_code)
        return None

    recipe_soup = BeautifulSoup(recipe_response.text, 'html.parser')
    json_ld_tag = recipe_soup.find(attrs={'type': 'application/ld+json'})
    if not json_ld_tag:
        print(f"No recipe info found for {food_id}")
        return None

    try:
        # Clean JSON string before loading
        result = json.loads(clean_json_string(json_ld_tag.text))
    except json.JSONDecodeError as e:
        print(f"JSON decode error for recipe {food_id}: {e}")
        return None

    # The payload is read with .get(); anything that is not a JSON object
    # (e.g. a top-level array) cannot be handled that way.
    if not isinstance(result, dict):
        print(f"No recipe info found for {food_id}")
        return None

    title = result.get('name', 'No Title')
    ingredients = result.get('recipeIngredient', [])
    if isinstance(ingredients, str):
        # A lone string would otherwise be joined character by character.
        ingredients = [ingredients]
    recipe = _format_recipe_steps(result.get('recipeInstructions', []))

    if not ingredients or not recipe:
        print(f"Skipping recipe {food_id} due to missing information.")
        return None

    return {
        'name': name,
        'title': title,
        'ingredients': ', '.join(str(item) for item in ingredients),
        'recipe': recipe,
    }


def food_info(name, max_recipes=150, timeout=10):
    '''
    This function gives you food information for the given input.

    Walks the 10000recipe.com search result pages for 'name' and fetches each
    linked recipe page until 'max_recipes' recipes have been collected or no
    new recipes are found. Recipes that cannot be fetched or parsed, or that
    lack ingredients / instructions, are skipped with a printed message.

    PARAMETERS
        - name(str): name of Korean food in Korean ex) food_info("김치찌개")
        - max_recipes(int): maximum number of recipes to fetch
        - timeout(float): seconds to wait for each HTTP request
    RETURN
        - res(list): list of dicts containing info for some Korean foods related to 'name'
            - res[i]['name'](str): the search term passed in as 'name'
            - res[i]['title'](str): title of the recipe
            - res[i]['ingredients'](str): comma-separated ingredients
            - res[i]['recipe'](list[str]): numbered recipe steps in order
    '''
    all_food_info = []
    seen_ids = set()
    page = 1

    # One session reuses the connection across the many requests.
    with requests.Session() as session:
        while len(all_food_info) < max_recipes:
            try:
                # 'params' lets requests URL-encode the query safely.
                response = session.get(
                    "https://www.10000recipe.com/recipe/list.html",
                    params={'q': name, 'order': 'reco', 'page': page},
                    timeout=timeout,
                )
            except requests.RequestException as e:
                # Keep what was collected so far instead of crashing.
                print("HTTP request error:", e)
                break
            if response.status_code != 200:
                print("HTTP response error:", response.status_code)
                break

            list_soup = BeautifulSoup(response.text, 'html.parser')
            food_list = list_soup.find_all(attrs={'class': 'common_sp_link'})
            if not food_list:
                print("No more recipes found.")
                break

            found_new = False
            for food in food_list:
                if len(all_food_info) >= max_recipes:
                    break

                href = food.get('href')
                if not href:
                    continue
                food_id = href.rstrip('/').split('/')[-1]
                # Skip ids already visited so no recipe is fetched twice.
                if not food_id or food_id in seen_ids:
                    continue
                seen_ids.add(food_id)
                found_new = True

                res = _fetch_recipe(session, food_id, name, timeout)
                if res is not None:
                    all_food_info.append(res)

            if not found_new:
                # The page only repeated known recipes; paging further
                # would loop forever.
                print("No more recipes found.")
                break

            page += 1

    return all_food_info

# Example usage: scrape up to 150 recipes for one dish.
name = "양념치킨"
food_info_result = food_info(name, max_recipes=150)

# Convert the list of dictionaries to a pandas DataFrame
# (columns: name, title, ingredients, recipe).
df = pd.DataFrame(food_info_result)

# Display the DataFrame
print(df)

# Save the DataFrame to an Excel file named after the search term,
# so the file name always matches the query above.
df.to_excel(f"{name}.xlsx", index=False)

Skipping recipe 3392979 due to missing information.
Skipping recipe 4950428 due to missing information.
Skipping recipe 6845849 due to missing information.
Skipping recipe 1456808 due to missing information.
Skipping recipe 1662654 due to missing information.
Skipping recipe 129704 due to missing information.
Skipping recipe 354868 due to missing information.
Skipping recipe 6830815 due to missing information.
Skipping recipe 664730 due to missing information.
Skipping recipe 5756194 due to missing information.
Skipping recipe 594006 due to missing information.
Skipping recipe 683092 due to missing information.
     name                            title  \
0    양념치킨        백종원 양념치킨 소스 만드는법 양념치킨 만들기   
1    양념치킨  [마리텔 백종원 치킨]집에서 치킨만들기 양념치킨소스만들기   
2    양념치킨             완전쉬운 양념치킨 소스만들기+치킨너겟   
3    양념치킨                양념치킨, 양념치킨소스 만들기    
4    양념치킨                  팝콘치킨으로 양념치킨 만들기   
..    ...                              ...   
145  양념치킨           백종원표 양념치킨 소스로 만드는 멸치볶음   
146  양념치킨    