Importing of Packages

In [9]:
import requests
from bs4 import BeautifulSoup
import re
import math
import pandas as pd

Set the base URL of the site being scraped

In [10]:
# Root address of the site; relative recipe links are appended to it later on.
baseurl = "https://www.bakingmad.com"

# Download the landing page and parse it with the built-in HTML parser.
page = requests.get(baseurl)
soup = BeautifulSoup(page.content, features="html.parser")

Function to figure out the number of pages needed to scrape

In [11]:
def getnumpages(section, pagesize=72, timeout=30):
    """Return how many listing pages a recipe section spans.

    Requests the first listing page of ``section``, reads the result count
    from the listing footer and divides it by the page size, rounding up.

    Parameters
    ----------
    section : str
        Section slug as it appears in the site URL (e.g. ``'cakes'``).
    pagesize : int, optional
        Results per listing page. Sent as the ``pagesize`` query parameter and
        used as the divisor. Defaults to 72.
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    int
        Number of listing pages needed to cover every result.

    Raises
    ------
    ValueError
        If the results footer is missing, or its third space-separated token
        is not a number.
    requests.HTTPError
        If the listing page answers with an error status.
    """
    listing_url = ('https://www.bakingmad.com/recipes/' + section
                   + '?pagesize=' + str(pagesize) + '&sort=Date&page=1')
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(listing_url, timeout=timeout)
    response.raise_for_status()
    listing = BeautifulSoup(response.content, 'html.parser')

    footer = listing.find(class_='listing-footer__results js-results')
    if footer is None:
        raise ValueError(
            "No results footer found on listing page {!r}".format(listing_url)
        )

    # The total count is the third space-separated token of the footer text.
    tokens = footer.text.split(" ")
    try:
        # Drop thousands separators so a count such as "1,000" still parses.
        total_results = float(tokens[2].replace(',', ''))
    except (IndexError, ValueError):
        raise ValueError(
            "Could not read a result count from footer text {!r} on {!r}".format(
                footer.text, listing_url
            )
        )

    return math.ceil(total_results / pagesize)

Function to collect the recipe URLs from a listing page into a list

In [12]:
def getrecipeurls(recipelist):
    """Collect the link target of every recipe summary.

    For each element of ``recipelist`` the first descendant with class
    ``summary__block-link`` is looked up and its ``href`` attribute is taken.

    Parameters
    ----------
    recipelist : iterable
        Parsed recipe summary elements.

    Returns
    -------
    list
        The ``href`` values, in the same order as ``recipelist``.
    """
    return [
        summary.find(class_='summary__block-link')['href']
        for summary in recipelist
    ]

Function to pull ingredients from the recipe

In [13]:
def get_ingredients(recipesoup):
    """Extract the ingredient list from a parsed recipe page.

    Every element marked ``itemprop="recipeIngredient"`` yields one record.
    The amount is read from the element's first ``<span>`` and the item from
    its third ``<span>``.

    An ingredient with fewer than three spans no longer raises ``IndexError``.
    A missing first span gives an ``'Amount'`` of ``None``. A missing third
    span makes ``'Item'`` fall back to the element's whole visible text (or
    ``None`` when that is empty), so the ingredient is not lost.

    Parameters
    ----------
    recipesoup : bs4.BeautifulSoup
        Parsed recipe page.

    Returns
    -------
    list of dict
        One ``{'Amount': ..., 'Item': ...}`` dict per ingredient, in page
        order.
    """
    full_ingredient_list = []

    for ingredient in recipesoup.find_all(itemprop="recipeIngredient"):
        span_texts = [span.string for span in ingredient.find_all('span')]

        amount = span_texts[0] if span_texts else None
        if len(span_texts) > 2:
            item = span_texts[2]
        else:
            # Unexpected markup: keep whatever text the element carries rather
            # than aborting the whole scrape on one odd ingredient.
            item = ingredient.get_text(" ", strip=True) or None

        full_ingredient_list.append({'Amount': amount, 'Item': item})

    return full_ingredient_list

Function to pull the method from the recipe

In [14]:
def get_method(recipesoup):
    """Extract the numbered method steps from a parsed recipe page.

    Each element with class ``method__text`` is one step. Its text has
    leading and trailing newlines stripped and is stored under its 1-based
    position on the page.

    Parameters
    ----------
    recipesoup : bs4.BeautifulSoup
        Parsed recipe page.

    Returns
    -------
    tuple
        ``(steps, total)`` where ``steps`` maps step number to step text and
        ``total`` is the number of steps found.
    """
    step_tags = recipesoup.find_all(class_='method__text')
    numbered_steps = {
        number: tag.text.strip('\n')
        for number, tag in enumerate(step_tags, start=1)
    }
    return (numbered_steps, len(numbered_steps))

Function to pull the recipes from a certain section

In [15]:
def recipes_from_section(urllist, timeout=30):
    """Download and parse every recipe in ``urllist``.

    Parameters
    ----------
    urllist : iterable of str
        Recipe links; each one is appended to the module-level ``baseurl`` to
        form the address that is requested.
    timeout : float, optional
        Seconds to wait for each recipe page before giving up. Defaults to 30.

    Returns
    -------
    dict
        Maps recipe name to a details dict with the keys ``'Ingredients'``,
        ``'Steps'``, ``'Total Steps'``, ``'Time'``, ``'Yield'`` and
        ``'Skill Level'``. A detail whose element is absent from the page is
        stored as ``None`` instead of raising. If the page has no usable name
        the recipe is keyed by its link from ``urllist``.

    Raises
    ------
    requests.HTTPError
        If a recipe page answers with an error status.

    Notes
    -----
    Two recipes that share a name overwrite one another, because the name is
    the dictionary key.
    """
    fullrecipe = {}
    for page in urllist:
        recipepage = requests.get(baseurl + page, timeout=timeout)
        # Fail loudly on an error page rather than recording an empty recipe.
        recipepage.raise_for_status()
        recipesoup = BeautifulSoup(recipepage.content, 'html.parser')

        details = {}
        details['Ingredients'] = get_ingredients(recipesoup)
        details['Steps'], details['Total Steps'] = get_method(recipesoup)

        # find() returns None when an element is absent, so guard each lookup
        # before touching .text or an attribute.
        time_tag = recipesoup.find(class_='recipe-info__total-time')
        if time_tag is None:
            details["Time"] = None
        else:
            details["Time"] = (
                time_tag.text
                .replace("TotalTime ", "")
                .replace('\n', '')
                .replace('\r', '')
                .replace(' ', '')
            )

        yield_tag = recipesoup.find(class_='recipe-info__yield')
        details["Yield"] = None if yield_tag is None else yield_tag.text.replace('\n', " ")

        skill_tag = recipesoup.find(class_='recipe-info__skill')
        details["Skill Level"] = None if skill_tag is None else skill_tag.get('data-skill-level')

        name_tag = recipesoup.find(itemprop='name')
        recipename = None if name_tag is None else name_tag.string
        if not recipename:
            # Keying several recipes by None would make them overwrite each
            # other, so use the link as the key instead.
            recipename = page
        fullrecipe[recipename] = details
    return fullrecipe

Function to run through sections list

In [16]:
def get_full_url_list_section(section, numpagessection):
    """Gather the recipe links from every listing page of a section.

    Listing pages ``1`` to ``numpagessection`` (inclusive) are requested one
    after another. On each page the elements with class
    ``summary summary--recipe`` are handed to ``getrecipeurls`` and the links
    it returns are accumulated.

    Parameters
    ----------
    section : str
        Section slug used in the listing URL.
    numpagessection : int
        Number of listing pages to walk through.

    Returns
    -------
    list
        All recipe links of the section, in page order.
    """
    collected_links = []
    for page_number in range(1, numpagessection + 1):
        listing_url = (
            'https://www.bakingmad.com/recipes/'
            + section
            + '?pagesize=72&sort=Date&page='
            + str(page_number)
        )
        listing_response = requests.get(listing_url)
        listing_soup = BeautifulSoup(listing_response.content, 'html.parser')
        summaries = listing_soup.find_all(class_='summary summary--recipe')
        collected_links += getrecipeurls(summaries)

    return collected_links

List the sections I want scraped and scrape each of them into a dictionary

In [17]:
# Section slugs to scrape, as they appear in the site's recipe URLs.
sections = [
    'desserts',
    'bread-dough',
    'cakes',
    'confectionery',
    'cookies-biscuits',
    'cupcakes-muffins',
    'ice-cream',
    'icing-buttercream',
    'jams-preserves',
    'pancakes-batters',
    'pastries',
    'scones',
    'sauces',
    'traybakes',
]

# Section slug -> recipes scraped from that section.
complete_baking_mad = {}
for section in sections:
    # 1) how many listing pages, 2) every recipe link on them, 3) the recipes.
    numpagessection = getnumpages(section)
    full_urls_current_section = get_full_url_list_section(
        section, numpagessection
    )
    all_recipes_current_section = recipes_from_section(
        full_urls_current_section
    )
    complete_baking_mad[section] = all_recipes_current_section

Convert the scraped dictionary into a more usable DataFrame

In [18]:
# Build one frame per section (one row per recipe after the transpose), tag
# each row with its section, then concatenate everything in a single call.
# Concatenating inside the loop re-copies the accumulated frame on every
# pass, and the old special case for 'desserts' is no longer needed.
# Rows follow the order of `sections`, which lists 'desserts' first.
section_frames = []
for section in sections:
    new_df = pd.DataFrame(complete_baking_mad[section]).transpose()
    new_df['Section'] = section
    section_frames.append(new_df)

df_wip = pd.concat(section_frames, axis=0)

Exporting the dataframe to CSV

In [19]:
# NOTE(review): this is an absolute, machine-specific path; the export fails
# on any machine without this directory. Consider a path relative to the
# notebook, or a configurable output directory.
output_csv_path = 'C:/Users/yanni/Desktop/BakingMad.csv'
df_wip.to_csv(output_csv_path)