# Webscraping lab

Practice your webscraping and parsing skills! 🎉

In [1]:
# Import libraries
import pandas as pd
import requests
import numpy as np
from bs4 import BeautifulSoup
from time import sleep

### Step 1: Create a soup object from the home page

In [2]:
# Fetch the home page that links to every restaurant and parse it with lxml.
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
res = requests.get(url, timeout=10)
# Stop here with a clear HTTP error rather than parsing an error page further down.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [3]:
# Build one {'name', 'href'} dictionary for every link inside the first
# <tbody> of the home page.
restaurants = [
    {'name': link.text, 'href': link.attrs['href']}
    for link in soup.find('tbody').find_all('a')
]

# Preview the first three entries. The loop variable gets a real name instead
# of `_` so that IPython's `_` (the last cell output) is not overwritten.
for restaurant in restaurants[:3]:
    print(restaurant)

{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}
{'name': "Applebee's", 'href': 'restaurants/2.html'}
{'name': "Arby's", 'href': 'restaurants/3.html'}


### Step 3: Using the `href`, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

In [4]:
# Visit every restaurant page and collect one dictionary per menu item.
# Each dictionary holds the restaurant name plus one entry per table column,
# keyed by the column's header text.
foods = []
for r in restaurants:

    # `url` ends with '/', so appending the relative href yields the page address.
    res_rest = requests.get(url + r['href'], timeout=10)
    # Fail with a clear HTTP error instead of an AttributeError on a missing tag.
    res_rest.raise_for_status()
    # Name the parser explicitly (same one used for the home page) so the result
    # does not depend on whichever parser happens to be installed.
    soup_r = BeautifulSoup(res_rest.content, 'lxml')

    restau_name = soup_r.find('h3', {'class': 'display-3'}).text.strip()
    table = soup_r.find('table')

    # Column titles come from the table header; strip surrounding whitespace.
    headers = [th.text.strip() for th in table.find('thead').find_all('th')]

    # One record per body row: pair each header with the matching cell text.
    # The cells are collected once per row rather than re-queried per column.
    for tr in table.find('tbody').find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')]
        row = {'Restaurant': restau_name}
        row.update(zip(headers, cells))
        foods.append(row)

    # Pause between requests so the server is not hit in a tight loop.
    sleep(1)

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [5]:
# Turn the scraped records (one dictionary per menu item) into a DataFrame
# and preview its first five rows.
foods = pd.DataFrame(data=foods)
foods.head(n=5)

Unnamed: 0,Restaurant,Name,Category,Calories,Fat,Carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150


In [6]:
# (rows, columns) of the scraped table; the lab notes expect 5,131 rows.
foods.shape

(5131, 6)

### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [7]:
# Write the table to foods.csv (path relative to the working directory);
# index=False keeps the DataFrame's row index out of the file.
foods.to_csv(path_or_buf='./foods.csv', index=False)

### Step 6: Use `pd.read_html`
Do the same thing as above, but use `pd.read_html()` to scrape the table from each page instead of BS4.

In [8]:
# Scrape every restaurant page again, this time letting pandas parse the table.
# The result is a list with one DataFrame per restaurant.
foods_quick = []
for r in restaurants:
    # read_html returns a list of DataFrames (one per table found); keep the first.
    data = pd.read_html(url + r['href'])[0]
    # Assigning a scalar fills every row with it. This does not depend on the
    # dtype of an existing column, unlike building the values with np.full_like.
    data['Restaurant'] = r['name']
    foods_quick.append(data)
    # Pause between requests, as the BeautifulSoup loop does.
    sleep(1)

In [9]:
# Stack the per-restaurant tables into one frame with a fresh 0..n-1 index.
stacked = pd.concat(foods_quick, ignore_index=True)

# Move the last column (the restaurant name added during scraping) to the front.
last_column = stacked.columns[-1]
foods_quick = stacked[[last_column, *stacked.columns[:-1]]]
foods_quick

Unnamed: 0,Restaurant,Name,Category,Calories,Fat,Carbs
0,A&W Restaurants,Original Bacon Double Cheeseburger,Burgers,760,45,45
1,A&W Restaurants,Coney (Chili) Dog,Entrees,340,20,26
2,A&W Restaurants,Chili Fries,French Fries,370,15,49
3,A&W Restaurants,Strawberry Milkshake (small),Shakes,670,29,90
4,A&W Restaurants,A&W® Root Beer Freeze (large),Shakes,820,18,150
...,...,...,...,...,...,...
5126,Wendy's,Jr. Original Chocolate Frosty™,Shakes,200,5,32
5127,Wendy's,Grilled Chicken Go Wrap,Wraps,260,10,25
5128,Wendy's,Asiago Ranch Chicken Club,Sandwiches,670,32,57
5129,Wendy's,Spicy Chicken Go Wrap,Wraps,330,16,30
