In [150]:
# Import libaries

import pandas as pd
import requests
import html
from bs4 import BeautifulSoup

### Step 1: Create a soup object from the home page

In [151]:
url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
res = requests.get(url)

In [152]:
res.status_code

200

In [153]:
soup = BeautifulSoup(res.content, 'lxml')


### Step 2: Scrape the home page soup for every restaurant

Note: Your best bet is to create a list of dictionaries, one for each restaurant. Each dictionary contains the restaurant's name and path from the `href`. The result of your scrape should look something like this:

```python
restaurants = [
    {'name': 'A&W Restaurants', 'href': 'restaurants/1.html'}, 
    {'name': "Applebee's", 'href': 'restaurants/2.html'},
    ...
]
```

In [154]:
name = soup.find('tbody')




In [155]:
names = soup.find_all('a')
len(names)

45

In [156]:

restaurants = []

for row in names[1:]:
    reste = {}
    reste['name'] =  row.text
    reste['href'] = row.attrs['href']
    restaurants.append(reste)

restaurants

[{'name': 'A&W Restaurants', 'href': 'restaurants/1.html'},
 {'name': "Applebee's", 'href': 'restaurants/2.html'},
 {'name': "Arby's", 'href': 'restaurants/3.html'},
 {'name': 'Atlanta Bread Company', 'href': 'restaurants/4.html'},
 {'name': "Bojangle's Famous Chicken 'n Biscuits",
  'href': 'restaurants/5.html'},
 {'name': 'Buffalo Wild Wings', 'href': 'restaurants/6.html'},
 {'name': 'Burger King', 'href': 'restaurants/7.html'},
 {'name': "Captain D's", 'href': 'restaurants/8.html'},
 {'name': "Carl's Jr.", 'href': 'restaurants/9.html'},
 {'name': "Charley's Grilled Subs", 'href': 'restaurants/10.html'},
 {'name': 'Chick-fil-A', 'href': 'restaurants/11.html'},
 {'name': "Chili's", 'href': 'restaurants/12.html'},
 {'name': 'Chipotle Mexican Grill', 'href': 'restaurants/13.html'},
 {'name': "Church's", 'href': 'restaurants/14.html'},
 {'name': 'Corner Bakery Cafe', 'href': 'restaurants/15.html'},
 {'name': 'Dairy Queen', 'href': 'restaurants/16.html'},
 {'name': "Denny's", 'href': 'res

### Step 3: Using the slug, scrape each restaurant's page and create a single list of food dictionaries.

Your list of foods should look something like this:
```python
foods = [
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    {
        'calories': '0',
        'carbs': '0',
        'category': 'Drinks',
        'fat': '0',
        'name': 'A&W® Diet Root Beer',
        'restaurant': 'A&W Restaurants'
    },
    ...
]
```

**Note**: Remove extra white space from each category

### Step 4: Create a pandas DataFrame from your list of foods

**Note**: Your DataFrame should have 5,131 rows

In [157]:
BASE_URL = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
com_board_1 = 'restaurants/1.html'

url = BASE_URL + com_board_1

url

'https://pages.git.generalassemb.ly/rldaggie/for-scraping/restaurants/1.html'

In [158]:
res = requests.get(url)

In [159]:
res.status_code

200

In [160]:
res.content

b'<!DOCTYPE html>\n<html lang="en">\n  <head>\n    <meta charset="utf-8"/>\n    <meta http-equiv="X-UA-Compatible" content="IE=edge"/>\n    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>\n    <title>Nutrition Information</title>\n    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">\n\n  </head>\n  <body>\n    <header>\n      <section class="container">\n        <nav role="navigation" class="navbar navbar-expand-lg navbar-light bg-light">\n<a class="navbar-brand" href="/">Nutrition Information</a>        </nav>\n      </section>\n    </header>\n    <main role="main" class="container">\n      <br>\n      <div class="alert alert-danger">\n        NOTE: This data is super old and rife with errors. It\'s meant for scraping practice only.\n      </div>\n<h3 class=display-3>\nA&amp;W Restaurants</h3>\n\n<p c

In [161]:
soup = BeautifulSoup(res.content, 'lxml')

In [162]:
soup

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title>Nutrition Information</title>
<link crossorigin="anonymous" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" rel="stylesheet"/>
</head>
<body>
<header>
<section class="container">
<nav class="navbar navbar-expand-lg navbar-light bg-light" role="navigation">
<a class="navbar-brand" href="/">Nutrition Information</a> </nav>
</section>
</header>
<main class="container" role="main">
<br/>
<div class="alert alert-danger">
        NOTE: This data is super old and rife with errors. It's meant for scraping practice only.
      </div>
<h3 class="display-3">
A&amp;W Restaurants</h3>
<p class="lead">Data Source: http://www.awrestaurants.com/</p>
<table class="table">
<thead>
<tr>
<th>Name<

In [163]:
soup.find('tbody')

<tbody>
<tr>
<td>Original Bacon Double Cheeseburger</td>
<td>Burgers</td>
<td>760</td>
<td>45</td>
<td>45</td>
</tr>
<tr>
<td>Coney (Chili) Dog</td>
<td>Entrees</td>
<td>340</td>
<td>20</td>
<td>26</td>
</tr>
<tr>
<td>Chili Fries</td>
<td>French Fries</td>
<td>370</td>
<td>15</td>
<td>49</td>
</tr>
<tr>
<td>Strawberry Milkshake (small)</td>
<td>Shakes</td>
<td>670</td>
<td>29</td>
<td>90</td>
</tr>
<tr>
<td>A&amp;W® Root Beer Freeze (large)</td>
<td>Shakes</td>
<td>820</td>
<td>18</td>
<td>150</td>
</tr>
<tr>
<td>Caramel Sundae</td>
<td>Desserts</td>
<td>340</td>
<td>9</td>
<td>57</td>
</tr>
<tr>
<td>Strawberry Banana Smoothee</td>
<td>Shakes</td>
<td>420</td>
<td>6</td>
<td>86</td>
</tr>
<tr>
<td>Chocolate Fudge Blendrrr</td>
<td>Desserts</td>
<td>1010</td>
<td>59</td>
<td>152</td>
</tr>
<tr>
<td>Strawberry Limeade</td>
<td>Drinks</td>
<td>420</td>
<td>0</td>
<td>105</td>
</tr>
<tr>
<td>Watermelon Slushee</td>
<td>Drinks</td>
<td>270</td>
<td>0</td>
<td>73</td>
</tr>
<tr>
<td>Lime Slu

In [164]:
food_all = soup.find_all('tr')

In [165]:
food_all

[<tr>
 <th>Name</th>
 <th>Category</th>
 <th>Calories</th>
 <th>Fat</th>
 <th>Carbs</th>
 </tr>, <tr>
 <td>Original Bacon Double Cheeseburger</td>
 <td>Burgers</td>
 <td>760</td>
 <td>45</td>
 <td>45</td>
 </tr>, <tr>
 <td>Coney (Chili) Dog</td>
 <td>Entrees</td>
 <td>340</td>
 <td>20</td>
 <td>26</td>
 </tr>, <tr>
 <td>Chili Fries</td>
 <td>French Fries</td>
 <td>370</td>
 <td>15</td>
 <td>49</td>
 </tr>, <tr>
 <td>Strawberry Milkshake (small)</td>
 <td>Shakes</td>
 <td>670</td>
 <td>29</td>
 <td>90</td>
 </tr>, <tr>
 <td>A&amp;W® Root Beer Freeze (large)</td>
 <td>Shakes</td>
 <td>820</td>
 <td>18</td>
 <td>150</td>
 </tr>, <tr>
 <td>Caramel Sundae</td>
 <td>Desserts</td>
 <td>340</td>
 <td>9</td>
 <td>57</td>
 </tr>, <tr>
 <td>Strawberry Banana Smoothee</td>
 <td>Shakes</td>
 <td>420</td>
 <td>6</td>
 <td>86</td>
 </tr>, <tr>
 <td>Chocolate Fudge Blendrrr</td>
 <td>Desserts</td>
 <td>1010</td>
 <td>59</td>
 <td>152</td>
 </tr>, <tr>
 <td>Strawberry Limeade</td>
 <td>Drinks</td>
 <td

In [234]:
def get_food(restaurants):
    foods_list = []
    for i in (restaurants):
        BASE_url = 'https://pages.git.generalassemb.ly/rldaggie/for-scraping/'
        restaurant = i['href']
        url = BASE_url + restaurant
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'lxml')
        food = soup.find('tbody')
        food_tr = food.find_all('tr')
        food1 = []
        food2 = []
        #foods = []
        for i in range(len(food_tr)):
            food_dict = {}
            food1.append(food_tr[i].text.split('\n'))
            food2.append([food.strip() for food in food1[i] if food.strip() != ''])
            for f in food2:
                food_dict['name'] = f[0]
                food_dict['calories'] = f[2]
                food_dict['carbs'] = f[4]
                food_dict['fat'] = f[3]
                food_dict['category'] = f[1]
            foods_list.append(food_dict)
    return foods_list

In [235]:
food_list = get_food(restaurants)

In [236]:
food_df = pd.DataFrame(food_list)

In [237]:
food_df

Unnamed: 0,name,calories,carbs,fat,category
0,Original Bacon Double Cheeseburger,760,45,45,Burgers
1,Coney (Chili) Dog,340,26,20,Entrees
2,Chili Fries,370,49,15,French Fries
3,Strawberry Milkshake (small),670,90,29,Shakes
4,A&W® Root Beer Freeze (large),820,150,18,Shakes
...,...,...,...,...,...
5126,Jr. Original Chocolate Frosty™,200,32,5,Shakes
5127,Grilled Chicken Go Wrap,260,25,10,Wraps
5128,Asiago Ranch Chicken Club,670,57,32,Sandwiches
5129,Spicy Chicken Go Wrap,330,30,16,Wraps


### Step 5: Export to csv

**Note:** Don't export the index column from your DataFrame

In [219]:
food_df.to_csv('./foodlist.csv',index = False)