In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import json 
urls=["https://www.yelp.com/menu/five-guys-north-bergen-2","https://www.yelp.com/menu/mcdonalds-new-york-411","https://www.yelp.com/menu/starbucks-fort-lee-2"]

In [2]:
def TopRest_Menu(urls,name):#accepts an URL parameter.
    page = requests.get(urls) 
    soup = BeautifulSoup(page.content, 'html.parser') #parser HTML
    right_table = soup.find('div', {'class':'menu-sections'}) #finds div tags with menu-sections class.
    rows=right_table.findAll('h4') #Finds h4 tags
    for row in rows: #iterates a loop to find <a> tags inside <h4>
        name.append(str((row.get_text()).strip()).replace("/","")) #appends the foods name into L1 list.
    return name

In [3]:
def Menu_Price(urls,price):
    page = requests.get(urls) 
    soup = BeautifulSoup(page.content, 'html.parser') #parser HTML
    right_table = soup.find('div', {'class':'menu-sections'}) #finds div tags with menu-sections class.
    rows=right_table.findAll('ul') #Finds <ul> tags
    for row in rows: #iterates a loop inside <ul> tags
        price.append(str(row.get_text()).strip()) #appends the price list by removing tags but take the string text
    return price  

In [4]:
df1=pd.DataFrame()
name_list=[] #foods name list
price_list=[]#price list
for i in urls:
    name_list=TopRest_Menu(i,name_list) #calls TopRest_Menu function to scrape and append the names into name_list.
    price_list=Menu_Price(i,price_list)
df1['Food Name']=name_list #adds menu name into df1
df1['Price List']=price_list # Please note that Starbucks do not have their price on YELP menu.
df1

Unnamed: 0,Food Name,Price List
0,Coke Bottle,$3.59
1,Diet Coke Bottle,$3.59
2,Sprite Bottle,$3.59
3,Hamburger,$8.75
4,Cheeseburger,$9.59
5,Bacon Burger,$9.95
6,Bacon Cheeseburger,$10.79
7,Little Hamburger,$6.35
8,Little Cheeseburger,$7.19
9,Little Bacon Burger,$7.55


In [5]:
def API_Query(query_food):    
    api_id='Write your Api ID here' #API ID is required
    api_key='Write your Api Key' #API Key is required.
    #For post method. application/x-www-form-unlencoded is a URL encoded form. 
    #this content type describes form data that is sent in a single block in the HTTP message body.
    headers={'Content-Type': 'application/x-www-form-urlencoded','X-APP-ID':api_id,'X-APP-KEY':api_key,'x-remote-user-id':'0'}
    #The above "x-remote-user-id": 0 for development, 1 for commercial purposes according to this API
    url='https://trackapi.nutritionix.com/v2/natural/nutrients' #Nutritionix API url
    #data for post method to be used as a query.
    query = {'query':query_food}
    response = requests.request('POST',url, headers=headers,data=query)#Post method should be used otherwise error 400 will be displayed.
    return response.json()
item1=name_list[1]
query1 = API_Query(item1)#Call API_Query function for the first item in the list.
print(query1)

{'foods': [{'food_name': 'diet coke', 'brand_name': None, 'serving_qty': 1, 'serving_unit': 'bottle', 'serving_weight_grams': 474, 'nf_calories': 0, 'nf_total_fat': 0, 'nf_saturated_fat': 0, 'nf_cholesterol': 0, 'nf_sodium': 75.84, 'nf_total_carbohydrate': 0.47, 'nf_dietary_fiber': 0, 'nf_sugars': 0, 'nf_protein': 0, 'nf_potassium': 18.96, 'nf_p': 52.14, 'full_nutrients': [{'attr_id': 203, 'value': 0}, {'attr_id': 204, 'value': 0}, {'attr_id': 205, 'value': 0.474}, {'attr_id': 207, 'value': 0.474}, {'attr_id': 208, 'value': 0}, {'attr_id': 221, 'value': 0}, {'attr_id': 255, 'value': 473.052}, {'attr_id': 262, 'value': 52.14}, {'attr_id': 263, 'value': 0}, {'attr_id': 268, 'value': 0}, {'attr_id': 269, 'value': 0}, {'attr_id': 291, 'value': 0}, {'attr_id': 301, 'value': 18.96}, {'attr_id': 303, 'value': 0.0948}, {'attr_id': 304, 'value': 4.74}, {'attr_id': 305, 'value': 52.14}, {'attr_id': 306, 'value': 18.96}, {'attr_id': 307, 'value': 75.84}, {'attr_id': 309, 'value': 0.1422}, {'attr_

In [10]:
#Defines API_Analysis function to receive a foods list parameter to execute.
def Api_Analysis(foods,price):
    #Column List. Rows (values) will be stored in the data frame after API calls and appends the webscraped values
    food_name = [] 
    price_list = []
    weight=[] 
    calories =  [] 
    carbohydrate=[] 
    protein=[]
    fat=[]
    saturated_fat=[]
    fiber=[]
    cholesterol=[]
    sodium=[]
    sugars=[]
    grams_per_calories=[] #new list to append the calculation result from calories/weight
    #List of Foods that will be iterated to query in the API call functions. 
    for i in foods: #iterates a loop in the food list.
        data = API_Query(i)   #Call the API function
        food_name.append(data['foods'][0]['tags']['item']) #append food name
        weight.append(data["foods"][0]['serving_weight_grams']) #append the weight of the food webscraped.
        calories.append(data["foods"][0]['nf_calories'])       #append calories
        carbohydrate.append(data['foods'][0]['nf_total_carbohydrate']) #append carbohydrate
        protein.append(data['foods'][0]['nf_protein']) # append protein
        fat.append(data['foods'][0]['nf_total_fat']) # append fat
        saturated_fat.append(data['foods'][0]['nf_saturated_fat']) #append saturated fat
        fiber.append(data['foods'][0]['nf_dietary_fiber']) #append dietary fiber
        cholesterol.append(data['foods'][0]['nf_cholesterol']) #append cholesterol
        sodium.append(data['foods'][0]['nf_sodium']) #append sodium
        sugars.append(data['foods'][0]['nf_sugars']) #append sugars
        grams_per_calories.append(float(data['foods'][0]['nf_calories'])/float(data['foods'][0]['serving_weight_grams']))#Calculates calories per gram
        grams_per_calories=list(np.around(np.array(grams_per_calories),2)) #Rounds the list to two decimal points
    for j in price:
        price_list.append(j)
    df=pd.DataFrame()
    #set data frame columns name
    df['Food Name'] = food_name
    df['Price'] = price_list
    df['Serving Size (grams)'] = weight
    df['Calories'] = calories
    df['Carbohydrate'] = carbohydrate
    df['Protein'] = protein
    df['Total Fat'] = fat
    df['Saturated Fat'] = saturated_fat
    df['Fiber'] = fiber
    df['Cholesterol'] = cholesterol
    df['Sodium'] = sodium
    df['Sugar'] = sugars
    df['Grams Per Calories'] = grams_per_calories
    return df

In [11]:
df2=pd.DataFrame()#df2_a and df2_b will be appended into df2.
df2_a=pd.DataFrame()#Five Guy's Menu
df2_b=pd.DataFrame()#Mc Donald's Menu
df2_c=pd.DataFrame()#Starbucks Menu
#Daily API Usage limit is 200.
df2_a=Api_Analysis(name_list[:20],price_list[:20]) #Call API_Analysis function. Because of API usage limit, from 0 to 19
df2_b=Api_Analysis(name_list[379:396],price_list[379:396]) #Call API_Analysis function. range from 379 - 395
df2_c=Api_Analysis(name_list[510:529],price_list[510:529])#Call the function 510-528

In [12]:
df2 = df2_a.append(df2_b,ignore_index=True)#appends df2_a+df2_b
df2 = df2_a.append(df2_c,ignore_index=True)#appends df2_c and sets as a df2
df2

Unnamed: 0,Food Name,Price,Serving Size (grams),Calories,Carbohydrate,Protein,Total Fat,Saturated Fat,Fiber,Cholesterol,Sodium,Sugar,Grams Per Calories
0,coca cola,$3.59,431.0,181.02,44.65,0.0,1.08,0.0,0.0,0.0,12.93,42.84,0.42
1,diet coke,$3.59,474.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,75.84,0.0,0.0
2,sprite soda,$3.59,492.0,201.72,51.27,0.44,0.0,0.0,0.0,0.0,49.2,51.07,0.41
3,hamburger,$8.75,226.0,540.14,40.27,34.28,26.56,10.52,,122.04,791.0,,2.39
4,cheeseburger,$9.59,199.0,535.31,39.24,30.27,28.66,14.0,2.39,95.52,1176.09,7.16,2.69
5,bacon cheeseburger,$9.95,211.0,595.02,39.86,33.3,33.46,12.93,2.74,105.5,1422.14,9.77,2.82
6,bacon cheeseburger,$10.79,211.0,595.02,39.86,33.3,33.46,12.93,2.74,105.5,1422.14,9.77,2.82
7,hamburger,$6.35,226.0,540.14,40.27,34.28,26.56,10.52,,122.04,791.0,,2.39
8,cheeseburger,$7.19,199.0,535.31,39.24,30.27,28.66,14.0,2.39,95.52,1176.09,7.16,2.69
9,bacon cheeseburger,$7.55,211.0,595.02,39.86,33.3,33.46,12.93,2.74,105.5,1422.14,9.77,2.82


In [16]:
df3 = pd.DataFrame()#Creates df3 to contain df1 and df2
df3=pd.merge(df1, df2, on='Food Name', how='right')#right inner join since df2 has nutrition facts into df3
df3=df3.drop_duplicates(subset=['Food Name']) #Drops duplicated food name since there were identical menu on Five Guys, McDonald's and Starbucks
df3=df3.drop('Price List',axis=1)#Drops the price list column.
df3

Unnamed: 0,Food Name,Price,Serving Size (grams),Calories,Carbohydrate,Protein,Total Fat,Saturated Fat,Fiber,Cholesterol,Sodium,Sugar,Grams Per Calories
0,coca cola,$3.59,431.0,181.02,44.65,0.0,1.08,0.0,0.0,0.0,12.93,42.84,0.42
1,diet coke,$3.59,474.0,0.0,0.47,0.0,0.0,0.0,0.0,0.0,75.84,0.0,0.0
2,sprite soda,$3.59,492.0,201.72,51.27,0.44,0.0,0.0,0.0,0.0,49.2,51.07,0.41
3,hamburger,$8.75,226.0,540.14,40.27,34.28,26.56,10.52,,122.04,791.0,,2.39
5,cheeseburger,$9.59,199.0,535.31,39.24,30.27,28.66,14.0,2.39,95.52,1176.09,7.16,2.69
7,bacon cheeseburger,$9.95,211.0,595.02,39.86,33.3,33.46,12.93,2.74,105.5,1422.14,9.77,2.82
11,hot dog,$5.99,48.0,154.56,1.28,5.61,14.09,5.56,0.0,27.84,408.96,0.61,3.22
12,cheese dog,$6.83,163.0,409.26,31.64,14.92,24.5,9.79,1.25,46.11,1187.48,4.66,2.51
13,bacon,$7.19,34.5,161.46,0.59,11.7,12.11,4.13,0.0,34.16,580.98,0.0,4.68
15,grilled cheese,$5.99,106.19,365.76,28.41,11.74,22.88,13.24,1.35,63.26,886.01,5.44,3.44


In [17]:
df3.describe() #df3 analysis

Unnamed: 0,Serving Size (grams),Calories,Carbohydrate,Protein,Total Fat,Saturated Fat,Fiber,Cholesterol,Sodium,Sugar,Grams Per Calories
count,28.0,28.0,28.0,28.0,28.0,28.0,27.0,28.0,28.0,27.0,28.0
mean,272.679286,263.481786,26.713571,10.726786,12.843571,5.397857,1.237407,39.489286,381.714286,13.996296,1.615
std,159.209241,187.081477,17.082253,9.486347,13.318761,5.96582,1.734854,62.283411,417.042108,13.943335,1.361058
min,2.6,0.0,0.47,0.0,0.0,0.0,0.0,0.0,0.26,0.0,0.0
25%,139.975,150.8,14.9425,3.6025,0.925,0.1725,0.0,0.0,95.3775,2.135,0.435
50%,307.6,212.305,30.025,11.155,9.5,4.19,0.33,26.175,207.17,11.29,0.88
75%,425.6,376.635,39.9625,12.8525,23.285,8.14,1.615,48.39,538.86,20.325,2.835
max,492.0,595.02,51.27,34.28,44.36,25.45,6.79,306.58,1422.14,51.07,4.68


In [18]:
df3.to_csv('df3.csv') #saves as a csv file