# Introduction

This capstone project determines the number of restaurants in neighborhoods around Toronto and identifies neighborhoods with few or no nearby restaurants. This data will be relevant to anyone interested in opening a restaurant in the Toronto area, particularly someone looking to open a smaller, less expensive restaurant. My reasoning is that people going to an expensive restaurant are usually willing to travel farther; in other words, the proximity of the restaurant is not much of an issue. If, however, someone were to open an inexpensive restaurant in a neighborhood that did not previously have one, locals would be inclined to go there for a quick bite to eat, since it would be only a short drive away or even within walking distance.

In [14]:
#Importing the necessary libraries
import pandas as pd
import numpy as np
import folium
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize

In [2]:
#This is where we will get the data for Toronto neighborhoods
# (the Wikipedia list of Canadian postal codes starting with "M").
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page as data later.
response.raise_for_status()

In [3]:
#Turn the downloaded HTML into a flat list of visible text fragments
soup = BeautifulSoup(response.text, 'html.parser')
wiki_page = soup.find_all('td')

# Script and style elements carry no visible text, so detach them first.
for script in soup.find_all(["script", "style"]):
    script.extract()

# Visible text only: trim every line, break lines apart on double spaces,
# trim the pieces again and keep the non-empty ones, one per output line.
text = soup.get_text()
lines = (raw_line.strip() for raw_line in text.splitlines())
chunks = (
    piece.strip()
    for stripped_line in lines
    for piece in stripped_line.split("  ")
)
text = '\n'.join(filter(None, chunks))

# Keep only the fragments at positions 13..876 of the cleaned text.
html_list = text.split('\n')[13:877]

In [4]:
#...and put into a data frame
# html_list is consumed as repeating (postcode, borough, neighbourhood)
# triples, so every third entry belongs to the same column.
# The number of rows is derived from the data rather than hard-coded (it used
# to be a literal 288, which only matched a slice of exactly 864 fragments).
# A trailing incomplete triple, if any, is ignored.
n_rows = len(html_list) // 3
flat_cells = html_list[:n_rows * 3]

p_list = flat_cells[0::3]   # postcodes
b_list = flat_cells[1::3]   # boroughs
n_list = flat_cells[2::3]   # neighbourhoods

nbhd_df = pd.DataFrame(
    {'Postcode': p_list, 'Borough': b_list, 'Neighbourhood': n_list},
    columns=['Postcode', 'Borough', 'Neighbourhood'],
)

In [5]:
# Preview the first rows of the freshly built postcode table.
nbhd_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
#The 'not assigned' values are removed as these postal codes will not be relevant to our purposes
# .copy() makes the filtered frame independent of the original, so the
# assignment below really modifies nbhd_df (no chained-assignment pitfalls).
nbhd_df = nbhd_df.loc[nbhd_df['Borough'] != 'Not assigned'].copy()

# A row with a borough but no neighbourhood name reuses the borough name.
unnamed = nbhd_df['Neighbourhood'] == 'Not assigned'
nbhd_df.loc[unnamed, 'Neighbourhood'] = nbhd_df.loc[unnamed, 'Borough']

In [7]:
# Preview the table after dropping rows whose borough is 'Not assigned'.
nbhd_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [8]:
#One row per unique Postcode/Borough pair; neighbourhood names are joined with ', '
def _merge_group(values):
    """Combine one group's column: sum float64 data, otherwise join with ', '."""
    if values.dtype == 'float64':
        return values.sum()
    return ', '.join(values)


grouped_df = (
    nbhd_df
    .groupby(['Postcode', 'Borough'], as_index=False)
    .agg(_merge_group)
)

In [9]:
#Fill the dataframe with the correct LAT and LONG values
df_ll = pd.read_csv('http://cocl.us/Geospatial_data')

# Look coordinates up by postal code in one pass instead of scanning
# grouped_df once per CSV row. Should a postal code be listed more than once,
# the last entry wins, as it did with the former row-by-row overwrite.
coords = (
    df_ll
    .drop_duplicates(subset='Postal Code', keep='last')
    .set_index('Postal Code')
)

# Postcodes missing from the CSV get NaN (previously an empty string), which
# keeps both columns numeric.
grouped_df['Latitude'] = grouped_df['Postcode'].map(coords['Latitude'])
grouped_df['Longitude'] = grouped_df['Postcode'].map(coords['Longitude'])

In [10]:
# Preview the grouped table with its new Latitude/Longitude columns.
grouped_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395


In [12]:
#Now we will use Foursquare to determine the total number of restaurants
#around each row of grouped_df (one row per Postcode/Borough pair).
# The empty string is only a placeholder; the Foursquare query loop that
# follows fills in the actual counts.
grouped_df['Total Restaurants'] = ''

In [15]:
# Count the restaurants Foursquare reports around every row of grouped_df
# and store the counts in the 'Total Restaurants' column.
import os

# Credentials come from the environment so they are never committed with the
# notebook. NOTE(review): the key pair that used to be hard-coded here has
# been published and should be revoked/rotated in the Foursquare console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')  # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
        'environment variables before running this cell.'
    )

# Query settings are the same for every location, so define them once.
VERSION = '20180604'
search_query = 'restaurant'
radius = 500
LIMIT = 100
EXPLORE_URL = 'https://api.foursquare.com/v2/venues/explore'

restaurant_counts = []
for latitude, longitude in zip(grouped_df['Latitude'], grouped_df['Longitude']):
    # Let requests build and URL-encode the query string.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'll': '{},{}'.format(latitude, longitude),
        'v': VERSION,
        'query': search_query,
        'radius': radius,
        'limit': LIMIT,
    }
    reply = requests.get(EXPLORE_URL, params=params, timeout=30)
    # Surface HTTP failures (bad credentials, quota, ...) explicitly.
    reply.raise_for_status()
    results = reply.json()
    venues = results.get('response', {})
    # A response without 'totalResults' is counted as zero restaurants.
    restaurant_counts.append(int(venues.get('totalResults', 0)))

# Assign the whole column at once; element-wise writes through
# grouped_df[col].iloc[i] are chained assignments that pandas may apply to a
# temporary copy instead of grouped_df.
grouped_df['Total Restaurants'] = restaurant_counts

In [16]:
# Preview the table with the restaurant counts filled in.
grouped_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Total Restaurants
0,M1B,Scarborough,"Rouge, Malvern",43.8067,-79.1944,2
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7845,-79.1605,0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7636,-79.1887,5
3,M1G,Scarborough,Woburn,43.771,-79.2169,3
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395,5


In [17]:
#Map of Toronto marking every row of grouped_df that has fewer than 3 restaurants
has_few_restaurants = grouped_df['Total Restaurants'] < 3
few_restaurants_df = grouped_df.loc[has_few_restaurants]

# Base map centred on [43.6532, -79.3832].
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# One circle marker per remaining row; its popup reads "<neighbourhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
marker_rows = few_restaurants_df[marker_columns].itertuples(index=False, name=None)
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup(
        '{}, {}'.format(neighborhood, borough),
        parse_html=True,
    )
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell, so the notebook renders the map.
map_toronto