In [19]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib imports
import matplotlib.cm as cm
import matplotlib.colors as colors

import folium # map rendering library
from bs4 import BeautifulSoup
print("imports done")

imports done


In [20]:
# Lift pandas' display limits so rendered DataFrames are never truncated
# (None means "no limit" for both settings).
pd.options.display.max_columns = None
pd.options.display.max_rows = None


In [29]:
# Build `toronto_neighborhoods`, one row per postal code, with five columns:
# PostalCode, Borough, Neighborhood, Latitude and Longitude.
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Fetch the Wikipedia page and locate the postal-code wikitable with BeautifulSoup.
# NOTE: despite its name, `website_url` holds the page's HTML text, not a URL.
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
wiki_response.raise_for_status()
website_url = wiki_response.text

soup = BeautifulSoup(website_url, 'lxml')
My_table = soup.find('table', {'class': 'wikitable sortable'})
if My_table is None:
    raise ValueError("No 'wikitable sortable' table found on the Wikipedia page")

# Collect one plain record per usable table row. Records are gathered in a list
# and turned into a DataFrame once: DataFrame.append no longer exists in
# pandas 2.x, and growing a frame row by row is quadratic.
scraped_rows = []
for table_row in My_table.find_all('tr'):
    cells = table_row.find_all('td')
    # Only rows with exactly three cells (postal code, borough, neighborhood) are data rows.
    if len(cells) != 3:
        continue
    # Full cell text with surrounding whitespace/newlines removed.
    pc, bo, nb = (cell.get_text().strip() for cell in cells)

    # Ignore rows whose borough is Not assigned.
    if bo == 'Not assigned':
        continue
    # A row with a borough but a Not assigned neighborhood takes the borough as its neighborhood.
    if nb.startswith('Not assigned'):
        nb = bo
    scraped_rows.append({'PostalCode': pc, 'Borough': bo, 'Neighborhood': nb})

if not scraped_rows:
    raise ValueError('No postal-code rows were parsed; the Wikipedia table layout may have changed')

# More than one neighborhood can share a postal code: combine those rows into a
# single row with the neighborhoods separated by a comma. sort=False keeps the
# postal codes in order of first appearance; the borough of the first row wins.
toronto_neighborhoods = (
    pd.DataFrame(scraped_rows, columns=['PostalCode', 'Borough', 'Neighborhood'])
    .groupby('PostalCode', sort=False, as_index=False)
    .agg({'Borough': 'first', 'Neighborhood': ','.join})
)

# Fetch latitude and longitude stored in a CSV and attach them by postal code.
# A left merge replaces the nested iterrows() loops, whose writes to the yielded
# rows are not guaranteed to reach the DataFrame. Postal codes without a match
# keep NaN coordinates.
latlon = pd.read_csv('http://cocl.us/Geospatial_data')
postal_coordinates = (
    latlon[['Postal Code', 'Latitude', 'Longitude']]
    .drop_duplicates('Postal Code', keep='last')
    .rename(columns={'Postal Code': 'PostalCode'})
)
toronto_neighborhoods = toronto_neighborhoods.merge(postal_coordinates, on='PostalCode', how='left')[column_names]

# Display first few rows
toronto_neighborhoods.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.6543,-79.3606
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.7185,-79.4648
4,M7A,Queen's Park,Queen's Park,43.6623,-79.3895


In [22]:
# Count the non-null entries of every column for each borough
toronto_neighborhoods.groupby(by='Borough').count()


Unnamed: 0_level_0,PostalCode,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,18,18,18,18
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
Mississauga,1,1,1,1
North York,24,24,24,24
Queen's Park,1,1,1,1
Scarborough,17,17,17,17
West Toronto,6,6,6,6


In [73]:
# Generate a map of Toronto showing all the neighborhoods.

address = 'Toronto, Canada'

# Nominatim needs an identifying user agent; geopy 2.x refuses to construct
# the geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude

# create neighborhood map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per neighborhood row; rows without coordinates are
# skipped because a marker cannot be placed at NaN
mappable_neighborhoods = toronto_neighborhoods.dropna(subset=['Latitude', 'Longitude'])
for lat, lng, borough, neighborhood in zip(mappable_neighborhoods['Latitude'],
                                           mappable_neighborhoods['Longitude'],
                                           mappable_neighborhoods['Borough'],
                                           mappable_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html belongs to the popup: it makes the label render as text, not markup
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto



In [85]:
#GET NEARBY VENUES
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100,
                    version='20181207', client_id=None, client_secret=None):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped, so the i-th name is paired with
        the i-th latitude and longitude.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, default 100
        Value sent as the ``limit`` query parameter.
    version : str, default '20181207'
        Value sent as the ``v`` query parameter.
    client_id, client_secret : str, optional
        Foursquare credentials. When omitted they are read from the
        ``FOURSQUARE_CLIENT_ID`` / ``FOURSQUARE_CLIENT_SECRET`` environment
        variables, so secrets never have to be written into the notebook.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' holds the name of the venue's first category (None
        when the venue lists no category), which keeps the column hashable for
        ``unique()`` / ``groupby``. The frame is empty, but still has all seven
        columns, when no venue is found.

    Raises
    ------
    ValueError
        If no credentials are passed in or found in the environment.
    requests.HTTPError
        If Foursquare answers with an error status.
    """
    import os  # local import: the notebook's import cell does not bring in os

    client_id = client_id or os.environ.get('FOURSQUARE_CLIENT_ID')
    client_secret = client_secret or os.environ.get('FOURSQUARE_CLIENT_SECRET')
    if not client_id or not client_secret:
        raise ValueError(
            'Foursquare credentials missing: pass client_id/client_secret or set '
            'FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET')

    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # make the GET request; requests encodes the query string from params
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': client_id,
                'client_secret': client_secret,
                'v': version,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        response.raise_for_status()

        # The venues live under response -> groups[0] -> items in the JSON body;
        # the Response object itself must not be iterated (that yields raw bytes).
        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning .columns afterwards would fail.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)
    print("Venues retrieved")
    return nearby_venues

In [None]:
# North York has 24 neighborhoods and Downtown already seems saturated, so focus on North York.
# To analyse another borough, change the borough name in the filter below.

northyork_data = toronto_neighborhoods[toronto_neighborhoods['Borough'] == 'North York'].reset_index(drop=True)
if northyork_data.empty:
    raise ValueError("No rows found for borough 'North York'")

# Per-neighborhood coordinates and names as plain lists (one entry per row).
latitude = northyork_data['Latitude'].tolist()
longitude = northyork_data['Longitude'].tolist()
neigh = northyork_data['Neighborhood'].tolist()

# folium.Map needs a single [lat, lng] pair, not the two coordinate lists,
# so centre the map on the mean position of the borough's neighborhoods.
center_latitude = pd.to_numeric(northyork_data['Latitude']).mean()
center_longitude = pd.to_numeric(northyork_data['Longitude']).mean()
map_northyork = folium.Map(location=[center_latitude, center_longitude], zoom_start=11)
print('Mapped it')

# add one circle marker per neighborhood
for lat, lng, neighborhood_name in zip(latitude, longitude, neigh):
    # parse_html belongs to the popup: it makes the name render as text, not markup
    popup = folium.Popup(neighborhood_name, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_northyork)

map_northyork

In [None]:
# Capture all venues in the North York neighborhoods.
# The borough filter is repeated here so this cell does not depend on which
# map cell ran last.
northyork_data = toronto_neighborhoods[toronto_neighborhoods['Borough'] == 'North York'].reset_index(drop=True)

# Coordinates are taken straight from northyork_data rather than from the
# notebook-level `latitude` / `longitude` names, which the Toronto map cell
# also assigns (as single floats).
northyork_venues = getNearbyVenues(names=northyork_data['Neighborhood'],
                                   latitudes=northyork_data['Latitude'],
                                   longitudes=northyork_data['Longitude']
                                  )

print('There are {} uniques categories in North York.'.format(len(northyork_venues['Venue Category'].unique())))

In [None]:
# Now check which neighborhood in North York has the most venues: count the rows
# per neighborhood and list the neighborhoods with the highest venue count first.
northyork_venues.groupby('Neighborhood').count().sort_values('Venue', ascending=False)