In [15]:
#Installing required libraries

#!pip install lxml
#!conda install -c conda-forge geopy --yes
#!pip install BeautifulSoup4
#!pip install html5lib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json, lxml
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
# import folium # map rendering library
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')

In [16]:
#Import folium if its not installed then install it
try:
    import folium
except:
    !pip install folium
    import folium

In [19]:
#importing data

page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' 
source = requests.get(page).text
soup = BeautifulSoup(source)

table_data = soup.find('div', class_='mw-parser-output')
table = table_data.table.tbody

#declaring columns
columns = ['PostalCode', 'Borough', 'Neighbourhood']

#create a dictonary
data = dict({key:[]*len(columns) for key in columns})

for row in table.find_all('tr'):
    for i,column in zip(row.find_all('td'),columns):
        i = i.text
        i = i.replace('\n', '')
        data[column].append(i)

#converting data into a pandas dataframe
df_pc = pd.DataFrame.from_dict(data=data)[columns]

In [20]:
#cleaning dataframe
df_pc = df_pc[df_pc['Borough'] != 'Not assigned'].reset_index(drop = True)
print('After dropping rows where borough is "Not assigned", Shape is: ',df_pc.shape)
print('Number of rows where Neighbourhood is "Not assigned" but borough has value: ', 
      df_pc[df_pc['Neighbourhood'] == 'Not assigned'].shape[0])

After dropping rows where borough is "Not assigned", Shape is:  (103, 3)
Number of rows where Neighbourhood is "Not assigned" but borough has value:  0


In [21]:
p, b, n = [], [], []
for postcode, borough, neigh in zip(df_pc['PostalCode'], df_pc['Borough'], df_pc['Neighbourhood']):
    p.append(postcode)
    b.append(borough)
    if neigh == 'Not assigned':
        n.append(borough)
    else:
        n.append(neigh)

df_pc = pd.DataFrame({'PostalCode': p, 'Borough': b, 'Neighbourhood':n})[columns]
print(df_pc.shape)
df_pc.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [22]:
postcodes = df_pc['PostalCode'].values
boroughs = df_pc['Borough'].values
neighs = df_pc['Neighbourhood'].values

#create a dictionary with keys as Postcode and Borough, keys of dictioaries are unique
dic = dict({(key1,key2): [] for key1, key2 in zip(postcodes, boroughs)})
print('Number of keys in the dictionary are: ', len(dic.keys()))

#filling the values of keys of dictionary
for postcode, borough, neigh in zip(postcodes,boroughs, neighs):
    key = (postcode, borough)
    dic[key].append(neigh)

df_pc = pd.DataFrame(columns = ['Postal Code', 'Borough', 'Neighbourhood'])
for key, value in dic.items():
    postcode, borough, neig = key[0], key[1], value
    neig = ', '.join(neig)
    df_pc = df_pc.append({'Postal Code': postcode,
                     'Borough': borough,
                     'Neighbourhood': neig}, ignore_index = True)
print('Shape of final data is: ', df.shape)
df_pc.head(10)

Number of keys in the dictionary are:  103
Shape of final data is:  (180, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,Malvern / Rouge
7,M3B,North York,Don Mills
8,M4B,East York,Parkview Hill / Woodbine Gardens
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [23]:
# download the data of latitude and longitude: link provided by >>>>coursera
!wget http://cocl.us/Geospatial_data
try:
    df_pc['Latitude'] = lats
    df_pc['Longitude'] = lons
except:
    latlon = pd.read_csv('Geospatial_data')
    df_pc = pd.merge(df_pc, latlon, how= 'inner', on = 'Postal Code')
    
print(df.shape)
df_pc.head(10)

--2020-04-01 01:31:46--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 158.85.108.83, 158.85.108.86, 169.48.113.194
Connecting to cocl.us (cocl.us)|158.85.108.83|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2020-04-01 01:31:46--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|158.85.108.83|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-04-01 01:31:47--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.27.197, 107.152.26.197
Connecting to ibm.box.com (ibm.box.com)|107.152.27.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2020-04-01 01:31:47--  https://ib

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [24]:
address = 'North York,Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York, Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York, Toronto are 43.7543263, -79.44911696639593.


In [25]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_pc['Latitude'], df_pc['Longitude'], df_pc['Borough'], df_pc['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [13]:
#Analysis of downtown toronto
print('Toatl number of Borough = ', len(df_pc['Borough'].unique()))
downtown_toronto = df_pc[df_pc['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
print(downtown_toronto.shape)
downtown_toronto.head()

Toatl number of Borough =  10
(19, 5)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [26]:
#creating a map of downtown toronto using folium library
address = 'Downtown Toronto ,Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of New York using latitude and longitude values
map_dwontown = folium.Map(location=[latitude, longitude], zoom_start= 11)

# add markers to map
for lat, lng, borough, neighborhood in zip(downtown_toronto['Latitude'], downtown_toronto['Longitude'], 
                                           downtown_toronto['Borough'], downtown_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dwontown)  
    
map_dwontown

In [27]:
#creating a data set of 100 locations around 1000mtrs from 43.7543263, -79.44911696639593

lat = downtown_toronto.loc[0, 'Latitude'] # neighborhood latitude value
lon = downtown_toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = downtown_toronto.loc[0, 'Neighbourhood'] # neighborhood name
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, lat, lon))

CLIENT_ID = 'CQXYHN5ZDQT51CZVUPB10EJKYPART22E05F1ZYSVQFBKUMCV' # your Foursquare ID
CLIENT_SECRET = 'WGCCDB5ZZSG133XGEBCTWVOK33QICEE0KMXDJRKPIEVYSNVI' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100
radius =1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, lat,lon, radius, LIMIT)

# gettig the venues data form Forsquare API in json format
results = requests.get(url).json()
results

Latitude and longitude values of Regent Park / Harbourfront are 43.6542599, -79.3606359.


{'meta': {'code': 200, 'requestId': '5e83ef640de0d90023d0dde9'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 113,
  'suggestedBounds': {'ne': {'lat': 43.66325990900001,
    'lng': -79.3482199002972},
   'sw': {'lat': 43.64525989099999, 'lng': -79.37305189970282}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label':