In [26]:
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import requests

import matplotlib.cm as cm
import matplotlib.colors as colors
from bs4 import BeautifulSoup
import xml
import folium

print("All libs imported")

All libs imported


### Scrape Wikipedia page using BeautifulSoup and put data of interest in table_columns

In [15]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response= requests.get(url).text
soup= BeautifulSoup(response,'html.parser')
postcode_table= soup.find('table')
table_columns= postcode_table.find_all('td')

### Convert requied data into a DataFrame

In [46]:
postcode=[]
borough=[]
neighborhood=[]

for i in range(0,len(table_columns),3):
    postcode.append(table_columns[i].text.strip())
    borough.append(table_columns[i+1].text.strip())
    neighborhood.append(table_columns[i+2].text.strip())

df= pd.DataFrame(list(zip(postcode,borough,neighborhood)),columns=['Postcode','Borough','Neighborhood'])
df.shape

(287, 3)

### Data Cleaning and Wrangling

In [47]:
# remove not assigned boroughs
df['Borough'].replace('Not assigned',np.nan, inplace=True)
df.dropna(subset=['Borough'], inplace=True)
df.shape

(210, 3)

In [92]:
# Put neighborhoods in one row separated by comma
df= df.groupby(['Postcode','Borough'])['Neighborhood'].apply(' ,'.join).reset_index()
df.shape

(103, 3)

In [117]:
#just one not assigned neighborhood
df['Neighborhood'].replace('Not assigned','Queen\'s park',inplace=True)
df['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

In [100]:
df_geo = pd.read_csv('http://cocl.us/Geospatial_data')
df_geo.rename(columns={'Postal Code':'Postcode'}, inplace=True)
df_geo

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


### Tried this package but it was not working

In [None]:
import geocoder
# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
    g = geocoder.google('{}, Toronto, Ontario'.format(postcode))
    lat_lng_coords = g.latlng
    print('g={}', g)  

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

In [162]:
dftoronto= pd.merge(df,df_geo, on='Postcode',how='inner')
dftoronto[dftoronto['Borough'].str.contains('Toronto')]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West ,Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West ,India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park ,Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park ,Forest Hill SE ,Rathnelly ,South Hi...",43.686412,-79.400049


### PART 3 OF ASSIGNEMENT- EXPLORE AND CLUSTER

In [123]:
address='Toronto, Canada'

geolocater= Nominatim(user_agent="toronto-exp")
location= geolocater.geocode(address)
lat= location.latitude
lon= location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(lat, lon))

The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


In [125]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[lat, lon], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dftoronto['Latitude'], dftoronto['Longitude'], dftoronto['Borough'], dftoronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [114]:
CLIENT_ID = 'shaded' # your Foursquare ID
CLIENT_SECRET = 'shaded' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KRVXN4W21L5T5HRYMZTBBU2NVQ34QPCLSIYCLYPGFIQV55XC
CLIENT_SECRET:WFCCQH5VJGIPAFPVUR13U15TGBSRNAPKDX4LPV5YRS4M0FQZ


## Explore Toronto Neighborhoods

In [172]:
idx=68
dftoronto.loc[idx]

Postcode                                                      M5V
Borough                                          Downtown Toronto
Neighborhood    CN Tower ,Bathurst Quay ,Island airport ,Harbo...
Latitude                                                  43.6289
Longitude                                                -79.3944
Name: 68, dtype: object

In [173]:
limit=1000
radius=500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    dftoronto.loc[idx, 'Latitude'], 
    dftoronto.loc[idx, 'Longitude'], 
    radius, 
    limit)
response= requests.get(url).json()
#response

In [174]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [166]:
venues = response['response']['groups'][0]['items']    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.location.city','venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,city,name,categories,lat,lng
0,Toronto,Billy Bishop Toronto City Airport (YTZ) (Billy...,Airport,43.631541,-79.395868
1,Toronto,Porter Lounge,Airport Lounge,43.63068,-79.395756
2,Toronto,Toronto Harbour,Harbor / Marina,43.633045,-79.396484
3,Toronto,Billy Bishop Café,Airport Food Court,43.631132,-79.396139
4,Toronto,Air Canada Check-In Counter,Airport Terminal,43.631226,-79.395987
5,Toronto,Gate 8,Airport Gate,43.631536,-79.39457
6,Toronto,Crew Room,Airport Lounge,43.63136,-79.396107
7,Toronto,Market@416,Bar,43.631653,-79.39451
8,Toronto,Want Passport,Boutique,43.631483,-79.396077
9,Toronto,Porter Airlines Check-In Counter,Airport Service,43.631683,-79.395454


In [177]:
#put them on a map
# create map of Toronto using latitude and longitude values
map_venues = folium.Map(location=[dftoronto.loc[idx,'Latitude'], dftoronto.loc[idx,'Longitude']], zoom_start=15)

# add markers to map
for lat, lng, name, cat in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'],nearby_venues['categories']):
    label = '{}, {}'.format(name, cat)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_venues)  
    
map_venues