### Import modules

In [158]:
import io
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

### Download the Wikipedia page

In [159]:
# Fetch the raw HTML of the Wikipedia article that lists the "M" postal codes.
wikipedia_page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever on a stalled connection.
wiki_response = requests.get(wikipedia_page, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as the article.
wiki_response.raise_for_status()
file = wiki_response.text



### Parsing the data

In [160]:
# Parse the downloaded HTML and collect every <table> carrying the "sortable" class.
soup = BeautifulSoup(file, 'html.parser')
table = soup.find_all('table', class_='sortable')
print ('Found {} table(s) in the extracted wiki-html.'.format( len (table ) ) )
# Later cells index table[0]; stop here with a clear message if nothing matched.
if not table:
    raise ValueError('No sortable table found in the downloaded page: ' + wikipedia_page)

Found 1 table(s) in the extracted wiki-html.


### Converting the table to a DataFrame

In [161]:

# Column titles are the stripped text of the <th> cells of the first matched table.
table_headings = table[0].find_all('th')
headings = [header_cell.get_text().strip() for header_cell in table_headings]
headings

['Postcode', 'Borough', 'Neighbourhood']

In [162]:
# Overwrite the first column title with 'Postalcode', the key name that the
# later groupby and merge cells refer to.
headings[0] =  'Postalcode'

headings

['Postalcode', 'Borough', 'Neighbourhood']

In [163]:
# One list of stripped cell texts (first three <td> cells) per table row.
# Rows without any <td> cell (e.g. the header row) are skipped.
table_rows = [
    [cell.text.strip() for cell in cells[:3]]
    for cells in (tr.find_all('td') for tr in table[0].find_all('tr'))
    if cells
]
table_rows

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned'],
 ['M8A', 'Not assigned', 'Not assigned'],
 ['M9A', 'Etobicoke', 'Islington Avenue'],
 ['M1B', 'Scarborough', 'Rouge'],
 ['M1B', 'Scarborough', 'Malvern'],
 ['M2B', 'Not assigned', 'Not assigned'],
 ['M3B', 'North York', 'Don Mills North'],
 ['M4B', 'East York', 'Woodbine Gardens'],
 ['M4B', 'East York', 'Parkview Hill'],
 ['M5B', 'Downtown Toronto', 'Ryerson'],
 ['M5B', 'Downtown Toronto', 'Garden District'],
 ['M6B', 'North York', 'Glencairn'],
 ['M7B', 'Not assigned', 'Not assigned'],
 ['M8B', 'Not assigned', 'Not assigned'],
 ['M9B', 'Etobicoke', 'Cloverdale'],
 ['M9B', 'Etobicoke', 'Islington'],
 ['M9B', 

In [164]:
# Assemble the scraped rows into a DataFrame labelled with the scraped headings.
Postal_Codes_Canada = pd.DataFrame(data=table_rows, columns=headings)
Postal_Codes_Canada.head(5)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Getting rid of "Not assigned" values

In [165]:
# The scraped table uses the literal text 'Not assigned' for missing entries.
# Convert it to a real missing value, drop rows that have no borough, and
# renumber the index. `np.nan` is used because the `np.NaN` alias no longer
# exists in NumPy 2.0; rebinding (instead of inplace=True) keeps the steps
# readable as one chain.
Postal_Codes_Canada = (
    Postal_Codes_Canada
    .replace(to_replace='Not assigned', value=np.nan)
    .dropna(axis=0, subset=['Borough'])
    .reset_index(drop=True)
)

In [166]:
# Preview the first rows of the frame.
Postal_Codes_Canada.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [167]:
# Rows that have a borough but no neighbourhood get the borough name as
# their neighbourhood.
s = Postal_Codes_Canada['Neighbourhood'].isna()
print (Postal_Codes_Canada.loc[s, 'Borough'])
# Assign through a single .loc call on the frame: the chained form
# frame['col'].loc[mask] = ... may write to a temporary copy and leave the
# frame unchanged.
Postal_Codes_Canada.loc[s, 'Neighbourhood'] = Postal_Codes_Canada.loc[s, 'Borough']

6    Queen's Park
Name: Borough, dtype: object


In [168]:
# Collapse to one row per (Postalcode, Borough) pair, joining the
# neighbourhood names of each pair into a single comma-separated string.
# groupby().agg() replaces the hand-written loop over groups and yields the
# same three columns in the same (sorted-by-key) order.
Postal_Codes_Canada = (
    Postal_Codes_Canada
    .groupby(['Postalcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)
Postal_Codes_Canada.shape

(103, 3)

### Geographical coordinates of the neighbourhoods

In [169]:
# Download the CSV of postal-code coordinates and load it into a DataFrame.
geo_response = requests.get("https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv", timeout=30)
# Fail loudly on an HTTP error instead of feeding an error page to read_csv.
geo_response.raise_for_status()
df = pd.read_csv(io.StringIO(geo_response.content.decode('utf-8')))

In [170]:
# Preview the first rows of the downloaded coordinates table.
df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [171]:
# Rename the key column by name so it matches Postal_Codes_Canada for the merge.
# Renaming by name (rather than overwriting all column labels positionally)
# cannot mislabel Latitude/Longitude if the CSV column order ever changes.
df = df.rename(columns={'Postal Code': 'Postalcode'})

In [172]:
# Attach latitude/longitude to each postal code; inner join on the shared key.
geo_toronto = Postal_Codes_Canada.merge(df, how='inner', on='Postalcode')

In [173]:
# Preview the first rows of the merged frame.
geo_toronto.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Creating the map

In [139]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /anaconda3

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    r-plyr-1.8.4               |   r351h9d2a408_2         836 KB  conda-forge
    freetype-2.8.1             |       hfa320df_1         830 KB  conda-forge
    pango-1.40.14              |                0         520 KB  conda-forge
    r-cvst-0.2_2               |   r351h6115d3f_0         100 KB  conda-forge
    r-munsell-0.5.0            |   r351h6115d3f_1         243 KB  conda-forge
    r-repr-0.15.0              |   r351hf348343_0         105 KB
    r-dimred-0.1.0             |   r351h6115d3f_2         620 KB  conda-forge
    xorg-libxrender-0.9.10     |       h470a237_2          24 KB  conda-forge
    r-modelr-0.1.2             |   r351h6115d3f_1         211 KB  conda-forge
    xorg-libx11-1.6.6          |       h470a


The following NEW packages will be INSTALLED:

    geographiclib:     1.49-py_0                conda-forge
    geopy:             1.17.0-py_0              conda-forge
    libxcb:            1.13-h470a237_2          conda-forge
    prometheus_client: 0.4.1-py_0               conda-forge
    pthread-stubs:     0.4-h470a237_1           conda-forge
    r-abind:           1.4_5-r351h6115d3f_0     conda-forge
    r-fansi:           0.3.0-r351hc070d10_0     conda-forge
    r-geometry:        0.3_6-r351hc070d10_2     conda-forge
    r-later:           0.7.3-r351h9d2a408_0     conda-forge
    r-magic:           1.5_8-r351h6115d3f_0     conda-forge
    r-pls:             2.7_0-r351h6115d3f_0     conda-forge
    r-prettyunits:     1.0.2-r351h6115d3f_1     conda-forge
    r-processx:        3.2.0-r351hc070d10_1     conda-forge
    r-promises:        1.0.1-r351h9d2a408_0     conda-forge
    r-ps:              1.1.0-r351hc070d10_1     conda-forge
    r-squarem:         2017.10_1-r

r-plyr-1.8.4         | 836 KB    | ##################################### | 100% 
freetype-2.8.1       | 830 KB    | ##################################### | 100% 
pango-1.40.14        | 520 KB    | ##################################### | 100% 
r-cvst-0.2_2         | 100 KB    | ##################################### | 100% 
r-munsell-0.5.0      | 243 KB    | ##################################### | 100% 
r-repr-0.15.0        | 105 KB    | ##################################### | 100% 
r-dimred-0.1.0       | 620 KB    | ##################################### | 100% 
xorg-libxrender-0.9. | 24 KB     | ##################################### | 100% 
r-modelr-0.1.2       | 211 KB    | ##################################### | 100% 
xorg-libx11-1.6.6    | 841 KB    | ##################################### | 100% 
r-rcurl-1.95_4.11    | 861 KB    | ##################################### | 100% 
r-tibble-1.4.2       | 227 KB    | ##################################### | 100% 
r-caret-6.0_80       | 5.9 M

r-forcats-0.3.0      | 222 KB    | ##################################### | 100% 
r-scales-1.0.0       | 571 KB    | ##################################### | 100% 
r-httpuv-1.4.5       | 491 KB    | ##################################### | 100% 
r-boot-1.3_20        | 624 KB    | ##################################### | 100% 
r-profvis-0.3.5      | 164 KB    | ##################################### | 100% 
r-bit64-0.9_7        | 453 KB    | ##################################### | 100% 
r-modelmetrics-1.1.0 | 127 KB    | ##################################### | 100% 
r-odbc-1.1.5         | 431 KB    | ##################################### | 100% 
r-numderiv-2016.8_1  | 123 KB    | ##################################### | 100% 
r-ttr-0.23_3         | 505 KB    | ##################################### | 100% 
r-miniui-0.1.1.1     | 50 KB     | ##################################### | 100% 
r-blob-1.1.1         | 27 KB     | ##################################### | 100% 
xorg-libxext-1.3.3   | 42 KB

In [177]:
import folium

In [178]:
address = 'Toronto'

# Pass an explicit user_agent: calling Nominatim() without one is deprecated
# in geopy 1.x and rejected by geopy 2.x.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [189]:
# Keep only the rows whose borough name contains "Toronto" (case-insensitive),
# then renumber the index from zero.
is_toronto_borough = geo_toronto['Borough'].str.contains("Toronto", case=False)
Toronto = geo_toronto[is_toronto_borough].reset_index(drop=True)

print(Toronto.shape)
Toronto.head()

(38, 5)


Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [190]:
from geopy.geocoders import Nominatim 

# Geocode the map centre. An explicit user_agent is passed because calling
# Nominatim() without one is deprecated in geopy 1.x and rejected by geopy 2.x.
location = Nominatim(user_agent='toronto_neighbourhood_explorer').geocode('Toronto, Canada')
# geocode() returns None when nothing matches; the map cell needs a real location.
if location is None:
    raise ValueError("Could not geocode 'Toronto, Canada'")



In [192]:
import folium

# Base map centred on the geocoded location.
map_toronto = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# Add one circle marker per row of Toronto, with the neighbourhood text as its popup.
marker_rows = zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighbourhood'])
for marker_lat, marker_lng, marker_name in marker_rows:
    popup = folium.Popup('{}'.format(marker_name), parse_html=True)
    marker = folium.CircleMarker(
        [marker_lat, marker_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)
map_toronto
