In [95]:
import pandas as pd 
import numpy as np
import random 
import matplotlib.pyplot as plt

In [4]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup
import xml
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [96]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup=BeautifulSoup(url)

In [97]:
table=soup.find("table")

Import the table from Web Site, Store them into output_rows

In [98]:
output_rows = []
for table_row in table.findAll('tr'):
    columns = table_row.findAll('td')
    output_row = []
    if len(columns)>0:
        for column in columns:
            output_row.append(column.text.rstrip('\n'))
    output_rows.append(output_row)

Save the output into a data frame, but remove the first row where all values are none, df_final is the data set to be used

In [99]:
df = pd.DataFrame(output_rows) 
df.columns=['Postcode', 'Borough', 'Neighbourhood']
df_final=df[~df['Postcode'].isnull()]

In [100]:
df_final.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


Drop the rows with Borogh value "Not assigned"

In [101]:
df_dropna = df_final[df_final.Borough != "Not assigned"].reset_index(drop=True)
df_dropna.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


Group neighborhood by postcode and borough

In [110]:
df_group= df_dropna.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df_group.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [109]:
df_group.shape

(103, 3)

In [113]:
df_group.Neighbourhood.unique()

array(['Rouge, Malvern', 'Highland Creek, Rouge Hill, Port Union',
       'Guildwood, Morningside, West Hill', 'Woburn', 'Cedarbrae',
       'Scarborough Village',
       'East Birchmount Park, Ionview, Kennedy Park',
       'Clairlea, Golden Mile, Oakridge',
       'Cliffcrest, Cliffside, Scarborough Village West',
       'Birch Cliff, Cliffside West',
       'Dorset Park, Scarborough Town Centre, Wexford Heights',
       'Maryvale, Wexford', 'Agincourt',
       "Clarks Corners, Sullivan, Tam O'Shanter",
       "Agincourt North, L'Amoreaux East, Milliken, Steeles East",
       "L'Amoreaux West", 'Upper Rouge', 'Hillcrest Village',
       'Fairview, Henry Farm, Oriole', 'Bayview Village',
       'Silver Hills, York Mills', 'Newtonbrook, Willowdale',
       'Willowdale South', 'York Mills West', 'Willowdale West',
       'Parkwoods', 'Don Mills North', 'Flemingdon Park, Don Mills South',
       'Bathurst Manor, Downsview North, Wilson Heights',
       'Northwood Park, York University', 

In [116]:
for index, row in df_group.iterrows():
    if row['Neighbourhood']=='Not assigned':
        row['Neighbourhood']=row['Borough']
df_group.head()   

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Dimension of the data set

In [118]:
df_group.shape

(103, 3)

Get the latitude and longitude information

In [119]:
Geo_Dat=pd.read_csv("http://cocl.us/Geospatial_data")
Geo_Dat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [120]:
Geo_Dat.columns=['Postcode', 'Latitude','Longitude']
Geo_Dat.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge the two data sets

In [121]:
df_geo = pd.merge(df_dropna, Geo_Dat, on=['Postcode'], how='inner')
df_geo.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


Get the longitude and latitude of Toronto

In [122]:
address='Toronto, Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

  


43.653963 -79.387207


Draw the map - Figure 

In [134]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Ecco

# add Ecco as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Ecco',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)

# add popular spots to the map as blue circle markers
for lat, lng, borough, neighborhood in zip(df_geo['Latitude'], df_geo['Longitude'], df_geo['Borough'], 
                                           df_geo['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=False).add_to(venues_map)  
# display map

venues_map