In [12]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
# from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [30]:
'''
SCN TORONTO NEIGHBOURHOOD
Web scrape html file and clean for analysis.
'''
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki = pd.read_html(url, header=0)
df = pd.DataFrame(wiki[0])

# Clean up 'Not assigned' rows and values
df.loc[df.Neighbourhood == 'Not assigned', 'Neighbourhood'] = df['Borough']
dfc = df[~df['Borough'].isin(['Not assigned'])]

# Group by Postcode and join Neighbourhood values
dfs = dfc.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda tags: ', '.join(tags))
dfs2 = pd.DataFrame(dfs)

toronto_neigh = dfs2.sort_values('Postcode')
toronto_neigh.reset_index(inplace=True)

print('Shape:   ', toronto_neigh.shape)
print('-'*80)
print(toronto_neigh.describe())
print('-'*80)
# print(toronto_neigh.head(20))
toronto_neigh.head(20)

Shape:    (103, 3)
--------------------------------------------------------------------------------
       Postcode     Borough                      Neighbourhood
count       103         103                                103
unique      103          11                                103
top         M7Y  North York  Guildwood, Morningside, West Hill
freq          1          24                                  1
--------------------------------------------------------------------------------


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [14]:
# Import geo code from web link provided in assignment

url_geo = 'http://cocl.us/Geospatial_data'
geodata = pd.read_csv(url_geo)
toronto_geo = pd.DataFrame(geodata)

toronto_geo.rename(columns ={'Postal Code':'Postcode'},inplace=True)

print('Shape:   ',toronto_geo.shape)
print('-'*80)
print(toronto_geo.head())

Shape:    (103, 3)
--------------------------------------------------------------------------------
  Postcode   Latitude  Longitude
0      M1B  43.806686 -79.194353
1      M1C  43.784535 -79.160497
2      M1E  43.763573 -79.188711
3      M1G  43.770992 -79.216917
4      M1H  43.773136 -79.239476


In [29]:
# merge toronto df and toronto_geo(lat / long) df

toronto = pd.merge(toronto_neigh, toronto_geo, how ='left', on='Postcode')
toronto.head(20)


# print('Shape:   ',toronto.shape)
# print('-'*80)
# print(toronto.head())


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Toronto Neighbourhoods

In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'
    .format(len(toronto['Borough'].unique()),toronto.shape[0]))

The dataframe has 11 boroughs and 103 neighborhoods.


In [17]:
address = 'Toronto'

geolocator = Nominatim(user_agent="agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))


Geographical coordinates of Toronto are 43.653963, -79.387207.


In [31]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, label in zip(toronto['Latitude'], 
                           toronto['Longitude'], 
                           toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## West Toronto Neighborhoods

In [19]:
west_toronto = toronto[toronto['Borough']=='West Toronto']

print('DF Shape:  ',west_toronto.shape)
print('-'*80)
print(west_toronto.head())


DF Shape:   (6, 5)
--------------------------------------------------------------------------------
   Postcode       Borough                                 Neighbourhood  \
76      M6H  West Toronto                  Dovercourt Village, Dufferin   
77      M6J  West Toronto                      Little Portugal, Trinity   
78      M6K  West Toronto  Brockton, Exhibition Place, Parkdale Village   
82      M6P  West Toronto                 High Park, The Junction South   
83      M6R  West Toronto                        Parkdale, Roncesvalles   

     Latitude  Longitude  
76  43.669005 -79.442259  
77  43.647927 -79.419750  
78  43.636847 -79.428191  
82  43.661608 -79.464763  
83  43.648960 -79.456325  


In [20]:
print('The dataframe has {} boroughs and {} neighborhoods.'
    .format(len(west_toronto['Borough'].unique()),west_toronto.shape[0]))

The dataframe has 1 boroughs and 6 neighborhoods.


In [21]:
address = 'West Toronto'

geolocator = Nominatim(user_agent="agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))


Geographical coordinates of West Toronto are 43.653963, -79.387207.


In [22]:
# create map of Toronto using latitude and longitude values
map_west_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(west_toronto['Latitude'], 
                           west_toronto['Longitude'], 
                           west_toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_west_toronto)  
    
map_west_toronto

## All Neighbourhoods With Name Toronto In It

In [23]:
name_toronto = toronto[toronto['Borough'].str.contains('Toronto')]

print('DF Shape:  ',name_toronto.shape)
print('-'*80)
print(name_toronto.head())


DF Shape:   (38, 5)
--------------------------------------------------------------------------------
   Postcode          Borough                   Neighbourhood   Latitude  \
37      M4E     East Toronto                     The Beaches  43.676357   
41      M4K     East Toronto    The Danforth West, Riverdale  43.679557   
42      M4L     East Toronto  The Beaches West, India Bazaar  43.668999   
43      M4M     East Toronto                 Studio District  43.659526   
44      M4N  Central Toronto                   Lawrence Park  43.728020   

    Longitude  
37 -79.293031  
41 -79.352188  
42 -79.315572  
43 -79.340923  
44 -79.388790  


In [24]:
print('The dataframe has {} boroughs and {} neighborhoods.'
    .format(len(name_toronto['Borough'].unique()),name_toronto.shape[0]))

The dataframe has 4 boroughs and 38 neighborhoods.


In [25]:
address = 'Toronto'

geolocator = Nominatim(user_agent="agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))


Geographical coordinates of Toronto are 43.653963, -79.387207.


In [27]:
# create map of Toronto using latitude and longitude values
map_name_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(name_toronto['Latitude'], 
                           name_toronto['Longitude'], 
                           name_toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_name_toronto)  
    
map_name_toronto

## Notes:

I'm looking at the entire Toronto neighborhoods with zipcode starting with M. Then I'm also looking at the clustering of couplf of subset neighborhoods within Toronto. Statistics on number of boroughs and neighborhoods are summarized below. Maps of those area are shown above. There seems to be a small concentration of neighborhoods near the Toronto Union Station, near the port.

A) All neighborhood in Toronto: There are 11 boroughs and 103 neighborhoods.

B) West Toronto neighborhood:  There are 1 borough and 6 neighborhoods.

C) All neighborhoods with name Toronto in it:  There are 4 boroughs and 38 neighborhoods.