## Assignment Part 1

In [1]:
import pandas as pd

__Read in table__

In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


__Drop rows were Borough is "Not assigned"__

In [3]:
df_drop = df[df.Borough != 'Not assigned'].reset_index(drop=True)
df_drop

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


__Group the Neighbourhoods by Boroughs__

In [4]:
df_group = df_drop.groupby(['Postal Code','Borough'], as_index=False).agg(lambda x: ','.join(x))
df_group

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


__If any neighbourhood row is "Not assigned", replace it with its Borough. Check that no "Not assigned" remain__

In [5]:
mask = df_group['Neighbourhood'] == "Not assigned"
df_group.loc[mask, 'Neighbourhood'] = df_group.loc[mask, 'Borough']
df_group
b = 'Not assigned'
if b in df.Neighbourhood:
    print('Not Assigned present')
else:
    print('Not Assigned is not present')

Not Assigned is not present


__Print the number of rows and columns__

In [6]:
df.shape

(180, 3)

## Assignment Part 2

__Install geocoder__

In [7]:
!pip install geocoder



__Run geocoder to find coordinates__

In [None]:
import geocoder # import geocoder
postal_code = df_group['Postal Code']

# Using while Statement
latitude = []
longitude = []
n = 0

while n < len(postal_code):
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code[n]))
    lat_lng_coords = g.latlng
    print('The geograpical coordinate of {} are {}, {}.'.format(postal_code[n], lat_lng_coords[0], lat_lng_coords[1]))
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    n = n + 1

df_group['Latitude'] = latitude
df_group['Longitude'] = longitude

The geograpical coordinate of M1B are 43.811390000000074, -79.19661999999994.
The geograpical coordinate of M1C are 43.78574000000003, -79.15874999999994.
The geograpical coordinate of M1E are 43.765750000000025, -79.17469999999997.
The geograpical coordinate of M1G are 43.76812000000007, -79.21760999999998.
The geograpical coordinate of M1H are 43.76944000000003, -79.23891999999995.
The geograpical coordinate of M1J are 43.74446000000006, -79.23116999999996.
The geograpical coordinate of M1K are 43.725820000000056, -79.26460999999995.
The geograpical coordinate of M1L are 43.71289000000007, -79.28505999999999.
The geograpical coordinate of M1M are 43.72360000000003, -79.23495999999994.
The geograpical coordinate of M1N are 43.695100000000025, -79.26465999999994.
The geograpical coordinate of M1P are 43.75998000000004, -79.26939999999996.
The geograpical coordinate of M1R are 43.75075000000004, -79.30053999999996.
The geograpical coordinate of M1S are 43.794520000000034, -79.2670799999

__Display header for dataframe__

In [None]:
df_group.head(12)

## Assignment Part 3

__Install and load necessary packages__

In [None]:
!pip install folium
import folium # plotting library
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np

__Generate map__

In [None]:
map_toronto = folium.Map(location=[43.651070,-79.347015],zoom_start=10)

for lat,lng,borough,neighbourhood in zip(df_group['Latitude'],df_group['Longitude'],df_group['Borough'],df_group['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
map_toronto

__Use KMeans clustering to cluster neighbourhoods__

In [None]:
k=5
toronto_clustering = df_group.drop(['Postal Code','Borough','Neighbourhood'],1)
kmeans = KMeans(n_clusters = k,random_state=0).fit(toronto_clustering)
kmeans.labels_
df_group.insert(0, 'Cluster Labels', kmeans.labels_)

In [None]:
df_group.head(12)

__Generate map with clustered neighbourhoods__

In [None]:
# create map
map_clusters = folium.Map(location=[43.651070,-79.347015],zoom_start=10)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, neighbourhood, cluster in zip(df_group['Latitude'], df_group['Longitude'], df_group['Neighbourhood'], df_group['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters