**Segmenting and Clustering Neighborhoods in Toronto**

In [0]:
import pandas as pd

In [5]:
# Load the raw Toronto postal-code table (Postcode, Borough, Neighbourhood).
data = pd.read_csv('data-toronto.csv')
# Preview the first rows.
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [8]:
# (rows, columns) of the raw table, before any filtering.
data.shape

(287, 3)

**Cleaning and selecting data**

In [9]:
# Keep only rows with an assigned borough, order them by postcode and then
# borough, and renumber the rows from zero (the old index is discarded).
has_borough = data['Borough'] != 'Not assigned'
df = (
    data.loc[has_borough]
    .sort_values(by=['Postcode', 'Borough'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
1,M1B,Scarborough,Malvern
2,M1C,Scarborough,Highland Creek
3,M1C,Scarborough,Rouge Hill
4,M1C,Scarborough,Port Union


In [10]:
# (rows, columns) after dropping the 'Not assigned' boroughs.
df.shape

(210, 3)

In [20]:
!pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 2.7MB/s 
[?25hCollecting ratelim
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [0]:
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
import geocoder
from sklearn.cluster import KMeans
import folium

**Geocoder**

In [26]:
address = 'Toronto, ON'

# user_agent identifies this notebook to the Nominatim service.
geolocator = Nominatim(user_agent = "Toronto")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The coordinates of Toronto are 43.653963, -79.387207.


**Extract the data**

In [0]:
def get_lat_lng(postal_code, max_attempts=10, delay=1.0):
    """Return ``[latitude, longitude]`` for a Toronto postal code via ArcGIS.

    The lookup is retried when the service returns no coordinates, but only
    a bounded number of times so that a code which never resolves cannot
    hang the notebook or flood the geocoding service.

    Parameters
    ----------
    postal_code : str
        Postal code (e.g. ``'M1B'``); ``', Toronto, Ontario'`` is appended
        to build the query.
    max_attempts : int, optional
        Maximum number of lookups before giving up.
    delay : float, optional
        Seconds to wait between consecutive attempts; ``0`` disables waiting.

    Returns
    -------
    list
        The ``latlng`` value reported by the geocoder result.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained within ``max_attempts`` lookups.
    """
    import time  # local import keeps this cell self-contained

    query = '{}, Toronto, Ontario'.format(postal_code)
    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
        # Pause before retrying, but not after the final failed attempt.
        if attempt < max_attempts and delay > 0:
            time.sleep(delay)
    raise RuntimeError(
        'Could not geocode {!r} after {} attempt(s)'.format(query, max_attempts)
    )

In [0]:
postal_codes = df['Postcode']
# Many rows share a postal code, so geocode each distinct code only once and
# then expand the results back to one [lat, lng] pair per row, in row order.
coords_by_code = { code: get_lat_lng(code) for code in postal_codes.unique().tolist() }
coords = [ coords_by_code[postal_code] for postal_code in postal_codes.tolist() ]

In [29]:
# Turn the list of [lat, lng] pairs into a two-column frame, then copy each
# column onto df. Column assignment matches rows by index label, so this
# relies on df having the default 0..n-1 index.
coordinates = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
for axis_name in ('Latitude', 'Longitude'):
    df[axis_name] = coordinates[axis_name]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.811525,-79.195517
1,M1B,Scarborough,Malvern,43.811525,-79.195517
2,M1C,Scarborough,Highland Creek,43.785665,-79.158725
3,M1C,Scarborough,Rouge Hill,43.785665,-79.158725
4,M1C,Scarborough,Port Union,43.785665,-79.158725


In [0]:
# Save the table with coordinates to disk; index=False leaves out the row index.
df.to_csv('coordinates.csv', index=False)

**Added two new columns.**

In [32]:
# Preview df, which now carries the Latitude and Longitude columns.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.811525,-79.195517
1,M1B,Scarborough,Malvern,43.811525,-79.195517
2,M1C,Scarborough,Highland Creek,43.785665,-79.158725
3,M1C,Scarborough,Rouge Hill,43.785665,-79.158725
4,M1C,Scarborough,Port Union,43.785665,-79.158725


In [33]:
# Reload the saved table from disk.
# NOTE(review): the map cell further down rebinds `toronto` to a folium.Map
# and plots from `df`, so this DataFrame is not used afterwards in the cells
# shown here.
toronto = pd.read_csv('coordinates.csv')
toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Rouge,43.811525,-79.195517
1,M1B,Scarborough,Malvern,43.811525,-79.195517
2,M1C,Scarborough,Highland Creek,43.785665,-79.158725
3,M1C,Scarborough,Rouge Hill,43.785665,-79.158725
4,M1C,Scarborough,Port Union,43.785665,-79.158725


**Map**

In [45]:
# Base map centred on the coordinates geocoded for Toronto.
toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One small circle per row of df; the popup reads "Neighbourhood, Borough".
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for row_lat, row_lng, row_borough, row_neighbourhood in marker_rows:
    popup_text = '{}, {}'.format(row_neighbourhood, row_borough)
    # NOTE(review): parse_html=False is forwarded to CircleMarker as in the
    # original cell; confirm whether CircleMarker actually uses this option.
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=3,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3126cc',
        fill_opacity=0.5,
        parse_html=False,
    )
    marker.add_to(toronto)

# Display the finished map as the cell output.
toronto