### Import all necessary libraries

In [90]:
import pandas as pd
import numpy as np

#!conda install -c conda-forge folium=0.5.0 --yes

In [91]:
# import map rendering library
import folium

# import k-means from clustering stage
from sklearn.cluster import KMeans

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

### Read output from Stage 2

In [92]:
# Load the table written by Stage 2. index_col=False tells pandas not to use
# any column of the file as the row index, so the file's unnamed leading
# column stays in the frame as an ordinary column ('Unnamed: 0').
neighborhoods = pd.read_csv(
    'output_From_Stage_2.csv',
    index_col=False,
)

# Preview the first rows.
neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [93]:
# Summarise the table: number of distinct 'Borough' values and number of rows.
print(
    'The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        len(neighborhoods.index),
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [94]:
# Mean latitude and longitude over all rows of the table.
Latitude = neighborhoods['Latitude'].mean()
Longitude = neighborhoods['Longitude'].mean()

# Show both values, one per line.
print(Latitude, Longitude, sep='\n')

43.70460773398059
-79.39715291165048


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhoods into 4 clusters. 
__Note__: Values of *k* from 3 to 9 were tried; 4 gives the most logical-looking separation.

In [95]:
# set number of clusters
kclusters = 4

# work on a copy so this cell can be re-run without modifying the loaded frame
neighborhoods_merged = neighborhoods.copy()

# Cluster on the geographic coordinates only. Selecting the two columns by
# name (instead of dropping the text columns) keeps the leftover 'Unnamed: 0'
# row counter from the CSV out of the feature matrix, where it would be
# treated as a third coordinate. It also avoids passing `axis` positionally
# to DataFrame.drop, which pandas 2.0 no longer accepts.
neighborhoods_grouped_clustering = neighborhoods[['Latitude', 'Longitude']]

# run k-means clustering (random_state fixed so repeated runs give the same labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neighborhoods_grouped_clustering)

# add clustering labels as the first column of the copy
neighborhoods_merged.insert(0, 'Cluster Labels', kmeans.labels_)

### Finally, let's visualize the resulting clusters

In [96]:
# create map centred on the mean latitude/longitude
map_Toronto = folium.Map(location=[Latitude, Longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced colour from the
# rainbow colormap per cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one circle marker per row, coloured by its cluster label
for lat, lon, poi, cluster in zip(
    neighborhoods_merged['Latitude'],
    neighborhoods_merged['Longitude'],
    neighborhoods_merged['Neighbourhood'],
    neighborhoods_merged['Cluster Labels'],
):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # k-means labels are 0-based, so the label indexes the palette directly
    # (no `cluster-1`, which only worked for label 0 via negative-index wraparound)
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map_Toronto)

# last expression of the cell: render the map
map_Toronto