In [22]:
import pandas as pd
# !pip install folium
import folium
# !pip install geopy
from geopy.geocoders import Nominatim
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors


In [2]:
df = pd.read_csv("canada_data_with_latlong.csv")
df.head()

Unnamed: 0,Postal_code,Borough,Neighbour,Latitude,Longitude
0,M1B,Scarborough,"Rouge\n, Malvern\n",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek\n, Rouge Hill\n, Port Union\n",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood\n, Morningside\n, West Hill\n",43.763573,-79.188711
3,M1G,Scarborough,Woburn\n,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae\n,43.773136,-79.239476


In [3]:
latitude = df['Latitude']
longitude = df['Longitude']

In [4]:
eoLocator = Nominatim(user_agent='My-IBMNotebook')

In [5]:
address = 'Toronto, Ontario Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto Canada are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto Canada are 43.653963, -79.387207.


In [6]:
map_toronto = folium.Map(location=[latitude,longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbour']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87cefa',
        fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)

In [7]:
map_toronto

In [12]:
toronto_latlon = df.drop(['Postal_code','Borough','Neighbour'], 1)

toronto_latlon.head()

Unnamed: 0,Latitude,Longitude
0,43.806686,-79.194353
1,43.784535,-79.160497
2,43.763573,-79.188711
3,43.770992,-79.216917
4,43.773136,-79.239476


In [15]:
from sklearn.preprocessing import StandardScaler

X = toronto_latlon.values[:,1:]
X = np.nan_to_num(X)
toronto_cluster = StandardScaler().fit_transform(toronto_latlon)
toronto_cluster

array([[ 1.95523876,  2.09777597],
       [ 1.53094905,  2.44798852],
       [ 1.12942801,  2.15613628],
       [ 1.271543  ,  1.86437197],
       [ 1.3126078 ,  1.6310228 ],
       [ 0.76859251,  1.6310228 ],
       [ 0.44670528,  1.39772948],
       [ 0.12457862,  1.16449306],
       [ 0.2242631 ,  1.6310228 ],
       [-0.22890739,  1.3685726 ],
       [ 1.01138034,  1.28110403],
       [ 0.87082451,  1.04789553],
       [ 1.71607875,  1.39772948],
       [ 1.47544769,  0.96045696],
       [ 2.11931218,  1.16449306],
       [ 1.81807323,  0.81474393],
       [ 2.51910935,  1.98106673],
       [ 1.89922981,  0.3486083 ],
       [ 1.41568642,  0.523382  ],
       [ 1.57715295,  0.1156253 ],
       [ 1.01292418,  0.23210904],
       [ 1.61748606, -0.1173008 ],
       [ 1.25483666, -0.1173008 ],
       [ 0.92228816, -0.0299605 ],
       [ 1.4964963 , -0.46658445],
       [ 0.93187104,  0.69818881],
       [ 0.79103364,  0.465121  ],
       [ 0.40783172,  0.58164715],
       [ 0.95236034,

In [18]:
kclusters = 5

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

#num_clusters = 3

#k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
#k_means.fit(cluster_dataset)
#labels = k_means.labels_

#print(labels)

array([4, 4, 4, 4, 4, 4, 4, 1, 4, 1])

In [20]:
df.insert(0, 'Cluster Labels', kmeans.labels_)

df

Unnamed: 0,Cluster Labels,Postal_code,Borough,Neighbour,Latitude,Longitude
0,4,M1B,Scarborough,"Rouge\n, Malvern\n",43.806686,-79.194353
1,4,M1C,Scarborough,"Highland Creek\n, Rouge Hill\n, Port Union\n",43.784535,-79.160497
2,4,M1E,Scarborough,"Guildwood\n, Morningside\n, West Hill\n",43.763573,-79.188711
3,4,M1G,Scarborough,Woburn\n,43.770992,-79.216917
4,4,M1H,Scarborough,Cedarbrae\n,43.773136,-79.239476
5,4,M1J,Scarborough,Scarborough Village\n,43.744734,-79.239476
6,4,M1K,Scarborough,"East Birchmount Park\n, Ionview\n, Kennedy Park\n",43.727929,-79.262029
7,1,M1L,Scarborough,"Clairlea\n, Golden Mile\n, Oakridge\n",43.711112,-79.284577
8,4,M1M,Scarborough,"Cliffcrest\n, Cliffside\n, Scarborough Village...",43.716316,-79.239476
9,1,M1N,Scarborough,"Birch Cliff\n, Cliffside West\n",43.692657,-79.264848


In [23]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Neighbour'], df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters
