Below is the code for the Coursera IBM Data Science specialization.

In [2]:
#importing all relevant libraries
import pandas as pd
import numpy as np
import sklearn
from sklearn.cluster import KMeans
import matplotlib as mpl
import folium
import requests
import json 
import matplotlib.cm as cm
import matplotlib.colors as colors

In [3]:
# Scrape the Toronto postal-code listing from Wikipedia.
# pd.read_html returns a list with one DataFrame per HTML table it finds
# (the original author notes this page yields 3); the postal-code table is
# the first entry, so it is picked out directly by index.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(url, header=0)[0]
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [4]:
# Keep only the rows whose Borough is assigned, then renumber them from 0.
# Author's observation: once the 'Not assigned' boroughs are gone, no
# 'Not assigned' neighbourhoods remain either.
df = df.loc[df['Borough'] != 'Not assigned'].reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
# Load the postal-code coordinate lookup from the CSV file that sits next to
# the notebook (columns shown below: Postal Code, Latitude, Longitude), then
# preview its first rows.
geodata = pd.read_csv('Geospatial_Coordinates.csv')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
# Attach coordinates to every borough/neighbourhood row by joining on the
# shared 'Postal Code' column; an inner join keeps only codes present in both frames.
finaldf = df.merge(geodata, how='inner', on='Postal Code')

In [7]:
# Preview the first rows of the merged frame to confirm the join result.
finaldf.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [8]:
# Base map centred on the coordinates below (Toronto).
torontomap = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# Overlay one circle marker per row of finaldf; each popup reads
# "<neighborhood>,<borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for row_lat, row_lng, row_borough, row_neighborhood in finaldf[marker_columns].itertuples(index=False, name=None):
    popup_text = '{},{}'.format(row_neighborhood, row_borough)
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='yellow',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(torontomap)
torontomap

In [9]:
# Load the CSV that holds the Foursquare credentials; the next cell reads the
# CLIENT_ID, CLIENT_SECRET and VERSION columns from its row labelled 0.
ID = pd.read_csv('Book1.csv')

In [10]:
# Foursquare API credentials, taken from the row labelled 0 of the ID frame
# so that they are not written into the notebook itself.
CLIENT_ID, CLIENT_SECRET, VERSION = (
    ID.loc[0, column_name]
    for column_name in ('CLIENT_ID', 'CLIENT_SECRET', 'VERSION')
)

In [11]:
# Search parameters for the Foursquare query: `radius` and `LIMIT` are sent
# as the API's radius / limit values by the query cell.
# NOTE(review): the query below returned 50 rows despite LIMIT = 1000 --
# presumably the API caps the result size; confirm against the API docs.
radius, LIMIT = 20_000, 1_000

# Centre point (latitude, longitude) of the venue search and of the cluster map.
latitude1, longitude1 = 43.6534817, -79.3839347

In [12]:
# Query the Foursquare v2 venues/search endpoint around (latitude1, longitude1)
# and flatten the returned venue records into a DataFrame.
#
# The query-string values are passed through `params` so that requests
# URL-encodes them, instead of being pasted into the URL by hand.
url1 = 'https://api.foursquare.com/v2/venues/search'
params1 = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude1, longitude1),
    'v': VERSION,
    'radius': radius,
    'limit': LIMIT,
    # Category filter for the search (presumably the cafe / coffee category,
    # judging by the venues returned -- confirm against the Foursquare category list).
    'categoryId': '4bf58dd8d48988d16d941735',
}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (bad credentials, quota exceeded, ...)
# into a clear exception instead of an opaque KeyError on the lookup below.
response1 = requests.get(url1, params=params1, timeout=30)
response1.raise_for_status()
results1 = response1.json()

venue1 = results1['response']['venues']
resultsdf1 = pd.json_normalize(venue1)
resultsdf1

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.neighborhood
0,4ae60299f964a52003a421e3,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1592654851,False,"180 Queen St W, Suite 102.3A",at Simcoe St.,43.650751,-79.388047,"[{'label': 'display', 'lat': 43.650751, 'lng':...",449,M5V 3X3,CA,Toronto,ON,Canada,"[180 Queen St W, Suite 102.3A (at Simcoe St.),...",
1,5d10db203e8ac400238e1bf1,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1592654851,False,"5650 Yonge St, Ste 101",,43.780178,-79.416408,"[{'label': 'display', 'lat': 43.780178, 'lng':...",14343,M2M 4G3,CA,Toronto,ON,Canada,"[5650 Yonge St, Ste 101, Toronto ON M2M 4G3, C...",Willowdale
2,5a981dbe0c9f3129c9ee442c,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1592654851,False,132 Queens Quay East,,43.644489,-79.368639,"[{'label': 'display', 'lat': 43.644489, 'lng':...",1587,M5A 3Y5,CA,Toronto,ON,Canada,"[132 Queens Quay East, Toronto ON M5A 3Y5, Can...",
3,4b4626fef964a5204a1826e3,Rooster Coffee House,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1592654851,False,479 Broadview Ave,at Riverdale Ave,43.669177,-79.353134,"[{'label': 'display', 'lat': 43.66917710774420...",3034,M4K 2N4,CA,Toronto,ON,Canada,"[479 Broadview Ave (at Riverdale Ave), Toronto...",
4,5e5d749285a0610007e60fe8,Terroni Sud Forno Produzione e Spaccio,"[{'id': '4bf58dd8d48988d1f5941735', 'name': 'G...",v-1592654851,False,22 Sackville St,,43.653903,-79.360018,"[{'label': 'display', 'lat': 43.653903, 'lng':...",1926,M5A 3E2,CA,Toronto,ON,Canada,"[22 Sackville St, Toronto ON M5A 3E2, Canada]",
5,4ad79243f964a5204c0c21e3,Jetfuel Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1592654851,False,519 Parliament St.,btwn Carlton & Winchester,43.665295,-79.368335,"[{'label': 'display', 'lat': 43.66529519392083...",1818,M4X 1P3,CA,Toronto,ON,Canada,[519 Parliament St. (btwn Carlton & Winchester...,
6,5b4b6453c4404e001c1fa419,Mofer Coffee,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1592654851,False,1040 St Clair Ave W,,43.679094,-79.438931,"[{'label': 'display', 'lat': 43.6790939, 'lng'...",5267,M6E 1A5,CA,Toronto,ON,Canada,"[1040 St Clair Ave W, Toronto ON M6E 1A5, Canada]",
7,55ad1c45498ee54a96be5ba6,Mattachioni,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1592654851,False,1617 Dupont St.,,43.66496,-79.454912,"[{'label': 'display', 'lat': 43.66495951140892...",5857,M6P 3S8,CA,Toronto,ON,Canada,"[1617 Dupont St., Toronto ON M6P 3S8, Canada]","Junction Triangle, Toronto, ON"
8,4ba3fa87f964a520d17338e3,SanRemo Bakery,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",v-1592654851,False,374 Royal York Rd,at Simpson Ave,43.618542,-79.499485,"[{'label': 'display', 'lat': 43.61854213652106...",10089,M8Y 2R3,CA,Toronto,ON,Canada,"[374 Royal York Rd (at Simpson Ave), Toronto O...",
9,4b770853f964a5205d762ee3,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1592654851,False,"686 Bay St, Unit 3",at Gerrard St W,43.657656,-79.385007,"[{'label': 'display', 'lat': 43.657656, 'lng':...",472,M5G 0A4,CA,Toronto,ON,Canada,"[686 Bay St, Unit 3 (at Gerrard St W), Toronto...",Discovery District


In [13]:
# (rows, columns) of the flattened venue table returned by the query.
resultsdf1.shape

(50, 18)

In [14]:
# Keep only the columns used for clustering: venue latitude, longitude and
# the distance value reported by the API.
# .copy() makes this an independent frame; a later cell adds a column to it,
# and doing that on a slice of resultsdf1 can trigger SettingWithCopyWarning.
resultsdf_new = resultsdf1[['location.lat', 'location.lng', 'location.distance']].copy()

In [15]:
# (rows, columns) of the reduced frame -- same row count, three feature columns.
resultsdf_new.shape

(50, 3)

In [16]:
# Fit K-means with k clusters on the venue features.
k = 6

# Name the feature columns explicitly so the fit does not silently pick up
# extra columns (e.g. a cluster-label column) if this cell is re-run after
# resultsdf_new has been extended.
feature_columns = ['location.lat', 'location.lng', 'location.distance']

# NOTE(review): location.distance spans hundreds to thousands (see the values
# displayed below) while latitude/longitude vary by fractions of a degree, so
# the unscaled distance column dominates the Euclidean metric. Consider
# standardising the features or clustering on latitude/longitude only.

# random_state pins the centroid initialisation so the labels are
# reproducible across kernel restarts.
kmeans = KMeans(
    init="k-means++",
    n_clusters=k,
    n_init=12,
    random_state=0,
).fit(resultsdf_new[feature_columns])

In [17]:
# Cluster index assigned by the fitted model to each row passed to fit().
kmeans.labels_

array([0, 5, 0, 0, 0, 0, 4, 4, 2, 0, 0, 4, 5, 2, 5, 2, 4, 4, 1, 4, 3, 3,
       0, 2, 0, 1, 4, 1, 0, 0, 0, 4, 4, 0, 5, 5, 3, 0, 0, 1, 2, 0, 4, 2,
       0, 1, 5, 2, 0, 0])

In [18]:
# Put the cluster labels in the first column of the feature frame.
# Any existing 'Cluster_labels' column is dropped first so the cell can be
# re-run safely: DataFrame.insert raises ValueError when the column already
# exists. drop() also returns a new frame, so the insert never writes into a
# slice of resultsdf1.
resultsdf_new = resultsdf_new.drop(columns='Cluster_labels', errors='ignore')
resultsdf_new.insert(0, 'Cluster_labels', kmeans.labels_)
resultsdf_new.head()

Unnamed: 0,Cluster_labels,location.lat,location.lng,location.distance
0,0,43.650751,-79.388047,449
1,5,43.780178,-79.416408,14343
2,0,43.644489,-79.368639,1587
3,0,43.669177,-79.353134,3034
4,0,43.653903,-79.360018,1926


In [19]:
# Plot the K-means result with Folium: one circle marker per venue, coloured
# by its cluster label.
map_clusters = folium.Map(location=(latitude1, longitude1), zoom_start=11)

# Colour scheme: k evenly spaced samples of the rainbow colormap, converted to
# hex strings that Folium accepts. (The earlier x / ys helper arrays were only
# used for their length, which is k, so they are gone.)
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add the markers to the map. Labels run from 0 to k-1, so they index the
# palette directly; the previous `cluster - 1` only worked because index -1
# wraps around to the last colour for cluster 0.
for lat, lon, cluster in zip(resultsdf_new['location.lat'], resultsdf_new['location.lng'], resultsdf_new['Cluster_labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters