<h1> Finding out Best Neighborhood to open a Clinic In Toronto </h1>

<h2> Data Preparation </h2>

In [1]:
#defing URL to scrape
url_wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url_wiki

'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [2]:
#Scraping the data and tabulating
import pandas as pd

pd_page = pd.read_html(url_wiki)

df_TOR = pd_page[0]
df_TOR.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
df_TOR.shape

(288, 3)

In [4]:
df_TOR['Borough'].unique()

array(['Not assigned', 'North York', 'Downtown Toronto', "Queen's Park",
       'Etobicoke', 'Scarborough', 'East York', 'York', 'East Toronto',
       'West Toronto', 'Central Toronto', 'Mississauga'], dtype=object)

In [5]:
df_TOR.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [6]:
print('Number of "Not assigned" in Neighbourhood, before: ',df_TOR[df_TOR['Neighbourhood']=='Not assigned'].count()['Postcode'])

for i, row in df_TOR.iterrows():
    if row.Neighbourhood == 'Not assigned':
        row.Neighbourhood = row.Borough

print('Number of "Not assigned" in Neighbourhood, after: ',df_TOR[df_TOR['Neighbourhood']=='Not assigned'].count()['Postcode'])

Number of "Not assigned" in Neighbourhood, before:  78
Number of "Not assigned" in Neighbourhood, after:  77


In [7]:
#info before combining neighbourhoods which have the same postcode
print('Number of postcodes: ',len(df_TOR['Postcode'].unique()))
print('Number of total neighbourhoods: ', df_TOR.shape[0])

Number of postcodes:  180
Number of total neighbourhoods:  288


In [8]:
#set temporary parameters for saving data
last_postcode = ''
last_borough = ''
last_neighbourhood = ''

#processing
for i, row in df_TOR.iterrows():
    if row.Postcode == last_postcode and row.Borough == last_borough:
        row.Neighbourhood = last_neighbourhood + ', ' + row.Neighbourhood
        #print('stop at ',i) #for debugging
        df_TOR.iloc[i-1]['Neighbourhood'] = 'NA'
    last_postcode = row.Postcode
    last_borough = row.Borough
    last_neighbourhood = row.Neighbourhood
    #print(i, postcode_before, borough_before, neighbourhood_before) #for debugging

df_TOR.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,


In [9]:

df_TOR.tail(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
283,M8Z,Etobicoke,
284,M8Z,Etobicoke,
285,M8Z,Etobicoke,
286,M8Z,Etobicoke,"Kingsway Park South West, Mimico NW, The Queen..."
287,M9Z,Not assigned,Not assigned


In [10]:
df_TOR = df_TOR[df_TOR['Neighbourhood'] != 'NA']

In [11]:
df_TOR.reset_index(drop=True, inplace=True)
df_TOR.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront, Regent Park"


In [12]:
df_TOR.shape

(180, 3)

<h2> Getting Geospatial Data for Neighborhood </h2>

In [13]:
url_postcode_TOR = 'http://cocl.us/Geospatial_data'
url_postcode_TOR

'http://cocl.us/Geospatial_data'

In [14]:
pd_postcode = pd.read_csv(url_postcode_TOR)
df_PC_TOR = pd.DataFrame(pd_postcode)
df_PC_TOR.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h2> Merging the 2 tables </h2>

In [15]:
df_merged = df_TOR
df_merged = df_merged.join(df_PC_TOR.set_index('Postal Code'), on='Postcode')
df_merged.head(10)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,,
1,M2A,Not assigned,Not assigned,,
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
5,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
6,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
7,M8A,Not assigned,Not assigned,,
8,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
9,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353


In [16]:
df_filtered_TOR = df_merged[df_merged['Borough'].str.contains('Toronto')]
df_filtered_TOR.reset_index(drop=True, inplace=True)
df_filtered_TOR.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [17]:
df_filtered_TOR.shape

(38, 5)

In [18]:
# install folium
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-3.1.0               |           py36_0         724 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         822 KB

The following NEW packages will be INSTALLED:

    altair:  3.1.0-py36_0 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge


Downloading and Extracting Packages
branca-0.3.1         | 25 KB    

In [19]:
  import numpy as np # library to handle data in a vectorized manner

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


import folium # map rendering library

print('Libraries imported.')

Libraries imported.


<h2> Initializing Foursquare request type for Clincs around Toronto </h2>

In [21]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

Solving environment: done

# All requested packages already installed.



In [22]:
address = 'Toronto'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.653963 -79.387207


In [23]:
search_query = 'Clinic'
radius = 500
print(search_query + ' .... OK!')

Clinic .... OK!


In [24]:
CLIENT_ID = 'YQFF03YT4BOY32U4XIVELI3PUXTABGYD2W1AFDBHLWQFU1TV' # your Foursquare ID
CLIENT_SECRET = 'LLX3X4P1CXMWFTA4DLJJXZSY5U1MQIARDGLL3IECBNR4ZOTL' # your Foursquare Secret
VERSION = '20190804'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YQFF03YT4BOY32U4XIVELI3PUXTABGYD2W1AFDBHLWQFU1TV
CLIENT_SECRET:LLX3X4P1CXMWFTA4DLJJXZSY5U1MQIARDGLL3IECBNR4ZOTL


In [25]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=YQFF03YT4BOY32U4XIVELI3PUXTABGYD2W1AFDBHLWQFU1TV&client_secret=LLX3X4P1CXMWFTA4DLJJXZSY5U1MQIARDGLL3IECBNR4ZOTL&ll=43.653963,-79.387207&v=20190804&query=Clinic&radius=500&limit=30'

In [29]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d492ff489b06a002c0566ce'},
 'response': {'venues': [{'id': '4f6334e0e4b04d4b145f5b83',
    'name': 'Transplant Clinic',
    'location': {'lat': 43.657469381697524,
     'lng': -79.38917767008937,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.657469381697524,
       'lng': -79.38917767008937}],
     'distance': 421,
     'cc': 'CA',
     'country': 'Canada',
     'formattedAddress': ['Canada']},
    'categories': [],
    'referralId': 'v-1565077492',
    'hasPerk': False},
   {'id': '4de68e89e4cde71744c308da',
    'name': 'Rudd-PES Endoscopy Clinic',
    'location': {'address': '123 Edward Street',
     'lat': 43.65589356806434,
     'lng': -79.38663756275383,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.65589356806434,
       'lng': -79.38663756275383}],
     'distance': 219,
     'postalCode': 'M5G 1E2',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': 

<h3> Transform the JSON into Pandas Dataframe </h3>

In [30]:
# assign relevant part of JSON to venues
#venues = results1['response']['venues']

# tranform venues into a dataframe
#dataframe = json_normalize(venues)
#dataframe.head()

# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
0,[],False,4f6334e0e4b04d4b145f5b83,,CA,,Canada,,421,[Canada],"[{'label': 'display', 'lat': 43.65746938169752...",43.657469,-79.389178,,,Transplant Clinic,v-1565077492,
1,"[{'id': '4bf58dd8d48988d177941735', 'name': 'D...",False,4de68e89e4cde71744c308da,123 Edward Street,CA,Toronto,Canada,,219,"[123 Edward Street, Toronto ON M5G 1E2, Canada]","[{'label': 'display', 'lat': 43.65589356806434...",43.655894,-79.386638,M5G 1E2,ON,Rudd-PES Endoscopy Clinic,v-1565077492,
2,"[{'id': '4bf58dd8d48988d196941735', 'name': 'H...",False,507ff5a8e4b03cfb613822e9,The Hospital for Sick Children (SickKids),CA,Toronto,Canada,,378,"[The Hospital for Sick Children (SickKids), To...","[{'label': 'display', 'lat': 43.65734334277486...",43.657343,-79.387732,,ON,Clinic 9,v-1565077492,
3,"[{'id': '4bf58dd8d48988d104941735', 'name': 'M...",False,4b8e90ecf964a520122933e3,595 Bay St.,CA,Toronto,Canada,in Atrium on Bay,387,"[595 Bay St. (in Atrium on Bay), Toronto ON M5...","[{'label': 'display', 'lat': 43.65613745484842...",43.656137,-79.383454,M5G 2C2,ON,MCI Medical Clinic,v-1565077492,
4,"[{'id': '4bf58dd8d48988d177941735', 'name': 'D...",False,51c0df108bbd5e5f422621d1,101-133 Hazelton Avenue,CA,Toronto,Canada,,484,"[101-133 Hazelton Avenue, Toronto ON M5R 0A6, ...","[{'label': 'display', 'lat': 43.65072566250946...",43.650726,-79.391225,M5R 0A6,ON,Visage Clinic,v-1565077492,59006842.0


In [47]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Transplant Clinic,,,CA,,Canada,,421,[Canada],"[{'label': 'display', 'lat': 43.65746938169752...",43.657469,-79.389178,,,4f6334e0e4b04d4b145f5b83
1,Rudd-PES Endoscopy Clinic,Doctor's Office,123 Edward Street,CA,Toronto,Canada,,219,"[123 Edward Street, Toronto ON M5G 1E2, Canada]","[{'label': 'display', 'lat': 43.65589356806434...",43.655894,-79.386638,M5G 1E2,ON,4de68e89e4cde71744c308da
2,Clinic 9,Hospital,The Hospital for Sick Children (SickKids),CA,Toronto,Canada,,378,"[The Hospital for Sick Children (SickKids), To...","[{'label': 'display', 'lat': 43.65734334277486...",43.657343,-79.387732,,ON,507ff5a8e4b03cfb613822e9
3,MCI Medical Clinic,Medical Center,595 Bay St.,CA,Toronto,Canada,in Atrium on Bay,387,"[595 Bay St. (in Atrium on Bay), Toronto ON M5...","[{'label': 'display', 'lat': 43.65613745484842...",43.656137,-79.383454,M5G 2C2,ON,4b8e90ecf964a520122933e3
4,Visage Clinic,Doctor's Office,101-133 Hazelton Avenue,CA,Toronto,Canada,,484,"[101-133 Hazelton Avenue, Toronto ON M5R 0A6, ...","[{'label': 'display', 'lat': 43.65072566250946...",43.650726,-79.391225,M5R 0A6,ON,51c0df108bbd5e5f422621d1


In [67]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

for lat, lng, label in zip(dataframe_filtered['lat'], dataframe_filtered['lng'], dataframe_filtered['categories']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
display(venues_map)


In [56]:
dt=dataframe_filtered[['name','lat','lng','postalCode']]
dt.head()

Unnamed: 0,name,lat,lng,postalCode
0,Transplant Clinic,43.657469,-79.389178,
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2
2,Clinic 9,43.657343,-79.387732,
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2
4,Visage Clinic,43.650726,-79.391225,M5R 0A6


In [34]:
dt.dropna(subset = ['postalCode'])

Unnamed: 0,name,lat,lng,postalCode
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2
4,Visage Clinic,43.650726,-79.391225,M5R 0A6
6,Dundas University Health Clinic,43.654196,-79.388166,M4P 2K8
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R 3A9
8,The Voice Clinic,43.655368,-79.386429,M7A 0A1
11,Cystoscopy Clinic,43.658806,-79.389568,M5G 2N2
15,Gastrointestinal Clinic,43.658706,-79.388775,M5G 0A3
17,The Mindfulness Clinic,43.652069,-79.382722,M5G 1Z6
18,Toronto Foot Clinic,43.653187,-79.382181,M5G 2A3


In [35]:
dt[['Postal Code','subcode']] = dt.postalCode.str.split(expand=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


In [36]:
dt

Unnamed: 0,name,lat,lng,postalCode,Postal Code,subcode
0,Transplant Clinic,43.657469,-79.389178,,,
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2,M5G,1E2
2,Clinic 9,43.657343,-79.387732,,,
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2,M5G,2C2
4,Visage Clinic,43.650726,-79.391225,M5R 0A6,M5R,0A6
5,Princess Margaret Hospital- REACH Clinic,43.657917,-79.390492,,,
6,Dundas University Health Clinic,43.654196,-79.388166,M4P 2K8,M4P,2K8
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R 3A9,M6R,3A9
8,The Voice Clinic,43.655368,-79.386429,M7A 0A1,M7A,0A1
9,Medisys Travel Health Clinic,43.65553,-79.386374,,,


In [37]:
dt1=dt.dropna(subset = ['Postal Code'])
dt1

Unnamed: 0,name,lat,lng,postalCode,Postal Code,subcode
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2,M5G,1E2
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2,M5G,2C2
4,Visage Clinic,43.650726,-79.391225,M5R 0A6,M5R,0A6
6,Dundas University Health Clinic,43.654196,-79.388166,M4P 2K8,M4P,2K8
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R 3A9,M6R,3A9
8,The Voice Clinic,43.655368,-79.386429,M7A 0A1,M7A,0A1
11,Cystoscopy Clinic,43.658806,-79.389568,M5G 2N2,M5G,2N2
15,Gastrointestinal Clinic,43.658706,-79.388775,M5G 0A3,M5G,0A3
17,The Mindfulness Clinic,43.652069,-79.382722,M5G 1Z6,M5G,1Z6
18,Toronto Foot Clinic,43.653187,-79.382181,M5G 2A3,M5G,2A3


In [49]:
df_PC_TOR.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h2> Merging Neighborhood Data with Foursqaure Data for Clinics </h2>

In [39]:
clinic_merged = df_TOR
clinic_merged = dt1.join(df_PC_TOR.set_index('Postal Code'), on='Postal Code')
clinic_merged.head(10)

Unnamed: 0,name,lat,lng,postalCode,Postal Code,subcode,Latitude,Longitude
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2,M5G,1E2,43.657952,-79.387383
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2,M5G,2C2,43.657952,-79.387383
4,Visage Clinic,43.650726,-79.391225,M5R 0A6,M5R,0A6,43.67271,-79.405678
6,Dundas University Health Clinic,43.654196,-79.388166,M4P 2K8,M4P,2K8,43.712751,-79.390197
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R 3A9,M6R,3A9,43.64896,-79.456325
8,The Voice Clinic,43.655368,-79.386429,M7A 0A1,M7A,0A1,43.662301,-79.389494
11,Cystoscopy Clinic,43.658806,-79.389568,M5G 2N2,M5G,2N2,43.657952,-79.387383
15,Gastrointestinal Clinic,43.658706,-79.388775,M5G 0A3,M5G,0A3,43.657952,-79.387383
17,The Mindfulness Clinic,43.652069,-79.382722,M5G 1Z6,M5G,1Z6,43.657952,-79.387383
18,Toronto Foot Clinic,43.653187,-79.382181,M5G 2A3,M5G,2A3,43.657952,-79.387383


In [40]:
clinic_merged2 = df_filtered_TOR
clinic_merged2 = dt1.join(df_filtered_TOR.set_index('Postcode'), on='Postal Code')
clinic_merged2.head(10)

Unnamed: 0,name,lat,lng,postalCode,Postal Code,subcode,Borough,Neighbourhood,Latitude,Longitude
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G 1E2,M5G,1E2,Downtown Toronto,Central Bay Street,43.657952,-79.387383
3,MCI Medical Clinic,43.656137,-79.383454,M5G 2C2,M5G,2C2,Downtown Toronto,Central Bay Street,43.657952,-79.387383
4,Visage Clinic,43.650726,-79.391225,M5R 0A6,M5R,0A6,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
6,Dundas University Health Clinic,43.654196,-79.388166,M4P 2K8,M4P,2K8,Central Toronto,Davisville North,43.712751,-79.390197
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R 3A9,M6R,3A9,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325
8,The Voice Clinic,43.655368,-79.386429,M7A 0A1,M7A,0A1,,,,
11,Cystoscopy Clinic,43.658806,-79.389568,M5G 2N2,M5G,2N2,Downtown Toronto,Central Bay Street,43.657952,-79.387383
15,Gastrointestinal Clinic,43.658706,-79.388775,M5G 0A3,M5G,0A3,Downtown Toronto,Central Bay Street,43.657952,-79.387383
17,The Mindfulness Clinic,43.652069,-79.382722,M5G 1Z6,M5G,1Z6,Downtown Toronto,Central Bay Street,43.657952,-79.387383
18,Toronto Foot Clinic,43.653187,-79.382181,M5G 2A3,M5G,2A3,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [41]:
clinic_merged3=clinic_merged2[['name','lat','lng','Postal Code','Borough','Neighbourhood']]
clinic_merged3

Unnamed: 0,name,lat,lng,Postal Code,Borough,Neighbourhood
1,Rudd-PES Endoscopy Clinic,43.655894,-79.386638,M5G,Downtown Toronto,Central Bay Street
3,MCI Medical Clinic,43.656137,-79.383454,M5G,Downtown Toronto,Central Bay Street
4,Visage Clinic,43.650726,-79.391225,M5R,Central Toronto,"The Annex, North Midtown, Yorkville"
6,Dundas University Health Clinic,43.654196,-79.388166,M4P,Central Toronto,Davisville North
7,Dundas West Chiropractic Clinic,43.654866,-79.387836,M6R,West Toronto,"Parkdale, Roncesvalles"
8,The Voice Clinic,43.655368,-79.386429,M7A,,
11,Cystoscopy Clinic,43.658806,-79.389568,M5G,Downtown Toronto,Central Bay Street
15,Gastrointestinal Clinic,43.658706,-79.388775,M5G,Downtown Toronto,Central Bay Street
17,The Mindfulness Clinic,43.652069,-79.382722,M5G,Downtown Toronto,Central Bay Street
18,Toronto Foot Clinic,43.653187,-79.382181,M5G,Downtown Toronto,Central Bay Street


In [42]:
clinic_merged3.to_csv('clinic_merged3.csv')

In [43]:
neigh_venue_summary = clinic_merged3.groupby('Neighbourhood').count()
neigh_venue_summary

Unnamed: 0_level_0,name,lat,lng,Postal Code,Borough
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Central Bay Street,7,7,7,7,7
Davisville North,1,1,1,1,1
"Parkdale, Roncesvalles",1,1,1,1,1
St. James Town,1,1,1,1,1
"The Annex, North Midtown, Yorkville",1,1,1,1,1


<h2> Preparing DataFrame for Clustering </h2>

In [58]:
clinic_onehot = pd.get_dummies(data =clinic_merged3 , drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['name'])
clinic_onehot.head()

Unnamed: 0,lat,lng,Postal Code,Borough,Neighbourhood,Cystoscopy Clinic,Dundas University Health Clinic,Dundas West Chiropractic Clinic,Gastrointestinal Clinic,Grow Legally Marijuana Clinic and Consulting,MCI Medical Clinic,Rudd-PES Endoscopy Clinic,The Mindfulness Clinic,The Voice Clinic,Toronto Foot Clinic,Tuina Health Clinic,Visage Clinic
1,43.655894,-79.386638,M5G,Downtown Toronto,Central Bay Street,0,0,0,0,0,0,1,0,0,0,0,0
3,43.656137,-79.383454,M5G,Downtown Toronto,Central Bay Street,0,0,0,0,0,1,0,0,0,0,0,0
4,43.650726,-79.391225,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",0,0,0,0,0,0,0,0,0,0,0,1
6,43.654196,-79.388166,M4P,Central Toronto,Davisville North,0,1,0,0,0,0,0,0,0,0,0,0
7,43.654866,-79.387836,M6R,West Toronto,"Parkdale, Roncesvalles",0,0,1,0,0,0,0,0,0,0,0,0


In [59]:
listoffeatures=['lat','lng','Neighbourhood','Borough','Rudd-PES Endoscopy Clinic',
                               'MCI Medical Clinic',
                                    'Visage Clinic',
                  'Dundas University Health Clinic',
                  'Dundas West Chiropractic Clinic',
                                 'The Voice Clinic',
                               'Cystoscopy Clinic',
                         'Gastrointestinal Clinic',
                          'The Mindfulness Clinic',
                             'Toronto Foot Clinic',
    'Grow Legally Marijuana Clinic and Consulting',
                             'Tuina Health Clinic']

In [60]:
clinic_onehot = clinic_onehot[listoffeatures].drop(
    columns = ['lat', 'lng','Borough']).groupby(
    'Neighbourhood').sum()


clinic_onehot.head()

Unnamed: 0_level_0,Rudd-PES Endoscopy Clinic,MCI Medical Clinic,Visage Clinic,Dundas University Health Clinic,Dundas West Chiropractic Clinic,The Voice Clinic,Cystoscopy Clinic,Gastrointestinal Clinic,The Mindfulness Clinic,Toronto Foot Clinic,Grow Legally Marijuana Clinic and Consulting,Tuina Health Clinic
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Central Bay Street,1,1,0,0,0,0,1,1,1,1,1,0
Davisville North,0,0,0,1,0,0,0,0,0,0,0,0
"Parkdale, Roncesvalles",0,0,0,0,1,0,0,0,0,0,0,0
St. James Town,0,0,0,0,0,0,0,0,0,0,0,1
"The Annex, North Midtown, Yorkville",0,0,1,0,0,0,0,0,0,0,0,0


<h2>Clustering Initialization </h2>

In [61]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(clinic_onehot)

In [62]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = clinic_onehot.columns
means_df.index = ['G1','G2','G3','G4','G5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Rudd-PES Endoscopy Clinic,MCI Medical Clinic,Visage Clinic,Dundas University Health Clinic,Dundas West Chiropractic Clinic,The Voice Clinic,Cystoscopy Clinic,Gastrointestinal Clinic,The Mindfulness Clinic,Toronto Foot Clinic,Grow Legally Marijuana Clinic and Consulting,Tuina Health Clinic,Total Sum
G3,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,7.0
G1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
G2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
G4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
G5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


In [63]:
neigh_summary = pd.DataFrame([means_df.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighbourhood', 'Group']
neigh_summary

Unnamed: 0,Neighbourhood,Group
0,G1,3
1,G2,2
2,G3,4
3,G4,5
4,G5,1


<h4> Best Neighborhood to open a clinic <h4>

In [64]:

neigh_summary[neigh_summary['Group'] == 5]

Unnamed: 0,Neighbourhood,Group
3,G4,5


<h4> Second best neighborhood to open a clinic </h4>

In [65]:
neigh_summary[neigh_summary['Group'] == 1]

Unnamed: 0,Neighbourhood,Group
4,G5,1


<h4> Third best neighborhood to open a clinic </h4>


In [66]:
neigh_summary[neigh_summary['Group'] == 4]

Unnamed: 0,Neighbourhood,Group
2,G3,4


<h2>So the Doctor can open a clinic at St.James Town </h2>