# IBM Applied D.S Capstone Project
## Viral Patel
## Week 3 Part 3: Explore and cluster the neighborhoods in Toronto.

## 1. Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd
import folium
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
!conda install -c conda-forge lxml --yes
import requests
import lxml.html as lh
from sklearn.cluster import KMeans
print("Libraries imported.")

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries imported.


## 2. Scrap data from Wikipedia page into a DataFrame (Based on Previous Part)

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
#Create a handle, page, to handle the contents of the website
page = requests.get(url)
#Store the contents of the website under doc
doc = lh.fromstring(page.content)
#Parse data that are stored between <tr>..</tr> of HTML
tr_elements = doc.xpath('//tr')
r = requests.get(url)

#Create empty list
col=[]
i=0
#For each row, store each first element (header) and an empty list
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    print ('%d:"%s"'%(i,name))
    col.append((name,[]))

size_of_col = 3
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 3, the //tr data is not from our table 
    if len(T)!=size_of_col:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=int(data)
            except:
                pass
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1
        
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)
df = df.replace(r'\n','', regex=True) 
df.columns = ['PostalCode', 'Borough', 'Neighborhood']
df.drop(df.tail(1).index,inplace=True)

df = df[df.Borough != "Not assigned"].reset_index(drop=True)

df = df.replace(r'/',', ', regex=True) 

for index, row in df.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]
df.head()

1:"Postal Code
"
2:"Borough
"
3:"Neighbourhood
"


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## 3. Defining the new Dataframe Format

In [3]:
df["Latitude"] = ""
df["Longitude"] = ""
df.shape

(103, 5)

In [4]:
# Cleaning the neighborhood with multiple values ( Selecting the first one)
df["Neighborhood"] = df["Neighborhood"].str.split(",", n = 1, expand = True) 
df["Neighborhood"] = df["Neighborhood"].str.split("-", n = 1, expand = True) 
df["Neighborhood"].head(5)

0           Parkwoods
1    Victoria Village
2         Regent Park
3      Lawrence Manor
4        Queen's Park
Name: Neighborhood, dtype: object

In [5]:
df.head(50)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,Regent Park,,
3,M6A,North York,Lawrence Manor,,
4,M7A,Downtown Toronto,Queen's Park,,
5,M9A,Etobicoke,Islington Avenue,,
6,M1B,Scarborough,Malvern,,
7,M3B,North York,Don Mills,,
8,M4B,East York,Parkview Hill,,
9,M5B,Downtown Toronto,Garden District,,


## 4. To get coordinates and populate the df 

In [6]:
df1 = df.loc[0:25]
df2 = df.loc[26:50]
df3 = df.loc[51:75]
df4 = df.loc[76:102]

In [7]:
# Need to drop those Neighborhood that the geocode does not find
to_drop_unknown = []
geolocator = Nominatim(user_agent="ny_explorer")
for index, row in df1.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df1.loc[index, 'Latitude'] = latitude
        df1.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)

The geograpical coordinate of Parkwoods, Toronto are 43.7587999, -79.3201966.
The geograpical coordinate of Victoria Village, Toronto are 43.732658, -79.3111892.
The geograpical coordinate of Regent Park, Toronto are 43.6607056, -79.3604569.
The geograpical coordinate of Lawrence Manor, Toronto are 43.7220788, -79.4375067.
The geograpical coordinate of Queen's Park, Toronto are 43.659659, -79.3903399.
The geograpical coordinate of Islington Avenue, Toronto are 43.6389593, -79.5210499.
The geograpical coordinate of Malvern, Toronto are 43.8091955, -79.2217008.
The geograpical coordinate of Don Mills, Toronto are 43.775347, -79.3459439.
The geograpical coordinate of Parkview Hill, Toronto are 43.7062977, -79.3219073.
The geograpical coordinate of Garden District, Toronto are 43.6564995, -79.3771141.
The geograpical coordinate of Glencairn, Toronto are 43.7087117, -79.4406853.
The geograpical coordinate of West Deane Park, Toronto are 43.6631995, -79.5685684.
The geograpical coordinate of

In [8]:
geolocator = Nominatim(user_agent="ny_explorer2")
for index, row in df2.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df2.loc[index, 'Latitude'] = latitude
        df2.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)

The geograpical coordinate of Cedarbrae, Toronto are 43.75646655, -79.22669244258802.
The geograpical coordinate of Hillcrest Village, Toronto are 43.6816953, -79.4257118.
The geograpical coordinate of Bathurst Manor, Toronto are 43.6655189, -79.4119373.
The geograpical coordinate of Thorncliffe Park, Toronto are 43.704553, -79.3454074.
The geograpical coordinate of Richmond, Toronto are 43.6485875, -79.3913729.
The geograpical coordinate of Dufferin, Toronto are 43.6602019, -79.4357191.
The geograpical coordinate of Scarborough Village, Toronto are 43.7437422, -79.2116324.
The geograpical coordinate of Fairview, Toronto are 43.777758500000004, -79.34429375180316.
The geograpical coordinate of Northwood Park, Toronto are 43.7541351, -79.50448.
The geograpical coordinate of East Toronto, Toronto are 43.6247901, -79.3934918.
The geograpical coordinate of Harbourfront East, Toronto are 43.6400801, -79.3801495.
The geograpical coordinate of Little Portugal, Toronto are 43.64741325, -79.431

In [9]:
geolocator = Nominatim(user_agent="ny_explorer3")
for index, row in df3.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df3.loc[index, 'Latitude'] = latitude
        df3.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)

The geograpical coordinate of Cliffside, Toronto are 43.7111699, -79.2481769.
The geograpical coordinate of Willowdale, Toronto are 43.7615095, -79.4109234.
The geograpical coordinate of Downsview, Toronto are 43.7492988, -79.462248.
The geograpical coordinate of Studio District, Toronto are 43.64958515, -79.39068322559326.
The geograpical coordinate of Bedford Park, Toronto are 43.7373876, -79.4109253.
Cannot do: Del Ray, Toronto, will drop index: 56
The geograpical coordinate of Humberlea, Toronto are 43.7213166, -79.5331605.
The geograpical coordinate of Birch Cliff, Toronto are 43.6918051, -79.2644935.
The geograpical coordinate of Willowdale, Toronto are 43.7615095, -79.4109234.
The geograpical coordinate of Downsview, Toronto are 43.7492988, -79.462248.
The geograpical coordinate of Lawrence Park, Toronto are 43.729199, -79.4032525.
The geograpical coordinate of Roselawn, Toronto are 43.7087103, -79.4097894.
The geograpical coordinate of Runnymede, Toronto are 43.6517026, -79.475

In [10]:
geolocator = Nominatim(user_agent="ny_explorer4")
for index, row in df4.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        df4.loc[index, 'Latitude'] = latitude
        df4.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)

Cannot do: Canada Post Gateway Processing Centre, Toronto, will drop index: 76
The geograpical coordinate of Kingsview Village, Toronto are 43.6995391, -79.5563459.
The geograpical coordinate of Agincourt, Toronto are 43.7853531, -79.2785494.
The geograpical coordinate of Davisville, Toronto are 43.697936, -79.3972908.
The geograpical coordinate of University of Toronto, Toronto are 43.663461999999996, -79.39775965337452.
The geograpical coordinate of Runnymede, Toronto are 43.6517026, -79.4759978.
The geograpical coordinate of Clarks Corners, Toronto are 43.7964095, -79.2977951.
The geograpical coordinate of Moore Park, Toronto are 43.6903876, -79.3832965.
The geograpical coordinate of Kensington Market, Toronto are 43.6552136, -79.4022604.
The geograpical coordinate of Milliken, Toronto are 43.8231743, -79.3017626.
The geograpical coordinate of Summerhill West, Toronto are 43.6816776, -79.3905037.
The geograpical coordinate of CN Tower, Toronto are 43.6425637, -79.38708718320467.
The

In [11]:
print(df1.shape)
print(df2.shape)
print(df3.shape)
print(df4.shape)

(26, 5)
(25, 5)
(25, 5)
(27, 5)


In [12]:
df = df1.append(df2, ignore_index = True)
df = df.append(df3, ignore_index = True)
df = df.append(df4, ignore_index = True)

In [204]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7588,-79.3202
1,M4A,North York,Victoria Village,43.7327,-79.3112
2,M5A,Downtown Toronto,Regent Park,43.6607,-79.3605
3,M6A,North York,Lawrence Manor,43.7221,-79.4375
4,M7A,Downtown Toronto,Queen's Park,43.6597,-79.3903


## 5. Cleaning the dataframe

In [208]:
clean_df = df.drop(to_drop_unknown)
clean_df['Latitude'].replace('', np.nan, inplace=True)
clean_df.dropna(subset=['Latitude'], inplace=True)
clean_df.shape

(99, 5)

In [209]:
clean_df.tail()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
97,M5X,Downtown Toronto,First Canadian Place,43.648768,-79.3817
98,M8X,Etobicoke,The Kingsway,43.647381,-79.5113
99,M4Y,Downtown Toronto,Church and Wellesley,43.670862,-79.3728
101,M8Y,Etobicoke,Old Mill South,43.649826,-79.4943
102,M8Z,Etobicoke,Mimico NW,43.616677,-79.4968


## 6. Filter only boroughs that contain the word Toronto

In [210]:
# filter borough names that contain the word Toronto
Toronto_df = clean_df[clean_df['Borough'].str.contains("Toronto")].reset_index(drop=True)
Toronto_df.head()
Toronto_df.shape

(37, 5)

In [211]:
Toronto_df.Borough.unique()

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

In [212]:
Toronto_df.drop_duplicates(subset ="PostalCode",keep = "first", inplace = True)
Toronto_df.reset_index(drop=True) 
print(Toronto_df.shape)

(37, 5)


In [213]:
Toronto_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park,43.660706,-79.3605
1,M7A,Downtown Toronto,Queen's Park,43.659659,-79.3903
2,M5B,Downtown Toronto,Garden District,43.6565,-79.3771
3,M5C,Downtown Toronto,St. James Town,43.669403,-79.3727
4,M4E,East Toronto,The Beaches,43.671024,-79.2967
5,M5E,Downtown Toronto,Berczy Park,43.647984,-79.3754
6,M5G,Downtown Toronto,Central Bay Street,43.659756,-79.3854
7,M6G,Downtown Toronto,Christie,43.664111,-79.4184
8,M5H,Downtown Toronto,Richmond,43.648587,-79.3914
9,M6H,West Toronto,Dufferin,43.660202,-79.4357


## 7. Define Foursquare Credentials and Version

In [214]:
# define Foursquare Credentials and Version
CLIENT_ID = 'ZMJ5WRVGRW5HMPBXBTVYBSX3UJL3WYMFPAMBHUCI23BBH5IZ' # your Foursquare ID
CLIENT_SECRET = 'C1IIOKQSEEUI1E2MUHGOLGLVJ1RJVVEGMQJEIYN1YOTPYHOH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
toronto_df_new = Toronto_df.copy()

Your credentails:
CLIENT_ID: ZMJ5WRVGRW5HMPBXBTVYBSX3UJL3WYMFPAMBHUCI23BBH5IZ
CLIENT_SECRET:C1IIOKQSEEUI1E2MUHGOLGLVJ1RJVVEGMQJEIYN1YOTPYHOH


## 8. Top 100 venues that are within a radius of 500 meters.

In [217]:
def getNearbyVenues(names, neigh, latitudes, longitudes, radius=500):
    
    limit=100
    venues_list=[]
    for name, n, lat, lng in zip(names, neigh, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            n,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Neighbourhood',
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [218]:
toronto_venues = getNearbyVenues(names=toronto_df_new['Borough'],
                                   neigh = toronto_df_new['Neighborhood'],
                                   latitudes=toronto_df_new['Latitude'],
                                   longitudes=toronto_df_new['Longitude']
                                  )

Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
East Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
West Toronto
East Toronto
Downtown Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Central Toronto
West Toronto
Central Toronto
Downtown Toronto
West Toronto
Central Toronto
Downtown Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto


In [219]:
toronto_venues.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown Toronto,Regent Park,43.660706,-79.360457,Regent Park Aquatic Centre,43.6606,-79.361392,Pool
1,Downtown Toronto,Regent Park,43.660706,-79.360457,Paintbox Bistro,43.66005,-79.362855,Restaurant
2,Downtown Toronto,Regent Park,43.660706,-79.360457,Daniels Spectrum,43.660137,-79.361808,Performing Arts Venue
3,Downtown Toronto,Regent Park,43.660706,-79.360457,Sumach Espresso,43.658135,-79.359515,Coffee Shop
4,Downtown Toronto,Regent Park,43.660706,-79.360457,Thai To Go,43.663418,-79.36071,Thai Restaurant


In [220]:
toronto_venues.shape

(1884, 8)

In [221]:
#Venues per Borough
toronto_venues.groupby('Neighbourhood').count().Venue

Neighbourhood
Berczy Park                 100
Brockton                     22
CN Tower                     61
Central Bay Street           63
Christie                     59
Church and Wellesley         31
Commerce Court              100
Davisville                   44
Davisville North             44
Dufferin                     44
First Canadian Place        100
Forest Hill North & West     23
Garden District              63
Harbourfront East           100
High Park                    10
India Bazaar                 35
Kensington Market            89
Lawrence Park                51
Little Portugal              37
Moore Park                    4
North Toronto West           49
Parkdale                     43
Queen's Park                 77
Regent Park                  22
Richmond                    100
Rosedale                      4
Roselawn                     15
Runnymede                    40
St. James Town               66
Studio District             100
Summerhill West           

In [222]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 238 uniques categories.


## 9. Analyze Each Area

In [223]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")


toronto_onehot['Borough'] = toronto_venues['Borough'] 
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 


fixed_columns = [toronto_onehot.columns[-2]] + [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough,Neighbourhood,Accessories Store,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Borough.1
0,Downtown Toronto,Regent Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
1,Downtown Toronto,Regent Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
2,Downtown Toronto,Regent Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
3,Downtown Toronto,Regent Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
4,Downtown Toronto,Regent Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto


### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [224]:
toronto_freq = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_freq

Unnamed: 0,Neighbourhood,Accessories Store,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
1,Brockton,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,...,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0
2,CN Tower,0.0,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873
4,Christie,0.0,0.0,0.016949,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016949,0.016949,0.0,0.016949,0.0,0.0,0.0
5,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Commerce Court,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
7,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0
9,Dufferin,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,...,0.0,0.022727,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0


In [225]:
tor_transposed = toronto_freq.iloc[:, 1:].T

In [226]:
tor_transposed.shape

(238, 36)

In [227]:
toronto_df_new.Neighborhood.shape

(37,)

In [228]:

for i, bor in enumerate(toronto_freq.Neighbourhood, 0):
    print('For neighborhood {} the 5 most common venues and their frequencies are:'.format(bor))
    print('------------')
    print(tor_transposed[i].nlargest(5))
    print('------------')

For neighborhood Berczy Park the 5 most common venues and their frequencies are:
------------
Coffee Shop            0.09
Restaurant             0.06
Café                   0.05
Italian Restaurant     0.04
Japanese Restaurant    0.04
Name: 0, dtype: float64
------------
For neighborhood Brockton the 5 most common venues and their frequencies are:
------------
Bar                      0.136364
Park                     0.090909
Vietnamese Restaurant    0.090909
Art Gallery              0.045455
Auto Workshop            0.045455
Name: 1, dtype: float64
------------
For neighborhood CN Tower the 5 most common venues and their frequencies are:
------------
Hotel               0.081967
Coffee Shop         0.065574
Pizza Place         0.065574
Bar                 0.049180
Baseball Stadium    0.049180
Name: 2, dtype: float64
------------
For neighborhood Central Bay Street the 5 most common venues and their frequencies are:
------------
Coffee Shop        0.222222
Bookstore          0.047619
C

### Create the new dataframe and display the top 10 venues for each PostalCode.

In [229]:
def common_venues(data, num_venues):
    cols = ['Neighbourhood']
    
    for i in range(num_venues):
        cols.append('CommonVenue' + str(i + 1))
        
    df = pd.DataFrame(columns = cols)
    row = []
    
    mydict = {}
    for i, bor in enumerate(toronto_freq.Neighbourhood, 0):
        row.append(bor)
        row.extend(data[i].nlargest(num_venues).index.to_list())
        
        for col, r in zip(cols, row):
            mydict[col] = r
        
        df = df.append(mydict, ignore_index = True)
        row = [] 
    
    
    return df

In [230]:
commondf = common_venues(tor_transposed, 15)

In [231]:
commondf

Unnamed: 0,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
0,Berczy Park,Coffee Shop,Restaurant,Café,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Bakery,Beer Bar,Cocktail Bar,Gastropub,Gym,Hotel,Art Gallery,Breakfast Spot,Cheese Shop
1,Brockton,Bar,Park,Vietnamese Restaurant,Art Gallery,Auto Workshop,Bakery,Boutique,Bus Stop,Café,Coffee Shop,Dive Bar,French Restaurant,Gastropub,Grocery Store,Jazz Club
2,CN Tower,Hotel,Coffee Shop,Pizza Place,Bar,Baseball Stadium,Scenic Lookout,Aquarium,Food & Drink Shop,Gym,Ice Cream Shop,Beer Store,Bistro,Breakfast Spot,Brewery,Cable Car
3,Central Bay Street,Coffee Shop,Bookstore,Café,Sandwich Place,Bubble Tea Shop,Chinese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Bank,Burger Joint,Burrito Place,Clothing Store,Comic Shop,Creperie,Deli / Bodega
4,Christie,Korean Restaurant,Coffee Shop,Café,Cocktail Bar,Dessert Shop,Grocery Store,Ice Cream Shop,Indian Restaurant,Karaoke Bar,Mexican Restaurant,Pub,Sandwich Place,American Restaurant,BBQ Joint,Bubble Tea Shop
5,Church and Wellesley,Coffee Shop,Grocery Store,Pizza Place,Bakery,Bank,Bar,Bike Rental / Bike Share,Bistro,Breakfast Spot,Caribbean Restaurant,Convenience Store,Diner,Filipino Restaurant,Food & Drink Shop,Hotel
6,Commerce Court,Coffee Shop,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Beer Bar,Café,Gastropub,Gym,Italian Restaurant,Japanese Restaurant,Art Gallery,Bakery,Breakfast Spot,Cocktail Bar
7,Davisville,Italian Restaurant,Sushi Restaurant,Coffee Shop,Café,Furniture / Home Store,Ice Cream Shop,Pharmacy,Pub,Sporting Goods Shop,Asian Restaurant,Bank,Bar,Convenience Store,Deli / Bodega,Flower Shop
8,Davisville North,Italian Restaurant,Sushi Restaurant,Coffee Shop,Café,Furniture / Home Store,Ice Cream Shop,Pharmacy,Pub,Sporting Goods Shop,Asian Restaurant,Bank,Bar,Convenience Store,Deli / Bodega,Flower Shop
9,Dufferin,Bar,Bakery,Coffee Shop,Beer Store,Café,Cocktail Bar,Mexican Restaurant,Restaurant,Sandwich Place,Vietnamese Restaurant,African Restaurant,Antique Shop,Beer Bar,Department Store,Diner


## 10. Cluster Neighborhoods.
Run k-means to cluster the neighborhood into 4 clusters.

In [249]:
clusters = 4

toronto_clust = toronto_freq.drop('Neighbourhood', axis = 1)

# run k-means clustering
kmeans = KMeans(n_clusters = clusters, random_state=0).fit(toronto_clust)


kmeans.labels_[0:10]

array([1, 0, 1, 1, 0, 1, 1, 1, 1, 0])

In [234]:
commondf['clusters'] = kmeans.labels_

In [235]:
commondf

Unnamed: 0,Neighbourhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,Berczy Park,Coffee Shop,Restaurant,Café,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Bakery,Beer Bar,Cocktail Bar,Gastropub,Gym,Hotel,Art Gallery,Breakfast Spot,Cheese Shop,1
1,Brockton,Bar,Park,Vietnamese Restaurant,Art Gallery,Auto Workshop,Bakery,Boutique,Bus Stop,Café,Coffee Shop,Dive Bar,French Restaurant,Gastropub,Grocery Store,Jazz Club,0
2,CN Tower,Hotel,Coffee Shop,Pizza Place,Bar,Baseball Stadium,Scenic Lookout,Aquarium,Food & Drink Shop,Gym,Ice Cream Shop,Beer Store,Bistro,Breakfast Spot,Brewery,Cable Car,1
3,Central Bay Street,Coffee Shop,Bookstore,Café,Sandwich Place,Bubble Tea Shop,Chinese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Bank,Burger Joint,Burrito Place,Clothing Store,Comic Shop,Creperie,Deli / Bodega,1
4,Christie,Korean Restaurant,Coffee Shop,Café,Cocktail Bar,Dessert Shop,Grocery Store,Ice Cream Shop,Indian Restaurant,Karaoke Bar,Mexican Restaurant,Pub,Sandwich Place,American Restaurant,BBQ Joint,Bubble Tea Shop,0
5,Church and Wellesley,Coffee Shop,Grocery Store,Pizza Place,Bakery,Bank,Bar,Bike Rental / Bike Share,Bistro,Breakfast Spot,Caribbean Restaurant,Convenience Store,Diner,Filipino Restaurant,Food & Drink Shop,Hotel,1
6,Commerce Court,Coffee Shop,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Beer Bar,Café,Gastropub,Gym,Italian Restaurant,Japanese Restaurant,Art Gallery,Bakery,Breakfast Spot,Cocktail Bar,1
7,Davisville,Italian Restaurant,Sushi Restaurant,Coffee Shop,Café,Furniture / Home Store,Ice Cream Shop,Pharmacy,Pub,Sporting Goods Shop,Asian Restaurant,Bank,Bar,Convenience Store,Deli / Bodega,Flower Shop,1
8,Davisville North,Italian Restaurant,Sushi Restaurant,Coffee Shop,Café,Furniture / Home Store,Ice Cream Shop,Pharmacy,Pub,Sporting Goods Shop,Asian Restaurant,Bank,Bar,Convenience Store,Deli / Bodega,Flower Shop,1
9,Dufferin,Bar,Bakery,Coffee Shop,Beer Store,Café,Cocktail Bar,Mexican Restaurant,Restaurant,Sandwich Place,Vietnamese Restaurant,African Restaurant,Antique Shop,Beer Bar,Department Store,Diner,0


In [236]:
toronto_final = toronto_df_new

toronto_final = toronto_final.join(commondf.set_index('Neighbourhood'), on='Neighborhood')
toronto_final.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,...,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,M5A,Downtown Toronto,Regent Park,43.660706,-79.3605,Coffee Shop,Restaurant,Thai Restaurant,Animal Shelter,Auto Dealership,...,Electronics Store,Fast Food Restaurant,Food Truck,Grocery Store,Indian Restaurant,Park,Performing Arts Venue,Pet Store,Pharmacy,1
1,M7A,Downtown Toronto,Queen's Park,43.659659,-79.3903,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bank,...,Bubble Tea Shop,French Restaurant,Japanese Restaurant,Restaurant,Thai Restaurant,Bar,Burrito Place,Chinese Restaurant,College Auditorium,1
2,M5B,Downtown Toronto,Garden District,43.6565,-79.3771,Clothing Store,Coffee Shop,Hotel,Restaurant,Bookstore,...,Cosmetics Shop,Electronics Store,Fast Food Restaurant,Japanese Restaurant,Lingerie Store,Pizza Place,Ramen Restaurant,Sandwich Place,Theater,1
3,M5C,Downtown Toronto,St. James Town,43.669403,-79.3727,Coffee Shop,Pizza Place,Café,Grocery Store,Gym / Fitness Center,...,Bank,Bar,Beer Store,Bike Rental / Bike Share,Bistro,Breakfast Spot,Caribbean Restaurant,Diner,Filipino Restaurant,1
4,M4E,East Toronto,The Beaches,43.671024,-79.2967,Beach,Pizza Place,Bar,Breakfast Spot,Japanese Restaurant,...,Pub,BBQ Joint,Bakery,Bank,Burger Joint,Café,Chocolate Shop,Coffee Shop,Dance Studio,0


In [237]:
toronto_final.shape

(37, 21)

## 11. Plotting the final neighborhood of Toronto Map

In [250]:
import matplotlib.cm as cm
import matplotlib.colors as colors
map_clusters  = folium.Map(location=[latitude, longitude], zoom_start=12)
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
# add markers to map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Borough'], toronto_final['clusters']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 12. Examine Clusters


In [240]:
toronto_final

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,...,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15,clusters
0,M5A,Downtown Toronto,Regent Park,43.660706,-79.3605,Coffee Shop,Restaurant,Thai Restaurant,Animal Shelter,Auto Dealership,...,Electronics Store,Fast Food Restaurant,Food Truck,Grocery Store,Indian Restaurant,Park,Performing Arts Venue,Pet Store,Pharmacy,1
1,M7A,Downtown Toronto,Queen's Park,43.659659,-79.3903,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bank,...,Bubble Tea Shop,French Restaurant,Japanese Restaurant,Restaurant,Thai Restaurant,Bar,Burrito Place,Chinese Restaurant,College Auditorium,1
2,M5B,Downtown Toronto,Garden District,43.6565,-79.3771,Clothing Store,Coffee Shop,Hotel,Restaurant,Bookstore,...,Cosmetics Shop,Electronics Store,Fast Food Restaurant,Japanese Restaurant,Lingerie Store,Pizza Place,Ramen Restaurant,Sandwich Place,Theater,1
3,M5C,Downtown Toronto,St. James Town,43.669403,-79.3727,Coffee Shop,Pizza Place,Café,Grocery Store,Gym / Fitness Center,...,Bank,Bar,Beer Store,Bike Rental / Bike Share,Bistro,Breakfast Spot,Caribbean Restaurant,Diner,Filipino Restaurant,1
4,M4E,East Toronto,The Beaches,43.671024,-79.2967,Beach,Pizza Place,Bar,Breakfast Spot,Japanese Restaurant,...,Pub,BBQ Joint,Bakery,Bank,Burger Joint,Café,Chocolate Shop,Coffee Shop,Dance Studio,0
5,M5E,Downtown Toronto,Berczy Park,43.647984,-79.3754,Coffee Shop,Restaurant,Café,Italian Restaurant,Japanese Restaurant,...,Bakery,Beer Bar,Cocktail Bar,Gastropub,Gym,Hotel,Art Gallery,Breakfast Spot,Cheese Shop,1
6,M5G,Downtown Toronto,Central Bay Street,43.659756,-79.3854,Coffee Shop,Bookstore,Café,Sandwich Place,Bubble Tea Shop,...,Italian Restaurant,Middle Eastern Restaurant,Bank,Burger Joint,Burrito Place,Clothing Store,Comic Shop,Creperie,Deli / Bodega,1
7,M6G,Downtown Toronto,Christie,43.664111,-79.4184,Korean Restaurant,Coffee Shop,Café,Cocktail Bar,Dessert Shop,...,Ice Cream Shop,Indian Restaurant,Karaoke Bar,Mexican Restaurant,Pub,Sandwich Place,American Restaurant,BBQ Joint,Bubble Tea Shop,0
8,M5H,Downtown Toronto,Richmond,43.648587,-79.3914,Coffee Shop,Café,Clothing Store,Hotel,Italian Restaurant,...,Vegetarian / Vegan Restaurant,Arts & Crafts Store,Bar,Beer Bar,Bookstore,Burrito Place,Concert Hall,Cosmetics Shop,Event Space,1
9,M6H,West Toronto,Dufferin,43.660202,-79.4357,Bar,Bakery,Coffee Shop,Beer Store,Café,...,Mexican Restaurant,Restaurant,Sandwich Place,Vietnamese Restaurant,African Restaurant,Antique Shop,Beer Bar,Department Store,Diner,0


### Cluster 0

In [251]:
# Cluster 0
toronto_final.loc[toronto_final['clusters'] == 0, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]


Unnamed: 0,Borough,Neighborhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
4,East Toronto,The Beaches,Beach,Pizza Place,Bar,Breakfast Spot,Japanese Restaurant,Park,Pub,BBQ Joint,Bakery,Bank,Burger Joint,Café,Chocolate Shop,Coffee Shop,Dance Studio
7,Downtown Toronto,Christie,Korean Restaurant,Coffee Shop,Café,Cocktail Bar,Dessert Shop,Grocery Store,Ice Cream Shop,Indian Restaurant,Karaoke Bar,Mexican Restaurant,Pub,Sandwich Place,American Restaurant,BBQ Joint,Bubble Tea Shop
9,West Toronto,Dufferin,Bar,Bakery,Coffee Shop,Beer Store,Café,Cocktail Bar,Mexican Restaurant,Restaurant,Sandwich Place,Vietnamese Restaurant,African Restaurant,Antique Shop,Beer Bar,Department Store,Diner
11,West Toronto,Little Portugal,Bar,Café,Coffee Shop,Bakery,Cocktail Bar,Korean Restaurant,Restaurant,Athletics & Sports,Boutique,Breakfast Spot,Dive Bar,French Restaurant,Grocery Store,Health & Beauty Service,Italian Restaurant
14,West Toronto,Brockton,Bar,Park,Vietnamese Restaurant,Art Gallery,Auto Workshop,Bakery,Boutique,Bus Stop,Café,Coffee Shop,Dive Bar,French Restaurant,Gastropub,Grocery Store,Jazz Club
15,East Toronto,India Bazaar,Indian Restaurant,Grocery Store,Café,Art Gallery,Asian Restaurant,Bar,Brewery,Bus Line,Convenience Store,Diner,Donut Shop,Egyptian Restaurant,Gym,Indian Chinese Restaurant,Indie Theater
23,Central Toronto,North Toronto West,Bar,Vegetarian / Vegan Restaurant,Asian Restaurant,Café,Coffee Shop,Men's Store,Restaurant,Theater,Vietnamese Restaurant,American Restaurant,Art Gallery,Bakery,Beer Bar,Beer Store,Brewery
25,West Toronto,Parkdale,Tibetan Restaurant,Bakery,Bar,Diner,Indian Restaurant,Pharmacy,Pizza Place,Restaurant,Accessories Store,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bank,Beach Bar
30,Downtown Toronto,Kensington Market,Café,Coffee Shop,Vegetarian / Vegan Restaurant,Bar,Mexican Restaurant,Vietnamese Restaurant,Bakery,Breakfast Spot,Caribbean Restaurant,Comfort Food Restaurant,Dessert Shop,Farmers Market,Grocery Store,Nightclub,Pizza Place


### Cluster 1

In [243]:
# Cluster 1
toronto_final.loc[toronto_final['clusters'] == 1, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighborhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
0,Downtown Toronto,Regent Park,Coffee Shop,Restaurant,Thai Restaurant,Animal Shelter,Auto Dealership,Beer Store,Electronics Store,Fast Food Restaurant,Food Truck,Grocery Store,Indian Restaurant,Park,Performing Arts Venue,Pet Store,Pharmacy
1,Downtown Toronto,Queen's Park,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Bank,Bookstore,Bubble Tea Shop,French Restaurant,Japanese Restaurant,Restaurant,Thai Restaurant,Bar,Burrito Place,Chinese Restaurant,College Auditorium
2,Downtown Toronto,Garden District,Clothing Store,Coffee Shop,Hotel,Restaurant,Bookstore,Café,Cosmetics Shop,Electronics Store,Fast Food Restaurant,Japanese Restaurant,Lingerie Store,Pizza Place,Ramen Restaurant,Sandwich Place,Theater
3,Downtown Toronto,St. James Town,Coffee Shop,Pizza Place,Café,Grocery Store,Gym / Fitness Center,Bakery,Bank,Bar,Beer Store,Bike Rental / Bike Share,Bistro,Breakfast Spot,Caribbean Restaurant,Diner,Filipino Restaurant
5,Downtown Toronto,Berczy Park,Coffee Shop,Restaurant,Café,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Bakery,Beer Bar,Cocktail Bar,Gastropub,Gym,Hotel,Art Gallery,Breakfast Spot,Cheese Shop
6,Downtown Toronto,Central Bay Street,Coffee Shop,Bookstore,Café,Sandwich Place,Bubble Tea Shop,Chinese Restaurant,Italian Restaurant,Middle Eastern Restaurant,Bank,Burger Joint,Burrito Place,Clothing Store,Comic Shop,Creperie,Deli / Bodega
8,Downtown Toronto,Richmond,Coffee Shop,Café,Clothing Store,Hotel,Italian Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Arts & Crafts Store,Bar,Beer Bar,Bookstore,Burrito Place,Concert Hall,Cosmetics Shop,Event Space
10,Downtown Toronto,Harbourfront East,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Pizza Place,Bank,Brewery,Chinese Restaurant,Fried Chicken Joint,Gym,History Museum,Music Venue,Park,Plaza
12,East Toronto,The Danforth West,Coffee Shop,Grocery Store,Bus Line,Skating Rink,Athletics & Sports,BBQ Joint,Bank,Baseball Field,Café,Caribbean Restaurant,Construction & Landscaping,Dance Studio,Fish & Chips Shop,Fried Chicken Joint,Indian Restaurant
13,Downtown Toronto,Toronto Dominion Centre,Coffee Shop,Hotel,Café,American Restaurant,Restaurant,Asian Restaurant,Japanese Restaurant,Salad Place,Seafood Restaurant,Beer Bar,Breakfast Spot,Concert Hall,Deli / Bodega,Gastropub,Gym


### Cluster 2

In [244]:
# Cluster 2
toronto_final.loc[toronto_final['clusters'] == 2, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighborhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
33,Downtown Toronto,Rosedale,Park,Bike Trail,Playground,Accessories Store,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Workshop


### Cluster 3

In [246]:
# Cluster 3
toronto_final.loc[toronto_final['clusters'] == 3, toronto_final.columns[[1] + [2] + list(range(5, toronto_final.shape[1] - 1))]]

Unnamed: 0,Borough,Neighborhood,CommonVenue1,CommonVenue2,CommonVenue3,CommonVenue4,CommonVenue5,CommonVenue6,CommonVenue7,CommonVenue8,CommonVenue9,CommonVenue10,CommonVenue11,CommonVenue12,CommonVenue13,CommonVenue14,CommonVenue15
29,Central Toronto,Moore Park,Gym,Park,Tennis Court,Trail,Accessories Store,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership


## 13. Observations:
Most of the neighborhoods fall into Cluster 0 which are mostly bars, Cluster 1 has most common single area of coffee shop, Cluster 2 and Cluster 3 have parks and gym.