In [90]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [3]:
res=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [4]:
soup=BeautifulSoup(res.text,'lxml')

In [5]:
postal=[]
borough=[]
neighborhoods=[]

In [6]:
for rows in soup.find('table').find_all('tr'):
    data=rows.find_all('td')
    if(len(data)>0):
        postal.append(data[0].text)
        borough.append(data[1].text)
        neighborhoods.append(data[2].text)
print(len(postal),len(borough),len(neighborhoods))

288 288 288


In [7]:
print(postal[:5],borough[:5],neighborhoods[:5])

['M1A', 'M2A', 'M3A', 'M4A', 'M5A'] ['Not assigned', 'Not assigned', 'North York', 'North York', 'Downtown Toronto'] ['Not assigned\n', 'Not assigned\n', 'Parkwoods\n', 'Victoria Village\n', 'Harbourfront\n']


In [8]:
tdf=pd.DataFrame()

In [9]:
tdf['PostalCode']=postal

In [10]:
tdf['Borough']=borough

In [11]:
tdf['Neighborhood']=neighborhoods

In [12]:
tdf.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n


### Now let's remove rows with Borough 'Not assigned'

In [13]:
df=tdf[tdf['Borough']!='Not assigned']

In [14]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M5A,Downtown Toronto,Regent Park\n
6,M6A,North York,Lawrence Heights\n


In [15]:
dff=df.groupby(['PostalCode','Borough'],as_index=False).sum()

In [16]:
dff.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Rouge\nMalvern\n
1,M1C,Scarborough,Highland Creek\nRouge Hill\nPort Union\n
2,M1E,Scarborough,Guildwood\nMorningside\nWest Hill\n
3,M1G,Scarborough,Woburn\n
4,M1H,Scarborough,Cedarbrae\n


In [17]:
li=dff['Neighborhood']
for i in range(0,len(li)):
    li[i]=li[i].replace('\n',', ')
    li[i]=li[i].rstrip(', ')

In [18]:
li[0:50]

0                                        Rouge, Malvern
1                Highland Creek, Rouge Hill, Port Union
2                     Guildwood, Morningside, West Hill
3                                                Woburn
4                                             Cedarbrae
5                                   Scarborough Village
6           East Birchmount Park, Ionview, Kennedy Park
7                       Clairlea, Golden Mile, Oakridge
8       Cliffcrest, Cliffside, Scarborough Village West
9                           Birch Cliff, Cliffside West
10    Dorset Park, Scarborough Town Centre, Wexford ...
11                                    Maryvale, Wexford
12                                            Agincourt
13              Clarks Corners, Sullivan, Tam O'Shanter
14    Agincourt North, L'Amoreaux East, Milliken, St...
15                                      L'Amoreaux West
16                                          Upper Rouge
17                                    Hillcrest 

In [19]:
dff.drop(columns=['Neighborhood'])

Unnamed: 0,PostalCode,Borough
0,M1B,Scarborough
1,M1C,Scarborough
2,M1E,Scarborough
3,M1G,Scarborough
4,M1H,Scarborough
5,M1J,Scarborough
6,M1K,Scarborough
7,M1L,Scarborough
8,M1M,Scarborough
9,M1N,Scarborough


In [20]:
dff['Neighborhood']=li

In [21]:
dff.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [22]:
dff[dff['PostalCode']=='M5A']

Unnamed: 0,PostalCode,Borough,Neighborhood
53,M5A,Downtown Toronto,"Harbourfront, Regent Park"


In [23]:
dff.shape

(103, 3)

### Getting latitude and longitude values for each neighborhood

In [24]:
from geopy.geocoders import Nominatim

In [25]:
import folium


In [26]:
postal_codes=dff.PostalCode

In [27]:
coords=pd.read_csv('Geospatial_Coordinates.csv')

In [28]:
coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [29]:
pc2=coords['Postal Code']

In [30]:
pc1=dff['PostalCode']

In [31]:
(pc1==pc2).value_counts()

True    103
dtype: int64

### So both of them have same postal codes...now we can merge them

In [32]:
df_merged=pd.concat([dff,coords],axis=1)

In [33]:
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [34]:
df_merged.drop(columns=['Postal Code'],inplace=True)

In [35]:
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [36]:
CLIENT_ID = 'PCCHO4J0AGUYXHW1W10DK5YU4T4DE2V1BWFOMB53KAUNOMTL' 
CLIENT_SECRET = '2VZPPKEWQQVWZ4ZCSZ1OVYY1CVBOMQSOPZINC0YSAOZX4WUJ' 
VERSION = '20190713' 

In [39]:
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [40]:
dff=df_merged.drop(columns=['PostalCode','Borough'])

In [41]:
dff.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Rouge, Malvern",43.806686,-79.194353
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Woburn,43.770992,-79.216917
4,Cedarbrae,43.773136,-79.239476


In [56]:
def getvenues(names, latitudes, longitudes, radius=500):
    limit=75
    radius=500
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,VERSION, lat, lng, radius, limit)
            
        
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except:
            continue
        
        venues_list.append([(
            name, lat, lng,v['venue']['name'], v['venue']['location']['lat'],v['venue']['location']['lng'],v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category']
    
    return(nearby_venues)

In [57]:
required_venues_t=getvenues(dff['Neighborhood'],dff['Latitude'],dff['Longitude'])

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [58]:
required_venues_t.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [59]:
required_venues_t.shape

(2020, 7)

In [60]:
required_venues_t.groupby(['Neighborhood']).count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",75,75,75,75,75,75
Agincourt,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",11,11,11,11,11,11
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Downsview North, Wilson Heights",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Berczy Park,58,58,58,58,58,58
"Birch Cliff, Cliffside West",4,4,4,4,4,4


In [63]:
required_venues_t['Venue Category'].value_counts()

Coffee Shop                      166
Café                              93
Restaurant                        57
Pizza Place                       56
Park                              50
Italian Restaurant                44
Bakery                            43
Bar                               41
Sandwich Place                    38
Fast Food Restaurant              34
Hotel                             33
Japanese Restaurant               29
Gym                               27
American Restaurant               25
Gastropub                         25
Sushi Restaurant                  24
Clothing Store                    24
Grocery Store                     24
Breakfast Spot                    22
Burger Joint                      21
Steakhouse                        21
Seafood Restaurant                21
Chinese Restaurant                21
Pharmacy                          21
Pub                               20
Thai Restaurant                   20
Vegetarian / Vegan Restaurant     19
D

In [64]:
required_venues_t['Venue Category'].value_counts()[60]

9

In [66]:
len(required_venues_t['Venue Category'].unique())

267

##### There are 267 different types of venues in Toronto


In [67]:
t_dummies=pd.get_dummies(required_venues_t['Venue Category'])

In [69]:
t_dummies

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
t_dummies['Neighborhood']=required_venues_t['Neighborhood']

In [72]:
t_dummies['Neighborhood'].head()

0                            Rouge, Malvern
1    Highland Creek, Rouge Hill, Port Union
2    Highland Creek, Rouge Hill, Port Union
3         Guildwood, Morningside, West Hill
4         Guildwood, Morningside, West Hill
Name: Neighborhood, dtype: object

In [73]:
t_dummies.columns.get_loc("Neighborhood")

187

In [76]:
t_dummies.iloc[:,187].head()

0                            Rouge, Malvern
1    Highland Creek, Rouge Hill, Port Union
2    Highland Creek, Rouge Hill, Port Union
3         Guildwood, Morningside, West Hill
4         Guildwood, Morningside, West Hill
Name: Neighborhood, dtype: object

In [77]:
cols=[t_dummies.columns[187]]+list(t_dummies.columns[:187])+list(t_dummies.columns[188:])

In [78]:
updated_t_dummies=t_dummies[cols]

In [79]:
updated_t_dummies.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [82]:
t_dummies.shape, updated_t_dummies.shape

((2020, 267), (2020, 267))

#### All set!

In [83]:
t_venue_by_neighborhood_grouped=updated_t_dummies.groupby(['Neighborhood']).mean()

In [84]:
t_venue_by_neighborhood_grouped

Unnamed: 0_level_0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Adelaide, King, Richmond",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.053333,0.0,...,0.0,0.013333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Agincourt,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.090909,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Alderwood, Long Branch",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.047619,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Bayview Village,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Bedford Park, Lawrence Manor East",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.045455,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Berczy Park,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.017241,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
"Birch Cliff, Cliffside West",0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


#### Neighborhood is serving as the index...Lets reset it!

In [85]:
t_venue_by_neighborhood_grouped.reset_index(inplace=True)

In [86]:
t_venue_by_neighborhood_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053333,...,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [87]:
t_venue_by_neighborhood_grouped.shape

(99, 267)

In [88]:
def get_common_venues(row, n):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:n]

In [131]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = t_venue_by_neighborhood_grouped['Neighborhood']

for ind in np.arange(t_venue_by_neighborhood_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = get_common_venues(t_venue_by_neighborhood_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,American Restaurant,Café,Steakhouse,Asian Restaurant,Bar,Sushi Restaurant,Pizza Place,Gym,Hotel
1,Agincourt,Lounge,Clothing Store,Skating Rink,Breakfast Spot,Yoga Studio,Doner Restaurant,Diner,Discount Store,Dive Bar,Dog Run
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doner Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Video Store,Beer Store,Fast Food Restaurant,Sandwich Place,Fried Chicken Joint,Coffee Shop,Pharmacy,Colombian Restaurant
4,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Skating Rink,Pharmacy,Pub,Sandwich Place,Diner,Department Store,Dessert Shop
5,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Shopping Mall,Pharmacy,Supermarket,Sushi Restaurant,Fast Food Restaurant,Deli / Bodega,Restaurant,Fried Chicken Joint,Frozen Yogurt Shop
6,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Yoga Studio,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run
7,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Pizza Place,Pharmacy,Café,Indian Restaurant,Sushi Restaurant,Pub,Restaurant,Fast Food Restaurant
8,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Café,Cheese Shop,Seafood Restaurant,Steakhouse,Bakery,Jazz Club
9,"Birch Cliff, Cliffside West",General Entertainment,College Stadium,Café,Skating Rink,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run


In [132]:
t_for_clustering=t_venue_by_neighborhood_grouped.drop(columns=['Neighborhood'],axis=1)

In [133]:
t_for_clustering.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053333,0.0,...,0.0,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [134]:
from sklearn.cluster import KMeans

In [135]:
kmeans = KMeans(n_clusters=6, random_state=0).fit(t_for_clustering)

In [136]:
kmeans

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=6, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

In [137]:
len(kmeans.labels_)

99

In [138]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [139]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide, King, Richmond",Coffee Shop,American Restaurant,Café,Steakhouse,Asian Restaurant,Bar,Sushi Restaurant,Pizza Place,Gym,Hotel
1,0,Agincourt,Lounge,Clothing Store,Skating Rink,Breakfast Spot,Yoga Studio,Doner Restaurant,Diner,Discount Store,Dive Bar,Dog Run
2,2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doner Restaurant
3,0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Video Store,Beer Store,Fast Food Restaurant,Sandwich Place,Fried Chicken Joint,Coffee Shop,Pharmacy,Colombian Restaurant
4,0,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Skating Rink,Pharmacy,Pub,Sandwich Place,Diner,Department Store,Dessert Shop


In [140]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels']==0]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide, King, Richmond",Coffee Shop,American Restaurant,Café,Steakhouse,Asian Restaurant,Bar,Sushi Restaurant,Pizza Place,Gym,Hotel
1,0,Agincourt,Lounge,Clothing Store,Skating Rink,Breakfast Spot,Yoga Studio,Doner Restaurant,Diner,Discount Store,Dive Bar,Dog Run
3,0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Video Store,Beer Store,Fast Food Restaurant,Sandwich Place,Fried Chicken Joint,Coffee Shop,Pharmacy,Colombian Restaurant
4,0,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Skating Rink,Pharmacy,Pub,Sandwich Place,Diner,Department Store,Dessert Shop
5,0,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Shopping Mall,Pharmacy,Supermarket,Sushi Restaurant,Fast Food Restaurant,Deli / Bodega,Restaurant,Fried Chicken Joint,Frozen Yogurt Shop
6,0,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Yoga Studio,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run
7,0,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Pizza Place,Pharmacy,Café,Indian Restaurant,Sushi Restaurant,Pub,Restaurant,Fast Food Restaurant
8,0,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Farmers Market,Café,Cheese Shop,Seafood Restaurant,Steakhouse,Bakery,Jazz Club
9,0,"Birch Cliff, Cliffside West",General Entertainment,College Stadium,Café,Skating Rink,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run
10,0,"Bloordale Gardens, Eringate, Markland Wood, Ol...",Park,Coffee Shop,Liquor Store,Beer Store,Convenience Store,Café,Pizza Place,Pharmacy,Dessert Shop,Eastern European Restaurant


In [141]:
neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster Labels']==1]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,1,East Toronto,Park,Pizza Place,Coffee Shop,Convenience Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar
57,1,Humber Summit,Empanada Restaurant,Pizza Place,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doner Restaurant
59,1,"Kingsview Village, Martin Grove Gardens, Richv...",Park,Bus Line,Pizza Place,Mobile Phone Shop,Dive Bar,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
79,1,Scarborough Village,Pizza Place,Playground,Convenience Store,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar
87,1,"The Junction North, Runnymede",Brewery,Grocery Store,Pizza Place,Bus Line,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar
91,1,Westmount,Pizza Place,Chinese Restaurant,Coffee Shop,Sandwich Place,Middle Eastern Restaurant,Yoga Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store


In [142]:
final=df_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [143]:
final['Cluster Labels'].value_counts()

0.0    77
2.0     9
1.0     6
5.0     4
4.0     2
3.0     1
Name: Cluster Labels, dtype: int64

In [144]:
down_t_m=folium.Map(location=[43.655115, -79.380219],zoom_start=12)
down_t_m

In [147]:
final.dropna(axis=0,inplace=True)

In [149]:
colors=['red','blue','green','black','yellow','pink','indigo']
for lat,lng,neighborhood,cl in zip(final['Latitude'],final['Longitude'],final['Neighborhood'],final['Cluster Labels']):
    rangu=colors[int(cl)+1]
    label='{}, {}'.format(neighborhood,cl)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker([lat,lng],radius=5,popup=label,color=rangu,fill=True,fill_color='pink',pare_html=False).add_to(down_t_m)
    

In [150]:
down_t_m

In [152]:
final.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,5.0,Fast Food Restaurant,Yoga Studio,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4.0,Golf Course,Bar,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Electronics Store,Mexican Restaurant,Rental Car Location,Pizza Place,Intersection,Medical Center,Breakfast Spot,Yoga Studio,Dim Sum Restaurant,Diner
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Korean Restaurant,Insurance Office,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dessert Shop,Donut Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Hakka Restaurant,Fried Chicken Joint,Athletics & Sports,Bakery,Thai Restaurant,Bank,Caribbean Restaurant,Dim Sum Restaurant,Diner,Discount Store


In [159]:
import json
from pandas.io.json import json_normalize

In [160]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [161]:
newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [167]:
neighborhoods_data = newyork_data['features']

In [168]:
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

neighborhoods = pd.DataFrame(columns=column_names)

In [169]:
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [170]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [171]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [172]:
neighborhoods.shape

(306, 4)

In [174]:
def getvenues(names, latitudes, longitudes, radius=500):
    limit=75
    radius=500
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,VERSION, lat, lng, radius, limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, lat, lng,v['venue']['name'], v['venue']['location']['lat'],v['venue']['location']['lng'],v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category']
    
    return(nearby_venues)

In [175]:
ny_venues=getvenues(names=neighborhoods['Neighborhood'],latitudes=neighborhoods['Latitude'],longitudes=neighborhoods['Longitude'])


Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [177]:
ny_venues.shape

(9391, 7)

In [180]:
ny_venues['Venue Category'].value_counts()

Pizza Place                        408
Italian Restaurant                 271
Deli / Bodega                      259
Coffee Shop                        227
Chinese Restaurant                 205
Bakery                             204
Bar                                180
Sandwich Place                     174
Pharmacy                           172
Donut Shop                         164
Park                               164
Mexican Restaurant                 158
Grocery Store                      155
American Restaurant                150
Café                               148
Bank                               141
Ice Cream Shop                     140
Gym                                114
Bagel Shop                         113
Gym / Fitness Center               110
Supermarket                        109
Sushi Restaurant                   102
Fast Food Restaurant                97
Spa                                 96
Hotel                               91
Cosmetics Shop           

#### Looks like there are more venues in NY than in Toronto

In [182]:
ny_dummies=pd.get_dummies(ny_venues['Venue Category'])
ny_dummies.head()

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [183]:
ny_dummies['Neighborhood']=ny_venues['Neighborhood']

In [184]:
ny_dummies.head()

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [186]:
ny_dummies.columns.get_loc("Neighborhood")

258

In [187]:
cols=[ny_dummies.columns[258]]+list(ny_dummies.columns[:258])+list(ny_dummies.columns[259:])

In [188]:
ny_n_v=ny_dummies[cols]

In [189]:
ny_n_v.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,...,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [190]:
ny_venue_by_neighborhood=ny_n_v.groupby('Neighborhood').mean()

In [191]:
ny_venue_by_neighborhood.head()

Unnamed: 0_level_0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,...,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Annadale,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arlington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [192]:
ny_venue_by_neighborhood.reset_index(inplace=True)

In [193]:
ny_venue_by_neighborhood.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,...,Waste Facility,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [194]:
ny_venue_by_neighborhood.shape

(300, 420)

In [195]:
def get_common_venues(row, n):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:n]

In [200]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


ny_venues_sorted = pd.DataFrame(columns=columns)
ny_venues_sorted['Neighborhood'] = ny_venue_by_neighborhood['Neighborhood']

for ind in np.arange(t_venue_by_neighborhood_grouped.shape[0]):
    ny_venues_sorted.iloc[ind, 1:] = get_common_venues(ny_venue_by_neighborhood.iloc[ind, :], num_top_venues)

ny_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Supermarket,Spa,Deli / Bodega,Playground,Chinese Restaurant,Smoke Shop,Martial Arts Dojo,Fast Food Restaurant,Bakery
1,Annadale,Bakery,Sushi Restaurant,Restaurant,Liquor Store,Train Station,Food,Diner,Pizza Place,American Restaurant,Sports Bar
2,Arden Heights,Pharmacy,Deli / Bodega,Coffee Shop,Bus Stop,Pizza Place,Home Service,Filipino Restaurant,Exhibit,Eye Doctor,Factory
3,Arlington,Deli / Bodega,Intersection,Bus Stop,Boat or Ferry,Grocery Store,Yoga Studio,Flea Market,Falafel Restaurant,Farm,Farmers Market
4,Arrochar,Bus Stop,Bagel Shop,Italian Restaurant,Deli / Bodega,Middle Eastern Restaurant,Sandwich Place,Pizza Place,Athletics & Sports,Mediterranean Restaurant,Hotel
5,Arverne,Surf Spot,Sandwich Place,Metro Station,Board Shop,Pizza Place,Donut Shop,Playground,Coffee Shop,Beach,Bus Stop
6,Astoria,Bar,Greek Restaurant,Hookah Bar,Seafood Restaurant,Gourmet Shop,Bakery,Mediterranean Restaurant,Salon / Barbershop,Pub,Middle Eastern Restaurant
7,Astoria Heights,Italian Restaurant,Plaza,Supermarket,Bus Station,Bowling Alley,Food,Gourmet Shop,Chinese Restaurant,Bakery,Liquor Store
8,Auburndale,Bar,Supermarket,Pet Store,Pharmacy,Toy / Game Store,Fast Food Restaurant,Train,Miscellaneous Shop,Mobile Phone Shop,Athletics & Sports
9,Bath Beach,Pharmacy,Chinese Restaurant,Italian Restaurant,Kids Store,Fast Food Restaurant,Donut Shop,Pizza Place,Bubble Tea Shop,Sushi Restaurant,Coffee Shop


In [202]:
ny_upd=ny_venues_sorted.dropna(axis=0)

In [205]:
t_venue_by_neighborhood_grouped.shape

(99, 267)

In [206]:
ny_upd.shape

(99, 11)

In [227]:
ny_venues=ny_venue_by_neighborhood.columns[:]

In [228]:
ny_venues

Index(['Neighborhood', 'Accessories Store', 'Adult Boutique',
       'Afghan Restaurant', 'African Restaurant', 'Airport Terminal',
       'American Restaurant', 'Antique Shop', 'Arcade', 'Arepa Restaurant',
       ...
       'Waste Facility', 'Watch Shop', 'Waterfront', 'Weight Loss Center',
       'Whisky Bar', 'Wine Bar', 'Wine Shop', 'Wings Joint', 'Women's Store',
       'Yoga Studio'],
      dtype='object', length=420)

In [229]:
t_venues=t_venue_by_neighborhood_grouped.columns[:]

In [230]:
t_venues

Index(['Neighborhood', 'Accessories Store', 'Afghan Restaurant', 'Airport',
       'Airport Food Court', 'Airport Gate', 'Airport Lounge',
       'Airport Service', 'Airport Terminal', 'American Restaurant',
       ...
       'Train Station', 'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Video Store', 'Vietnamese Restaurant', 'Warehouse Store', 'Wine Bar',
       'Wings Joint', 'Women's Store', 'Yoga Studio'],
      dtype='object', length=267)

In [231]:
common_columns=set(t_venues).intersection(set(ny_venues))

In [232]:
len(common_columns)

236

So there are 235 common venue categories in both NY and Toronto

In [233]:
common_columns=list(common_columns)

In [234]:
t_to_merge=t_venue_by_neighborhood_grouped[common_columns]

In [235]:
ny_to_merge=ny_venue_by_neighborhood[common_columns]

In [240]:
ny_to_merge
t_to_merge.head()

Unnamed: 0,Bowling Alley,Drugstore,Steakhouse,Rental Car Location,Café,Boat or Ferry,Music Venue,Building,Dim Sum Restaurant,Baseball Field,...,Southern / Soul Food Restaurant,Sports Bar,Coworking Space,Beach,Church,French Restaurant,Salon / Barbershop,Cheese Shop,Flea Market,Arts & Crafts Store
0,0.0,0.0,0.053333,0.0,0.053333,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [241]:
ny_to_merge.columns.get_loc("Neighborhood")

99

In [242]:
t_to_merge.columns.get_loc("Neighborhood")

99

In [244]:
ny_to_merge.shape,t_to_merge.shape

((300, 236), (99, 236))

In [245]:
almost_cluster_ready_df=ny_to_merge.append(t_to_merge)

In [246]:
almost_cluster_ready_df.shape

(399, 236)

In [248]:
almost_cluster_ready_df['Neighborhood'].head(10)

0           Allerton
1           Annadale
2      Arden Heights
3          Arlington
4           Arrochar
5            Arverne
6            Astoria
7    Astoria Heights
8         Auburndale
9         Bath Beach
Name: Neighborhood, dtype: object

In [249]:
almost_cluster_ready_df['Neighborhood'].tail(10)

89                   Thorncliffe Park
90                   Victoria Village
91                          Westmount
92                             Weston
93                   Willowdale South
94                    Willowdale West
95                             Woburn
96    Woodbine Gardens, Parkview Hill
97                   Woodbine Heights
98                    York Mills West
Name: Neighborhood, dtype: object

All set!

In [260]:
almost_cluster_ready_df.head()
almost_cluster_ready_df.columns.get_loc("Neighborhood")

99

In [261]:
cols=[almost_cluster_ready_df.columns[99]]+list(almost_cluster_ready_df.columns[:99])+list(almost_cluster_ready_df.columns[100:])

In [262]:
almost_cluster_ready_df=almost_cluster_ready_df[cols]

In [263]:
almost_cluster_ready_df.head()

Unnamed: 0,Neighborhood,Bowling Alley,Drugstore,Steakhouse,Rental Car Location,Café,Boat or Ferry,Music Venue,Building,Dim Sum Restaurant,...,Southern / Soul Food Restaurant,Sports Bar,Coworking Space,Beach,Church,French Restaurant,Salon / Barbershop,Cheese Shop,Flea Market,Arts & Crafts Store
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [264]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


full_sorted = pd.DataFrame(columns=columns)
full_sorted['Neighborhood'] = almost_cluster_ready_df['Neighborhood']

for ind in np.arange(almost_cluster_ready_df.shape[0]):
    full_sorted.iloc[ind, 1:] = get_common_venues(almost_cluster_ready_df.iloc[ind, :], num_top_venues)

full_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Supermarket,Spa,Electronics Store,Breakfast Spot,Bakery,Discount Store,Martial Arts Dojo,Pharmacy,Intersection
1,Annadale,Bakery,Liquor Store,Sports Bar,Restaurant,Diner,Sushi Restaurant,American Restaurant,Train Station,Pizza Place,Food
2,Arden Heights,Coffee Shop,Pizza Place,Home Service,Deli / Bodega,Bus Stop,Pharmacy,Arts & Crafts Store,Cocktail Bar,Athletics & Sports,Yoga Studio
3,Arlington,Grocery Store,Deli / Bodega,Bus Stop,Boat or Ferry,Intersection,Toy / Game Store,Colombian Restaurant,Airport Terminal,Recording Studio,Garden
4,Arrochar,Bus Stop,Bagel Shop,Italian Restaurant,Deli / Bodega,Pizza Place,Hotel,Athletics & Sports,Sandwich Place,Supermarket,Middle Eastern Restaurant
5,Arverne,Sandwich Place,Metro Station,Coffee Shop,Bus Stop,Playground,Pizza Place,Thai Restaurant,Board Shop,Beach,Donut Shop
6,Astoria,Bar,Greek Restaurant,Hookah Bar,Seafood Restaurant,Dessert Shop,Ice Cream Shop,Latin American Restaurant,Indian Restaurant,Gourmet Shop,Bakery
7,Astoria Heights,Bowling Alley,Italian Restaurant,Chinese Restaurant,Playground,Pizza Place,Gourmet Shop,Bus Station,Supermarket,Bakery,Plaza
8,Auburndale,Noodle House,Italian Restaurant,Athletics & Sports,Mobile Phone Shop,Toy / Game Store,Supermarket,Hookah Bar,Discount Store,Furniture / Home Store,Miscellaneous Shop
9,Bath Beach,Pharmacy,Chinese Restaurant,Sushi Restaurant,Pizza Place,Donut Shop,Bubble Tea Shop,Italian Restaurant,Fast Food Restaurant,Park,Mobile Phone Shop


In [250]:
cluster_ready_df=almost_cluster_ready_df.drop(columns=['Neighborhood'])

In [251]:
kmeans=KMeans(n_clusters=4,random_state=0).fit(cluster_ready_df)

In [257]:
kmeans.labels_

array([3, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3,
       0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0,
       0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0,
       1, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 1, 3, 3, 0, 3, 0, 0, 3,
       0, 0, 0, 3, 3, 0, 0, 0, 3, 0, 0, 1, 0, 0, 3, 0, 0, 0, 3, 0, 3, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0,
       0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0,
       3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 3, 0, 0, 0, 0,

In [265]:
full_sorted['Cluster']=kmeans.labels_

In [266]:
full_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
0,Allerton,Pizza Place,Supermarket,Spa,Electronics Store,Breakfast Spot,Bakery,Discount Store,Martial Arts Dojo,Pharmacy,Intersection,3
1,Annadale,Bakery,Liquor Store,Sports Bar,Restaurant,Diner,Sushi Restaurant,American Restaurant,Train Station,Pizza Place,Food,0
2,Arden Heights,Coffee Shop,Pizza Place,Home Service,Deli / Bodega,Bus Stop,Pharmacy,Arts & Crafts Store,Cocktail Bar,Athletics & Sports,Yoga Studio,3
3,Arlington,Grocery Store,Deli / Bodega,Bus Stop,Boat or Ferry,Intersection,Toy / Game Store,Colombian Restaurant,Airport Terminal,Recording Studio,Garden,1
4,Arrochar,Bus Stop,Bagel Shop,Italian Restaurant,Deli / Bodega,Pizza Place,Hotel,Athletics & Sports,Sandwich Place,Supermarket,Middle Eastern Restaurant,0
5,Arverne,Sandwich Place,Metro Station,Coffee Shop,Bus Stop,Playground,Pizza Place,Thai Restaurant,Board Shop,Beach,Donut Shop,0
6,Astoria,Bar,Greek Restaurant,Hookah Bar,Seafood Restaurant,Dessert Shop,Ice Cream Shop,Latin American Restaurant,Indian Restaurant,Gourmet Shop,Bakery,0
7,Astoria Heights,Bowling Alley,Italian Restaurant,Chinese Restaurant,Playground,Pizza Place,Gourmet Shop,Bus Station,Supermarket,Bakery,Plaza,0
8,Auburndale,Noodle House,Italian Restaurant,Athletics & Sports,Mobile Phone Shop,Toy / Game Store,Supermarket,Hookah Bar,Discount Store,Furniture / Home Store,Miscellaneous Shop,0
9,Bath Beach,Pharmacy,Chinese Restaurant,Sushi Restaurant,Pizza Place,Donut Shop,Bubble Tea Shop,Italian Restaurant,Fast Food Restaurant,Park,Mobile Phone Shop,0


In [267]:
full_sorted['Cluster'].value_counts()

0    318
3     58
2     14
1      9
Name: Cluster, dtype: int64

In [269]:
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [270]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [274]:
neighborhoods.shape

(306, 4)

In [275]:
df_merged.drop(columns=['PostalCode'],inplace=True)

In [276]:
final_neighborhood_loc=pd.concat([neighborhoods,df_merged])

In [277]:
final_neighborhood_loc

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
5,Bronx,Kingsbridge,40.881687,-73.902818
6,Manhattan,Marble Hill,40.876551,-73.910660
7,Bronx,Woodlawn,40.898273,-73.867315
8,Bronx,Norwood,40.877224,-73.879391
9,Bronx,Williamsbridge,40.881039,-73.857446


In [278]:
complete_df=final_neighborhood_loc.join(full_sorted.set_index('Neighborhood'), on='Neighborhood')

In [280]:
complete_df

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
0,Bronx,Wakefield,40.894705,-73.847201,Food Truck,Sandwich Place,Ice Cream Shop,Caribbean Restaurant,Pharmacy,Dessert Shop,Donut Shop,Pizza Place,Cocktail Bar,Athletics & Sports,3.0
1,Bronx,Co-op City,40.874294,-73.829939,Bus Station,Park,Pizza Place,Baseball Field,Discount Store,Pharmacy,Trail,Restaurant,Ice Cream Shop,Fast Food Restaurant,0.0
2,Bronx,Eastchester,40.887556,-73.827806,Caribbean Restaurant,Diner,Deli / Bodega,Metro Station,Bowling Alley,Fast Food Restaurant,Donut Shop,Chinese Restaurant,Bus Stop,Pizza Place,0.0
3,Bronx,Fieldston,40.895437,-73.905643,River,Bus Station,Plaza,Playground,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,0.0
4,Bronx,Riverdale,40.890834,-73.912585,Bus Station,Home Service,Gym,Park,Playground,Plaza,Bank,Food Truck,Golf Course,Colombian Restaurant,0.0
5,Bronx,Kingsbridge,40.881687,-73.902818,Pizza Place,Bar,Sandwich Place,Mexican Restaurant,Discount Store,Supermarket,Bakery,Chinese Restaurant,Donut Shop,Fried Chicken Joint,0.0
6,Manhattan,Marble Hill,40.876551,-73.910660,Sandwich Place,Discount Store,Coffee Shop,Shopping Mall,American Restaurant,Video Game Store,Diner,Pharmacy,Clothing Store,Department Store,0.0
7,Bronx,Woodlawn,40.898273,-73.867315,Deli / Bodega,Pizza Place,Playground,Food & Drink Shop,Pub,Bus Station,Bus Stop,Supermarket,Park,Pharmacy,3.0
8,Bronx,Norwood,40.877224,-73.879391,Pizza Place,Bank,Park,Pharmacy,Bus Station,Chinese Restaurant,American Restaurant,Cosmetics Shop,Caribbean Restaurant,Coffee Shop,3.0
9,Bronx,Williamsbridge,40.881039,-73.857446,Nightclub,Bar,Caribbean Restaurant,Playground,Convenience Store,Arts & Crafts Store,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,0.0


In [281]:
lat=43.363528
lon=-76.887588

In [282]:
import folium

In [288]:
plotting_map=folium.Map(location=[43.187555,-77.524511],zoom_start=6)

In [289]:
plotting_map

In [292]:
complete_df1=complete_df.dropna(axis=0)

In [293]:
colors=['red','blue','green','black','yellow','pink','orange']

for lat,lng,cl,neighborhood in zip(complete_df1['Latitude'],complete_df1['Longitude'],complete_df1['Cluster'],complete_df1['Neighborhood']):
    label='{} ,{}'.format(cl,neighborhood)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker([lat,lng],radius=4,popup=label,color=colors[int(cl)],fill=False,parse_html=False).add_to(plotting_map)
    

In [295]:
plotting_map

### We can explore the clusters by zooming in

In [296]:
complete_df1[complete_df1['Cluster']==0]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
1,Bronx,Co-op City,40.874294,-73.829939,Bus Station,Park,Pizza Place,Baseball Field,Discount Store,Pharmacy,Trail,Restaurant,Ice Cream Shop,Fast Food Restaurant,0.0
2,Bronx,Eastchester,40.887556,-73.827806,Caribbean Restaurant,Diner,Deli / Bodega,Metro Station,Bowling Alley,Fast Food Restaurant,Donut Shop,Chinese Restaurant,Bus Stop,Pizza Place,0.0
3,Bronx,Fieldston,40.895437,-73.905643,River,Bus Station,Plaza,Playground,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,0.0
4,Bronx,Riverdale,40.890834,-73.912585,Bus Station,Home Service,Gym,Park,Playground,Plaza,Bank,Food Truck,Golf Course,Colombian Restaurant,0.0
5,Bronx,Kingsbridge,40.881687,-73.902818,Pizza Place,Bar,Sandwich Place,Mexican Restaurant,Discount Store,Supermarket,Bakery,Chinese Restaurant,Donut Shop,Fried Chicken Joint,0.0
6,Manhattan,Marble Hill,40.876551,-73.910660,Sandwich Place,Discount Store,Coffee Shop,Shopping Mall,American Restaurant,Video Game Store,Diner,Pharmacy,Clothing Store,Department Store,0.0
9,Bronx,Williamsbridge,40.881039,-73.857446,Nightclub,Bar,Caribbean Restaurant,Playground,Convenience Store,Arts & Crafts Store,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,0.0
10,Bronx,Baychester,40.866858,-73.835798,Donut Shop,Convenience Store,Bus Stop,Pizza Place,Playground,Sandwich Place,Gym / Fitness Center,Intersection,Accessories Store,Supermarket,0.0
11,Bronx,Pelham Parkway,40.857413,-73.854756,Bus Station,Pizza Place,Italian Restaurant,Food,Coffee Shop,Chinese Restaurant,Performing Arts Venue,Smoke Shop,Sandwich Place,Bank,0.0
12,Bronx,City Island,40.847247,-73.786488,Harbor / Marina,Ice Cream Shop,Thrift / Vintage Store,Seafood Restaurant,Deli / Bodega,Diner,History Museum,Park,Pharmacy,Liquor Store,0.0


The first cluster seems to have a great amount of restaurants

In [298]:
complete_df1[complete_df1['Cluster']==1]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
193,Queens,Brookville,40.660003,-73.751753,Deli / Bodega,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,1.0
198,Staten Island,New Brighton,40.640615,-74.087017,Bus Stop,Deli / Bodega,Park,Bowling Alley,Playground,Convenience Store,Discount Store,Food,Beer Store,Seafood Restaurant,1.0
206,Staten Island,Mariner's Harbor,40.632546,-74.150085,Deli / Bodega,Bus Stop,Italian Restaurant,Ice Cream Shop,Arts & Crafts Store,Recording Studio,Cosmetics Shop,Colombian Restaurant,Airport Terminal,Toy / Game Store,1.0
212,Staten Island,Oakwood,40.558462,-74.121566,Bar,Bus Stop,Playground,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,1.0
217,Staten Island,Tottenville,40.505334,-74.246569,Thrift / Vintage Store,Mexican Restaurant,Cosmetics Shop,Bus Stop,Deli / Bodega,Italian Restaurant,Colombian Restaurant,Airport Terminal,Recording Studio,Arts & Crafts Store,1.0
227,Staten Island,Arlington,40.635325,-74.165104,Grocery Store,Deli / Bodega,Bus Stop,Boat or Ferry,Intersection,Toy / Game Store,Colombian Restaurant,Airport Terminal,Recording Studio,Garden,1.0
258,Staten Island,Elm Park,40.630147,-74.141817,Deli / Bodega,Pizza Place,Bus Stop,Ice Cream Shop,Italian Restaurant,American Restaurant,Arts & Crafts Store,Recording Studio,Colombian Restaurant,Airport Terminal,1.0
285,Staten Island,Willowbrook,40.603707,-74.132084,Bus Stop,Jewish Restaurant,Deli / Bodega,Chinese Restaurant,Eastern European Restaurant,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,1.0
305,Staten Island,Fox Hills,40.617311,-74.08174,Bus Stop,Sandwich Place,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,1.0


The second cluster seems to be close to transportation

In [304]:
complete_df1[complete_df1['Cluster']==2]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
27,Bronx,Clason Point,40.806551,-73.854144,Park,Pool,Bus Stop,Grocery Store,Boat or Ferry,Scenic Lookout,Athletics & Sports,Yoga Studio,Cocktail Bar,Thrift / Vintage Store,2.0
169,Queens,Rosedale,40.659816,-73.735261,Park,Trail,Building,Playground,Arts & Crafts Store,Cosmetics Shop,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,2.0
192,Queens,Somerville,40.597711,-73.796648,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0
203,Staten Island,Todt Hill,40.597069,-74.111329,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0
256,Staten Island,Randall Manor,40.63563,-74.098051,Park,Bus Stop,Playground,Deli / Bodega,Bagel Shop,Arts & Crafts Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,2.0
303,Queens,Bayswater,40.611322,-73.765968,Park,Playground,Jewish Restaurant,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,2.0
14,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",43.815252,-79.284577,Park,Playground,Jewish Restaurant,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,2.0
23,North York,York Mills West,43.752758,-79.400049,Park,Convenience Store,Bank,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,2.0
25,North York,Parkwoods,43.753259,-79.329656,Park,Food & Drink Shop,Fast Food Restaurant,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,2.0
30,North York,"CFB Toronto, Downsview East",43.737473,-79.464763,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0


The third cluster seems to have a lot of parks

In [303]:
complete_df1[complete_df1['Cluster']==3]

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster
27,Bronx,Clason Point,40.806551,-73.854144,Park,Pool,Bus Stop,Grocery Store,Boat or Ferry,Scenic Lookout,Athletics & Sports,Yoga Studio,Cocktail Bar,Thrift / Vintage Store,2.0
169,Queens,Rosedale,40.659816,-73.735261,Park,Trail,Building,Playground,Arts & Crafts Store,Cosmetics Shop,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,2.0
192,Queens,Somerville,40.597711,-73.796648,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0
203,Staten Island,Todt Hill,40.597069,-74.111329,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0
256,Staten Island,Randall Manor,40.63563,-74.098051,Park,Bus Stop,Playground,Deli / Bodega,Bagel Shop,Arts & Crafts Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,2.0
303,Queens,Bayswater,40.611322,-73.765968,Park,Playground,Jewish Restaurant,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,2.0
14,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",43.815252,-79.284577,Park,Playground,Jewish Restaurant,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,2.0
23,North York,York Mills West,43.752758,-79.400049,Park,Convenience Store,Bank,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,2.0
25,North York,Parkwoods,43.753259,-79.329656,Park,Food & Drink Shop,Fast Food Restaurant,Arts & Crafts Store,Video Game Store,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,2.0
30,North York,"CFB Toronto, Downsview East",43.737473,-79.464763,Park,Video Game Store,Bus Station,Gourmet Shop,Sculpture Garden,Cocktail Bar,Athletics & Sports,Yoga Studio,Toy / Game Store,Garden,2.0


The third cluster seems to have a lot of italian places