# Final Capstone Project

I will be clustering all of New Yorks neighborhoods, and analyzing them in order to prioritize transportation based on the most common venues located in the neighborhoods.

Install and import necessary tools

In [1]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

!conda install beautifulsoup4 --yes

!conda install lxml --yes

import requests
import pandas as pd
import numpy as np
import random

import matplotlib.cm as cm
import matplotlib.colors as colors

import json

from sklearn.cluster import KMeans

from IPython.display import Image 
from IPython.core.display import HTML 
    
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Installations and imports complete')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.2 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.21.0-py_0

The following packages will be UPDATED:

  openssl                                 1.1.1f-h516909a_0 --> 1.1.1g-h51

Get data to work with

In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

Organize and analyze data

In [48]:
neighborhoods_data = newyork_data['features']

In [49]:
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

In [50]:
neighborhoods = pd.DataFrame(columns=column_names)

In [51]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [52]:
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [53]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 5 boroughs and 306 neighborhoods.


Get latitude and longitude of New York City, NY

In [54]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


Get venue data from Foursquare API

In [55]:
CLIENT_ID = 'VDH3OTO2HM21Z2GOSSGYFQGKQCWVL4OFMKYE1UBB01JLIQYV'
CLIENT_SECRET = 'JUXIQOERLL33IXXSSJAB0TLQUUWHRQYZ45FZX0NQ15GXV3BN'
VERSION = '20180605'

In [56]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']

        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [57]:
LIMIT = 100
radius = 500

ny_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [58]:
print(ny_venues.shape)
ny_venues.head()

(9738, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
2,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
3,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [59]:
ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,32,32,32,32,32,32
Annadale,10,10,10,10,10,10
Arden Heights,4,4,4,4,4,4
Arlington,7,7,7,7,7,7
Arrochar,22,22,22,22,22,22
...,...,...,...,...,...,...
Woodhaven,23,23,23,23,23,23
Woodlawn,23,23,23,23,23,23
Woodrow,21,21,21,21,21,21
Woodside,76,76,76,76,76,76


In [60]:
print('There are {} uniques categories.'.format(len(ny_venues['Venue Category'].unique())))

There are 432 uniques categories.


Analyze the neighborhoods

In [61]:
ny_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")
ny_onehot.drop('Neighborhood', axis=1, inplace=True)
ny_onehot.head()

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [62]:
ny_onehot["Neighborhood"] = ny_venues["Neighborhood"]
ny_onehot.head()

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Wakefield
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Wakefield
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Wakefield
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Wakefield
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Wakefield


In [63]:
fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]

ny_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arcade,...,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [64]:
ny_onehot.shape

(9738, 432)

In [65]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Arcade,...,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0,0.100000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296,Woodhaven,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
297,Woodlawn,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
298,Woodrow,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0
299,Woodside,0.0,0.0,0.0,0.0,0.0,0.039474,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0


Get a list of all venue types

In [66]:
list(ny_grouped)

['Neighborhood',
 'Accessories Store',
 'Adult Boutique',
 'Afghan Restaurant',
 'African Restaurant',
 'Airport Terminal',
 'American Restaurant',
 'Animal Shelter',
 'Antique Shop',
 'Arcade',
 'Arepa Restaurant',
 'Argentinian Restaurant',
 'Art Gallery',
 'Art Museum',
 'Arts & Crafts Store',
 'Arts & Entertainment',
 'Asian Restaurant',
 'Athletics & Sports',
 'Auditorium',
 'Australian Restaurant',
 'Austrian Restaurant',
 'Auto Garage',
 'Automotive Shop',
 'BBQ Joint',
 'Baby Store',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Field',
 'Baseball Stadium',
 'Basketball Court',
 'Basketball Stadium',
 'Bath House',
 'Beach',
 'Beach Bar',
 'Bed & Breakfast',
 'Beer Bar',
 'Beer Garden',
 'Beer Store',
 'Big Box Store',
 'Bike Rental / Bike Share',
 'Bike Shop',
 'Bike Trail',
 'Bistro',
 'Board Shop',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Bowling Alley',
 'Boxing Gym',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bridal Shop',
 'Bridge',
 'Bubble T

Create a new dataframe to group venues into more general venue categories

In [67]:
ny_general = pd.DataFrame(columns=['Neighborhood'])
ny_general.head()

Unnamed: 0,Neighborhood


In [68]:
ny_general['Neighborhood']=ny_grouped['Neighborhood']
ny_general.head()

Unnamed: 0,Neighborhood
0,Allerton
1,Annadale
2,Arden Heights
3,Arlington
4,Arrochar


In [69]:
ny_general['Social']=ny_grouped['Community Center'] + ny_grouped['Recreation Center'] + ny_grouped['Rock Club']

In [70]:
ny_general['Education']=ny_grouped['College Academic Building'] + ny_grouped['College Arts Building'] + ny_grouped['College Bookstore'] + ny_grouped['College Cafeteria'] + ny_grouped['Cooking School'] + ny_grouped['Cycle Studio'] + ny_grouped['Dance Studio'] + ny_grouped['High School'] + ny_grouped['Music School'] + ny_grouped['School']

In [71]:
ny_general['Entertainment']=ny_grouped['Arcade'] + ny_grouped['Bowling Alley'] + ny_grouped['Gaming Cafe'] + ny_grouped['General Entertainment'] + ny_grouped['Lounge'] + ny_grouped['Mini Golf'] + ny_grouped['Roller Rink'] + ny_grouped['Skate Park'] + ny_grouped['Skating Rink'] + ny_grouped['Theme Park'] + ny_grouped['Theme Park Ride / Attraction']

In [72]:
ny_general['Hotels etc']=ny_grouped['Hostel'] + ny_grouped['Hotel'] + ny_grouped['Motel'] + ny_grouped['Resort']

In [73]:
ny_general['Theaters']=ny_grouped['Indie Movie Theater'] + ny_grouped['Indie Theater'] + ny_grouped['Movie Theater'] + ny_grouped['Multiplex'] + ny_grouped['Theater']

In [74]:
ny_general['Transportation']=ny_grouped['Boat or Ferry'] + ny_grouped['Bus Line'] + ny_grouped['Bus Station'] + ny_grouped['Bus Stop'] + ny_grouped['Train'] + ny_grouped['Train Station'] + ny_grouped['Heliport'] + ny_grouped['Metro Station']

In [75]:
ny_general['Bars/Clubs']=ny_grouped['Bar'] + ny_grouped['Beach Bar'] + ny_grouped['Beer Bar'] + ny_grouped['Beer Garden'] + ny_grouped['Brewery'] + ny_grouped['Cocktail Bar'] + ny_grouped['Comedy Club'] + ny_grouped['Distillery'] + ny_grouped['Dive Bar'] + ny_grouped['Gastropub'] + ny_grouped['Gay Bar'] + ny_grouped['Hookah Bar'] + ny_grouped['Irish Pub'] + ny_grouped['Jazz Club'] + ny_grouped['Karaoke Bar'] + ny_grouped['Nightclub'] + ny_grouped['Other Nightlife'] + ny_grouped['Piano Bar'] + ny_grouped['Pool Hall'] + ny_grouped['Pub'] + ny_grouped['Sake Bar'] + ny_grouped['Social Club'] + ny_grouped['Speakeasy'] + ny_grouped['Sports Bar'] + ny_grouped['Sports Club'] + ny_grouped['Strip Club'] + ny_grouped['Whisky Bar'] + ny_grouped['Wine Bar'] + ny_grouped['Tiki Bar']

In [76]:
ny_general['Dining']=ny_grouped['Afghan Restaurant'] + ny_grouped['African Restaurant'] + ny_grouped['American Restaurant'] + ny_grouped['Arepa Restaurant'] + ny_grouped['Argentinian Restaurant'] + ny_grouped['Asian Restaurant'] + ny_grouped['Australian Restaurant'] + ny_grouped['Austrian Restaurant'] + ny_grouped['BBQ Joint'] + ny_grouped['Bagel Shop'] + ny_grouped['Bistro'] + ny_grouped['Brazilian Restaurant'] + ny_grouped['Breakfast Spot'] + ny_grouped['Bubble Tea Shop'] + ny_grouped['Buffet'] + ny_grouped['Burger Joint'] + ny_grouped['Burmese Restaurant'] + ny_grouped['Burrito Place'] + ny_grouped['Café'] + ny_grouped['Cafeteria'] + ny_grouped['Cajun / Creole Restaurant'] + ny_grouped['Cambodian Restaurant'] + ny_grouped['Cantonese Restaurant'] + ny_grouped['Caribbean Restaurant'] + ny_grouped['Caucasian Restaurant'] + ny_grouped['Cha Chaan Teng'] + ny_grouped['Chinese Restaurant'] + ny_grouped['Coffee Shop'] + ny_grouped['Colombian Restaurant'] + ny_grouped['Comfort Food Restaurant'] + ny_grouped['Creperie'] + ny_grouped['Cuban Restaurant'] + ny_grouped['Czech Restaurant'] + ny_grouped['Deli / Bodega'] + ny_grouped['Dim Sum Restaurant'] + ny_grouped['Diner'] + ny_grouped['Donut Shop'] + ny_grouped['Dumpling Restaurant'] + ny_grouped['Eastern European Restaurant'] + ny_grouped['Egyptian Restaurant'] + ny_grouped['Empanada Restaurant'] + ny_grouped['English Restaurant'] + ny_grouped['Ethiopian Restaurant'] + ny_grouped['Falafel Restaurant'] + ny_grouped['Fast Food Restaurant'] + ny_grouped['Filipino Restaurant'] + ny_grouped['Fish & Chips Shop'] + ny_grouped['Food'] + ny_grouped['Food & Drink Shop'] + ny_grouped['Food Court'] + ny_grouped['Food Stand'] + ny_grouped['Food Truck'] + ny_grouped['French Restaurant'] + ny_grouped['Fried Chicken Joint'] + ny_grouped['Frozen Yogurt Shop'] + ny_grouped['German Restaurant'] + ny_grouped['Gluten-free Restaurant'] + ny_grouped['Greek Restaurant'] + ny_grouped['Halal Restaurant'] + ny_grouped['Hawaiian Restaurant'] + ny_grouped['Himalayan Restaurant'] + ny_grouped['Hot Dog Joint'] + ny_grouped['Hotpot Restaurant'] + ny_grouped['Ice Cream Shop'] + ny_grouped['Indian Restaurant'] + ny_grouped['Indonesian Restaurant'] + ny_grouped['Israeli Restaurant'] + ny_grouped['Italian Restaurant'] + ny_grouped['Italian Restaurant'] + ny_grouped['Japanese Curry Restaurant'] + ny_grouped['Japanese Restaurant'] + ny_grouped['Jewish Restaurant'] + ny_grouped['Kebab Restaurant'] + ny_grouped['Korean Restaurant'] + ny_grouped['Kosher Restaurant'] + ny_grouped['Kosher Restaurant'] + ny_grouped['Latin American Restaurant'] + ny_grouped['Lebanese Restaurant'] + ny_grouped['Mac & Cheese Joint'] + ny_grouped['Malay Restaurant'] + ny_grouped['Mediterranean Restaurant'] + ny_grouped['Mexican Restaurant'] + ny_grouped['Middle Eastern Restaurant'] + ny_grouped['Modern European Restaurant'] + ny_grouped['Molecular Gastronomy Restaurant'] + ny_grouped['Moroccan Restaurant'] + ny_grouped['New American Restaurant'] + ny_grouped['Noodle House'] + ny_grouped['North Indian Restaurant'] + ny_grouped['Paella Restaurant'] + ny_grouped['Pakistani Restaurant'] + ny_grouped['Persian Restaurant'] + ny_grouped['Peruvian Restaurant'] + ny_grouped['Pizza Place'] + ny_grouped['Pie Shop'] + ny_grouped['Poke Place'] + ny_grouped['Polish Restaurant'] + ny_grouped['Ramen Restaurant'] + ny_grouped['Restaurant'] + ny_grouped['Romanian Restaurant'] + ny_grouped['Russian Restaurant'] + ny_grouped['Salad Place'] + ny_grouped['Sandwich Place'] + ny_grouped['Scandinavian Restaurant'] + ny_grouped['Seafood Restaurant'] + ny_grouped['Shabu-Shabu Restaurant'] + ny_grouped['Shanghai Restaurant'] + ny_grouped['Smoothie Shop'] + ny_grouped['Snack Place'] + ny_grouped['Soba Restaurant'] + ny_grouped['Soup Place'] + ny_grouped['South American Restaurant'] + ny_grouped['Southern / Soul Food Restaurant'] + ny_grouped['Souvlaki Shop'] + ny_grouped['Spanish Restaurant'] + ny_grouped['Sri Lankan Restaurant'] + ny_grouped['Steakhouse'] + ny_grouped['Sushi Restaurant'] + ny_grouped['Swiss Restaurant'] + ny_grouped['Szechuan Restaurant'] + ny_grouped['Taco Place'] + ny_grouped['Taiwanese Restaurant'] + ny_grouped['Tapas Restaurant'] + ny_grouped['Tea Room'] + ny_grouped['Tex-Mex Restaurant'] + ny_grouped['Thai Restaurant'] + ny_grouped['Tibetan Restaurant'] + ny_grouped['Turkish Restaurant'] + ny_grouped['Udon Restaurant'] + ny_grouped['Varenyky restaurant'] + ny_grouped['Vegetarian / Vegan Restaurant'] + ny_grouped['Vietnamese Restaurant'] + ny_grouped['Wings Joint'] + ny_grouped['Dosa Place'] + ny_grouped['Juice Bar']

In [77]:
ny_general['Fitness']=ny_grouped['Boxing Gym'] + ny_grouped['Climbing Gym'] + ny_grouped['Gym'] + ny_grouped['Gym / Fitness Center'] + ny_grouped['Gym Pool'] + ny_grouped['Gymnastics Gym'] + ny_grouped['Martial Arts Dojo'] + ny_grouped['Pilates Studio'] + ny_grouped['Tennis Court'] + ny_grouped['Tennis Stadium'] + ny_grouped['Volleyball Court'] + ny_grouped['Weight Loss Center'] + ny_grouped['Yoga Studio'] + ny_grouped[ 'Pool'] + ny_grouped['Soccer Field']

In [78]:
ny_general['Shopping']=ny_grouped['Accessories Store'] + ny_grouped['Adult Boutique'] + ny_grouped['Antique Shop'] + ny_grouped['Arts & Crafts Store'] + ny_grouped['Athletics & Sports'] + ny_grouped['Automotive Shop'] + ny_grouped['Baby Store'] + ny_grouped['Bakery'] + ny_grouped['Beer Store'] + ny_grouped['Big Box Store'] + ny_grouped['Bike Shop'] + ny_grouped['Board Shop'] + ny_grouped['Bookstore'] + ny_grouped['Boutique'] + ny_grouped['Bridal Shop'] + ny_grouped['Butcher'] + ny_grouped['Camera Store'] + ny_grouped['Candy Store'] + ny_grouped['Cheese Shop'] + ny_grouped['Chocolate Shop'] + ny_grouped['Clothing Store'] + ny_grouped['Convenience Store'] + ny_grouped['Cosmetics Shop'] + ny_grouped['Cupcake Shop'] + ny_grouped['Department Store'] + ny_grouped['Dessert Shop'] + ny_grouped['Discount Store'] + ny_grouped['Drugstore'] + ny_grouped['Duty-free Shop'] + ny_grouped['Electronics Store'] + ny_grouped['Farmers Market'] + ny_grouped['Fish Market'] + ny_grouped['Flea Market'] + ny_grouped['Flower Shop'] + ny_grouped['Fruit & Vegetable Store'] + ny_grouped['Furniture / Home Store'] + ny_grouped['Garden Center'] + ny_grouped['Gift Shop'] + ny_grouped['Gourmet Shop'] + ny_grouped['Grocery Store'] + ny_grouped['Hardware Store'] + ny_grouped['Health Food Store'] + ny_grouped['Herbs & Spices Store'] + ny_grouped['Hobby Shop'] + ny_grouped['Jewelry Store'] + ny_grouped['Kids Store'] + ny_grouped['Kitchen Supply Store'] + ny_grouped['Lingerie Store'] + ny_grouped['Liquor Store'] + ny_grouped['Market'] + ny_grouped['Mattress Store'] + ny_grouped["Men's Store"] + ny_grouped['Miscellaneous Shop'] + ny_grouped['Mobile Phone Shop'] + ny_grouped['Motorcycle Shop'] + ny_grouped['Music Store'] + ny_grouped['Optical Shop'] + ny_grouped['Organic Grocery'] + ny_grouped['Outdoors & Recreation'] + ny_grouped['Outlet Mall'] + ny_grouped['Outlet Store'] + ny_grouped['Paper / Office Supplies Store'] + ny_grouped['Pet Café'] + ny_grouped['Pet Store'] + ny_grouped['Pharmacy'] + ny_grouped['Print Shop'] + ny_grouped['Record Shop'] + ny_grouped['Shipping Store'] + ny_grouped['Shoe Store'] + ny_grouped['Shop & Service'] + ny_grouped['Shopping Mall'] + ny_grouped['Smoke Shop'] + ny_grouped['Sporting Goods Shop'] + ny_grouped['Supermarket'] + ny_grouped['Supplement Shop'] + ny_grouped['Thrift / Vintage Store'] + ny_grouped['Toy / Game Store'] + ny_grouped['Used Bookstore'] + ny_grouped['Vape Store'] + ny_grouped['Video Game Store'] + ny_grouped['Video Store'] + ny_grouped['Warehouse Store'] + ny_grouped['Wine Shop'] + ny_grouped["Women's Store"]

In [79]:
ny_general['Services']=ny_grouped['Animal Shelter'] + ny_grouped['Auto Garage'] + ny_grouped['Bank'] + ny_grouped['Bath House'] + ny_grouped['Bike Rental / Bike Share'] + ny_grouped['Business Service'] + ny_grouped['Check Cashing Service'] + ny_grouped['Construction & Landscaping'] + ny_grouped['Daycare'] + ny_grouped['Design Studio'] + ny_grouped["Doctor's Office"] + ny_grouped['Dry Cleaner'] + ny_grouped['Entertainment Service'] + ny_grouped['Event Service'] + ny_grouped['Financial or Legal Service'] + ny_grouped['Harbor / Marina'] + ny_grouped['Health & Beauty Service'] + ny_grouped['Home Service'] + ny_grouped['IT Services'] + ny_grouped['Laundromat'] + ny_grouped['Laundry Service'] + ny_grouped['Lawyer'] + ny_grouped['Locksmith'] + ny_grouped['Massage Studio'] + ny_grouped['Nail Salon'] + ny_grouped['Other Repair Shop'] + ny_grouped['Pet Service'] + ny_grouped['Photography Studio'] + ny_grouped['Physical Therapist'] + ny_grouped['Spa'] + ny_grouped['Tailor Shop'] + ny_grouped['Tourist Information Center'] + ny_grouped['Veterinarian'] + ny_grouped['Dog Run'] + ny_grouped['Eye Doctor'] + ny_grouped['Gas Station'] + ny_grouped['Library'] + ny_grouped['Storage Facility'] + ny_grouped['Waste Facility'] + ny_grouped['Piercing Parlor'] + ny_grouped['Tattoo Parlor']

In [80]:
ny_general['Performing arts']=ny_grouped['Auditorium'] + ny_grouped['Circus'] + ny_grouped['Concert Hall'] + ny_grouped['Music Venue'] + ny_grouped['Opera House'] + ny_grouped['Performing Arts Venue']

In [81]:
ny_general['Gallery/Exhibit']=ny_grouped['Art Gallery'] + ny_grouped['Art Museum'] + ny_grouped['Arts & Entertainment'] + ny_grouped['Exhibit'] + ny_grouped['Historic Site'] + ny_grouped['History Museum'] + ny_grouped['Monument / Landmark'] + ny_grouped['Museum'] + ny_grouped['Outdoor Sculpture'] + ny_grouped['Public Art'] + ny_grouped['Sculpture Garden'] + ny_grouped['Street Art'] + ny_grouped['Event Space'] + ny_grouped['Memorial Site']

In [82]:
ny_general['Outdoors']=ny_grouped['Beach'] + ny_grouped['Bike Trail'] + ny_grouped['Campground'] + ny_grouped['Lake'] + ny_grouped['Other Great Outdoors'] + ny_grouped['Park'] + ny_grouped['Scenic Lookout'] + ny_grouped['Ski Area'] + ny_grouped['Trail'] + ny_grouped['Fountain'] + ny_grouped['Garden'] + ny_grouped['Pier'] + ny_grouped['River'] + ny_grouped['State / Provincial Park'] + ny_grouped['Surf Spot'] + ny_grouped['Playground']

In [83]:
ny_general['Religion']=ny_grouped['Church'] + ny_grouped['Spiritual Center'] + ny_grouped['Temple']

In [84]:
ny_general['Sports/Arenas']=ny_grouped[ 'Baseball Field'] + ny_grouped['Baseball Stadium'] + ny_grouped['Basketball Court'] + ny_grouped['Basketball Stadium'] + ny_grouped['College Basketball Court'] + ny_grouped['Golf Course'] + ny_grouped['Racetrack'] + ny_grouped['Track']

In [85]:
list(ny_general)

['Neighborhood',
 'Social',
 'Education',
 'Entertainment',
 'Hotels etc',
 'Theaters',
 'Transportation',
 'Bars/Clubs',
 'Dining',
 'Fitness',
 'Shopping',
 'Services',
 'Performing arts',
 'Gallery/Exhibit',
 'Outdoors',
 'Religion',
 'Sports/Arenas']

Analyze top venues

In [99]:
num_top_venues = 5

for hood in ny_general['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_general[ny_general['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')


----Allerton----
            venue  freq
0          Dining  0.44
1        Shopping  0.31
2        Services  0.12
3  Transportation  0.03
4         Fitness  0.03


----Annadale----
            venue  freq
0          Dining   0.6
1       Education   0.1
2  Transportation   0.1
3      Bars/Clubs   0.1
4        Outdoors   0.1


----Arden Heights----
       venue  freq
0     Dining  0.50
1   Shopping  0.25
2   Services  0.25
3     Social  0.00
4  Education  0.00


----Arlington----
            venue  freq
0          Dining  0.43
1  Transportation  0.29
2        Shopping  0.14
3          Social  0.00
4       Education  0.00


----Arrochar----
            venue  freq
0          Dining  0.64
1        Shopping  0.27
2  Transportation  0.14
3      Hotels etc  0.05
4          Social  0.00


----Arverne----
            venue  freq
0          Dining  0.33
1        Outdoors  0.33
2  Transportation  0.17
3        Shopping  0.11
4          Social  0.00


----Astoria----
        venue  freq
0      Dini

In [100]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [101]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_general['Neighborhood']

for ind in np.arange(ny_general.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_general.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Dining,Shopping,Services,Outdoors,Fitness,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts
1,Annadale,Dining,Outdoors,Bars/Clubs,Transportation,Education,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services
2,Arden Heights,Dining,Services,Shopping,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Fitness,Bars/Clubs
3,Arlington,Dining,Transportation,Shopping,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Services,Fitness
4,Arrochar,Dining,Shopping,Transportation,Hotels etc,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Services


Cluster neighborhoods

In [102]:
kclusters = 4

ny_grouped_clustering = ny_general.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

kmeans.labels_[0:10] 

array([0, 3, 0, 0, 3, 1, 3, 3, 0, 3], dtype=int32)

In [103]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ny_merged = neighborhoods

ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ny_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,0.0,Dining,Shopping,Services,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Fitness,Bars/Clubs
1,Bronx,Co-op City,40.874294,-73.829939,0.0,Shopping,Dining,Transportation,Sports/Arenas,Outdoors,Religion,Gallery/Exhibit,Performing arts,Services,Fitness
2,Bronx,Eastchester,40.887556,-73.827806,3.0,Dining,Transportation,Shopping,Entertainment,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Services
3,Bronx,Fieldston,40.895437,-73.905643,1.0,Outdoors,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining
4,Bronx,Riverdale,40.890834,-73.912585,1.0,Outdoors,Services,Transportation,Sports/Arenas,Fitness,Dining,Religion,Gallery/Exhibit,Performing arts,Shopping


In [104]:
ny_merged = ny_merged[ny_merged['Cluster Labels'].notna()]

In [105]:
ny_merged['Cluster Labels'] = ny_merged['Cluster Labels'].astype(int)

In [106]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighborhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [107]:
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wakefield,Dining,Shopping,Services,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Fitness,Bars/Clubs
1,Co-op City,Shopping,Dining,Transportation,Sports/Arenas,Outdoors,Religion,Gallery/Exhibit,Performing arts,Services,Fitness
5,Kingsbridge,Dining,Shopping,Bars/Clubs,Outdoors,Gallery/Exhibit,Services,Fitness,Transportation,Sports/Arenas,Religion
6,Marble Hill,Dining,Shopping,Fitness,Services,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Bars/Clubs
8,Norwood,Dining,Shopping,Outdoors,Services,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Fitness,Bars/Clubs
...,...,...,...,...,...,...,...,...,...,...,...
290,Middle Village,Dining,Shopping,Outdoors,Services,Bars/Clubs,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Fitness
295,Highland Park,Dining,Shopping,Outdoors,Fitness,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services
298,Allerton,Dining,Shopping,Services,Outdoors,Fitness,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts
300,Erasmus,Shopping,Dining,Fitness,Outdoors,Performing arts,Services,Sports/Arenas,Religion,Gallery/Exhibit,Bars/Clubs


### Cluster 2

In [108]:
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Fieldston,Outdoors,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining
4,Riverdale,Outdoors,Services,Transportation,Sports/Arenas,Fitness,Dining,Religion,Gallery/Exhibit,Performing arts,Shopping
18,West Farms,Dining,Transportation,Outdoors,Sports/Arenas,Services,Shopping,Entertainment,Religion,Gallery/Exhibit,Performing arts
27,Clason Point,Outdoors,Transportation,Shopping,Fitness,Dining,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services
35,Spuyten Duyvil,Shopping,Fitness,Dining,Outdoors,Services,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts
76,Mill Island,Services,Fitness,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Shopping,Dining,Bars/Clubs
78,Coney Island,Dining,Sports/Arenas,Entertainment,Outdoors,Gallery/Exhibit,Performing arts,Shopping,Bars/Clubs,Religion,Services
85,Sea Gate,Services,Outdoors,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Shopping,Fitness,Dining
91,Bergen Beach,Services,Sports/Arenas,Outdoors,Shopping,Dining,Religion,Gallery/Exhibit,Performing arts,Fitness,Bars/Clubs
127,Battery Park City,Dining,Shopping,Outdoors,Gallery/Exhibit,Bars/Clubs,Hotels etc,Fitness,Performing arts,Transportation,Education


### Cluster 3

In [109]:
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
172,Breezy Point,Outdoors,Gallery/Exhibit,Sports/Arenas,Religion,Performing arts,Services,Shopping,Fitness,Dining,Bars/Clubs
179,Neponsit,Outdoors,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining,Bars/Clubs
192,Somerville,Outdoors,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining,Bars/Clubs
203,Todt Hill,Outdoors,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining,Bars/Clubs
303,Bayswater,Outdoors,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness,Dining,Bars/Clubs


### Cluster 4

In [110]:
ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Eastchester,Dining,Transportation,Shopping,Entertainment,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Services
7,Woodlawn,Dining,Outdoors,Bars/Clubs,Shopping,Transportation,Sports/Arenas,Religion,Gallery/Exhibit,Performing arts,Services
9,Williamsbridge,Dining,Bars/Clubs,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Services,Shopping,Fitness
11,Pelham Parkway,Dining,Services,Performing arts,Shopping,Fitness,Transportation,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit
13,Bedford Park,Dining,Transportation,Shopping,Bars/Clubs,Sports/Arenas,Outdoors,Fitness,Religion,Gallery/Exhibit,Performing arts
...,...,...,...,...,...,...,...,...,...,...,...
292,Lighthouse Hill,Dining,Outdoors,Gallery/Exhibit,Services,Sports/Arenas,Religion,Performing arts,Shopping,Fitness,Bars/Clubs
293,Richmond Valley,Dining,Services,Transportation,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Shopping,Fitness
296,Madison,Dining,Services,Shopping,Fitness,Sports/Arenas,Religion,Outdoors,Gallery/Exhibit,Performing arts,Bars/Clubs
297,Bronxdale,Dining,Shopping,Outdoors,Performing arts,Services,Fitness,Sports/Arenas,Religion,Gallery/Exhibit,Bars/Clubs
