# 1. Import Modules

In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Libraries imported.


Now instead of looking for the user's current location for simplicity we shall explore two boroughs of New York first and look for the hotels in those areas. 

# 2. Getting json data of New York

In [6]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
newyork_data

Data downloaded!


{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

# 3. Creating the dataframe

In [7]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [8]:
neighborhoods_data = newyork_data['features']
for data in neighborhoods_data:
    borough = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


We want our neighborhoods to be as far as possible for the reasons of comparison. So we shall plot the neighborhoods on the map of New York.

# 4. Map of New York and mark the neighborhoods

In [9]:
address = 'New York City, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [10]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

Let's take **Staten Island** and **Manhattan** as our target Boroughs.

In [11]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


In [12]:
staten_data = neighborhoods[neighborhoods['Borough'] == 'Staten Island'].reset_index(drop=True)
staten_data.head(10)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Staten Island,St. George,40.644982,-74.079353
1,Staten Island,New Brighton,40.640615,-74.087017
2,Staten Island,Stapleton,40.626928,-74.077902
3,Staten Island,Rosebank,40.615305,-74.069805
4,Staten Island,West Brighton,40.631879,-74.107182
5,Staten Island,Grymes Hill,40.624185,-74.087248
6,Staten Island,Todt Hill,40.597069,-74.111329
7,Staten Island,South Beach,40.580247,-74.079553
8,Staten Island,Port Richmond,40.633669,-74.129434
9,Staten Island,Mariner's Harbor,40.632546,-74.150085


# 5. Explore the neighborhoods in those two boroughs one by one

In [13]:
CLIENT_ID = 'J5D0K1WE2SCLGSCHLQQNNJSCW0VTWEPQ3SOCO5FTQ00U3NMU' # your Foursquare ID
CLIENT_SECRET = 'B3S2Q3MWGMBYO4ASTYTTGDIZMZRISSTXZF1WTPZBGJJVZX14' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 50

In [14]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, LIMIT = 100):
    
    venues_list=[]
    dist_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng,
            v['venue']['id'],
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['location']['distance'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue Id',  
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Distance',
                  'Venue Category']
    
    return(nearby_venues)

### Explore the neighborhoods of Manhattan!!!

In [16]:
manhattan_venues = getNearbyVenues(names = manhattan_data['Neighborhood'], 
                                   latitudes = manhattan_data['Latitude'], 
                                   longitudes = manhattan_data['Longitude'])

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [17]:
manhattan_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Marble Hill,40.876551,-73.91066,4baf59e8f964a520a6f93be3,Bikram Yoga,40.876844,-73.906204,376,Yoga Studio
1,Marble Hill,40.876551,-73.91066,4b4429abf964a52037f225e3,Arturo's,40.874412,-73.910271,240,Pizza Place
2,Marble Hill,40.876551,-73.91066,4b79cc46f964a520c5122fe3,Tibbett Diner,40.880404,-73.908937,452,Diner
3,Marble Hill,40.876551,-73.91066,4bb114c4f964a520b9783ce3,Sam's Pizza,40.879435,-73.905859,516,Pizza Place
4,Marble Hill,40.876551,-73.91066,55f81cd2498ee903149fcc64,Starbucks,40.877531,-73.905582,441,Coffee Shop


## Filter out only those venues that are Hotels

In [18]:
manhattan_hotels = manhattan_venues[manhattan_venues['Venue Category'] == 'Hotel'].reset_index(drop=True)
manhattan_hotels.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Chinatown,40.715618,-73.994279,578692f4498e1054905dbde7,Hotel 50 Bowery NYC,40.715936,-73.996789,214,Hotel
1,Chinatown,40.715618,-73.994279,51050d4e5262d6654ddc83e1,CitizenM Bowery,40.720599,-73.993574,557,Hotel
2,Chinatown,40.715618,-73.994279,536020eb11d2ce653fb711d0,The Ludlow Hotel,40.721857,-73.987204,915,Hotel
3,Upper East Side,40.775639,-73.960508,4b37853af964a520c54125e3,The Mark Hotel,40.775201,-73.963351,244,Hotel
4,Upper East Side,40.775639,-73.960508,4ac8d0d3f964a520b3bc20e3,The Carlyle,40.774413,-73.963301,272,Hotel


In [19]:
columns_names = ['Neighborhood Name', 
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Like Counter',
                  'Disliked',
                  'Tip Counter',
                  'Rating']
manhattan_hotels_data = pd.DataFrame(columns=columns_names)
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating


###  Fetching data about the hotels

In [20]:
for v_id,name,v_name,lat,lon in zip(manhattan_hotels['Venue Id'],manhattan_hotels['Neighborhood'],manhattan_hotels['Venue Name'],manhattan_hotels['Venue Latitude'],manhattan_hotels['Venue Longitude']):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        v_id, 
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION)
    
    # make the GET request
    results = requests.get(url).json()['response']['venue']
    try :
        r = results['rating']
    except :
        r = 'Not Rated yet'
    
    manhattan_hotels_data = manhattan_hotels_data.append({'Neighborhood Name' : name, 
                                          'Venue Name':v_name,
                                          'Venue Latitude':lat, 
                                          'Venue Longitude':lon, 
                                          'Like Counter':results['likes']['count'],
                                          'Disliked':results['dislike'],
                                          'Tip Counter':results['tips']['count'],
                                          'Rating' : r}, ignore_index=True)

## Hotel Data

In [21]:
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,74,False,10,9.1
1,Chinatown,CitizenM Bowery,40.720599,-73.993574,34,False,5,9.2
2,Chinatown,The Ludlow Hotel,40.721857,-73.987204,209,False,25,9.0
3,Upper East Side,The Mark Hotel,40.775201,-73.963351,88,False,31,8.6
4,Upper East Side,The Carlyle,40.774413,-73.963301,152,False,59,8.7
5,Lenox Hill,The Carlyle,40.774413,-73.963301,152,False,59,8.7
6,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,136,False,27,8.8
7,Roosevelt Island,Ravel Hotel,40.753989,-73.949244,67,False,46,7.9
8,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,319,False,88,9.2
9,Clinton,Kimpton Ink48 Hotel,40.764505,-73.995987,200,False,93,8.8


# User Interface

In [22]:
user_sees = manhattan_hotels.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue Id','Venue Latitude','Venue Longitude','Venue Category'],axis = 1)
user_sees.columns = ['Neighborhood','Hotel Name','Distance']
user_sees['Rating'] = manhattan_hotels_data['Rating']
user_sees

Unnamed: 0,Neighborhood,Hotel Name,Distance,Rating
0,Chinatown,Hotel 50 Bowery NYC,214,9.1
1,Chinatown,CitizenM Bowery,557,9.2
2,Chinatown,The Ludlow Hotel,915,9.0
3,Upper East Side,The Mark Hotel,244,8.6
4,Upper East Side,The Carlyle,272,8.7
5,Lenox Hill,The Carlyle,795,8.7
6,Lenox Hill,Loews Regency Hotel,959,8.8
7,Roosevelt Island,Ravel Hotel,909,7.9
8,Lincoln Square,Mandarin Oriental,542,9.2
9,Clinton,Kimpton Ink48 Hotel,601,8.8


### Explore the neighborhoods of Staten Islands!!!

In [23]:
staten_venues = getNearbyVenues(names = staten_data['Neighborhood'], 
                                   latitudes = staten_data['Latitude'], 
                                   longitudes = staten_data['Longitude'])
staten_venues.head()

St. George
New Brighton
Stapleton
Rosebank
West Brighton
Grymes Hill
Todt Hill
South Beach
Port Richmond
Mariner's Harbor
Port Ivory
Castleton Corners
New Springville
Travis
New Dorp
Oakwood
Great Kills
Eltingville
Annadale
Woodrow
Tottenville
Tompkinsville
Silver Lake
Sunnyside
Park Hill
Westerleigh
Graniteville
Arlington
Arrochar
Grasmere
Old Town
Dongan Hills
Midland Beach
Grant City
New Dorp Beach
Bay Terrace
Huguenot
Pleasant Plains
Butler Manor
Charleston
Rossville
Arden Heights
Greenridge
Heartland Village
Chelsea
Bloomfield
Bulls Head
Richmond Town
Shore Acres
Clifton
Concord
Emerson Hill
Randall Manor
Howland Hook
Elm Park
Manor Heights
Willowbrook
Sandy Ground
Egbertville
Prince's Bay
Lighthouse Hill
Richmond Valley
Fox Hills


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,St. George,40.644982,-74.079353,4a214841f964a520cd7c1fe3,Beso,40.643306,-74.076508,304,Tapas Restaurant
1,St. George,40.644982,-74.079353,4bf9c5c08d30d13a6bce0218,Staten Island September 11 Memorial,40.646767,-74.07651,311,Monument / Landmark
2,St. George,40.644982,-74.079353,4a271f0cf964a5205c911fe3,Enoteca Maria,40.641941,-74.07732,379,Italian Restaurant
3,St. George,40.644982,-74.079353,590928301de7651d663ae087,Marie's 2,40.642176,-74.076669,385,Italian Restaurant
4,St. George,40.644982,-74.079353,4b6da712f964a52080832ce3,St. George Theatre,40.642253,-74.077496,341,Theater


In [24]:
staten_hotels = staten_venues[staten_venues['Venue Category'] == 'Hotel'].reset_index(drop=True)
staten_hotels

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue Name,Venue Latitude,Venue Longitude,Venue Distance,Venue Category
0,Rosebank,40.615305,-74.069805,4bc41a1cdce4eee163f0719d,Staten Island Motor Lodge,40.60845,-74.0766,954,Hotel
1,Travis,40.586314,-74.190737,5cd28f037dc9e1002c64e0f9,Fairfield Inn & Suites,40.586652,-74.19116,51,Hotel
2,Travis,40.586314,-74.190737,4c0055bfad15a5931dbe8d73,Comfort Inn,40.586191,-74.190216,46,Hotel
3,Park Hill,40.60919,-74.080157,4bc41a1cdce4eee163f0719d,Staten Island Motor Lodge,40.60845,-74.0766,311,Hotel
4,Arrochar,40.596313,-74.067124,4c96c79482b56dcbd0bde4aa,Staten Island Navy Lodge,40.598734,-74.062288,489,Hotel
5,Grasmere,40.598268,-74.076674,4c96c79482b56dcbd0bde4aa,Staten Island Navy Lodge,40.598734,-74.062288,1217,Hotel
6,Pleasant Plains,40.524699,-74.219831,4bc41a1ddce4eee165f0719d,West Shore Motor Lodge,40.532809,-74.225245,1012,Hotel
7,Charleston,40.530531,-74.232158,4bc41a1ddce4eee165f0719d,West Shore Motor Lodge,40.532809,-74.225245,637,Hotel
8,Chelsea,40.594726,-74.18956,5cd28f037dc9e1002c64e0f9,Fairfield Inn & Suites,40.586652,-74.19116,908,Hotel
9,Chelsea,40.594726,-74.18956,4c0055bfad15a5931dbe8d73,Comfort Inn,40.586191,-74.190216,951,Hotel


In [25]:
columns_names = ['Neighborhood Name', 
                  'Venue Name',
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Like Counter',
                  'Disliked',
                  'Tip Counter',
                  'Rating']
staten_hotel_data = pd.DataFrame(columns=columns_names)
staten_hotel_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating


## Fetching data about the hotels of Staten Islands

In [26]:
for v_id,name,v_name,lat,lon in zip(staten_hotels['Venue Id'],staten_hotels['Neighborhood'],staten_hotels['Venue Name'],staten_hotels['Venue Latitude'],staten_hotels['Venue Longitude']):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        v_id, 
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION)
    
    # make the GET request
    results = requests.get(url).json()['response']['venue']
    try :
        r = results['rating']
    except :
        r = 'Not Rated yet'
    
    staten_hotel_data = staten_hotel_data.append({'Neighborhood Name' : name, 
                                          'Venue Name':v_name,
                                          'Venue Latitude':lat, 
                                          'Venue Longitude':lon, 
                                          'Like Counter':results['likes']['count'],
                                          'Disliked':results['dislike'],
                                          'Tip Counter':results['tips']['count'],
                                          'Rating' : r}, ignore_index=True)

## Hotel Data

In [27]:
staten_hotel_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating
0,Rosebank,Staten Island Motor Lodge,40.60845,-74.0766,0,False,2,Not Rated yet
1,Travis,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6
2,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.9
3,Park Hill,Staten Island Motor Lodge,40.60845,-74.0766,0,False,2,Not Rated yet
4,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2
5,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2
6,Pleasant Plains,West Shore Motor Lodge,40.532809,-74.225245,0,False,3,Not Rated yet
7,Charleston,West Shore Motor Lodge,40.532809,-74.225245,0,False,3,Not Rated yet
8,Chelsea,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6
9,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.9


# User Interface

In [28]:
user_sees = staten_hotels.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue Id','Venue Latitude','Venue Longitude','Venue Category'],axis = 1)
user_sees.columns = ['Neighborhood','Hotel Name','Distance']
user_sees['Rating'] = staten_hotel_data['Rating']
user_sees

Unnamed: 0,Neighborhood,Hotel Name,Distance,Rating
0,Rosebank,Staten Island Motor Lodge,954,Not Rated yet
1,Travis,Fairfield Inn & Suites,51,6.6
2,Travis,Comfort Inn,46,4.9
3,Park Hill,Staten Island Motor Lodge,311,Not Rated yet
4,Arrochar,Staten Island Navy Lodge,489,6.2
5,Grasmere,Staten Island Navy Lodge,1217,6.2
6,Pleasant Plains,West Shore Motor Lodge,1012,Not Rated yet
7,Charleston,West Shore Motor Lodge,637,Not Rated yet
8,Chelsea,Fairfield Inn & Suites,908,6.6
9,Chelsea,Comfort Inn,951,4.9


# So we can straightaway tell that Manhattan has many more good hotels than Staten Island has.

## Just out of curiosity we would like to Cluster hotels with similar statistics.

### Only take the necessary columns from the dataframe containing the hotel data of Manhattan.

In [29]:
manhattan_hotels_cluster = manhattan_hotels_data.drop(['Neighborhood Name','Venue Name', 'Venue Latitude', 'Venue Longitude', 'Disliked'], axis = 1)

In [30]:
manhattan_hotels_cluster

Unnamed: 0,Like Counter,Tip Counter,Rating
0,74,10,9.1
1,34,5,9.2
2,209,25,9.0
3,88,31,8.6
4,152,59,8.7
5,152,59,8.7
6,136,27,8.8
7,67,46,7.9
8,319,88,9.2
9,200,93,8.8


# 6. Clustering the hotels of Manhattan!!

In [31]:
# set number of clusters
kclusters = 3

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_hotels_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 2, 0, 0,
       2, 0, 2, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2,
       0, 0, 2, 0, 0, 0, 0], dtype=int32)

In [32]:
manhattan_hotels_data['Cluster Labels'] = kmeans.labels_

In [33]:
manhattan_hotels_data

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,74,False,10,9.1,0
1,Chinatown,CitizenM Bowery,40.720599,-73.993574,34,False,5,9.2,0
2,Chinatown,The Ludlow Hotel,40.721857,-73.987204,209,False,25,9.0,0
3,Upper East Side,The Mark Hotel,40.775201,-73.963351,88,False,31,8.6,0
4,Upper East Side,The Carlyle,40.774413,-73.963301,152,False,59,8.7,0
5,Lenox Hill,The Carlyle,40.774413,-73.963301,152,False,59,8.7,0
6,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,136,False,27,8.8,0
7,Roosevelt Island,Ravel Hotel,40.753989,-73.949244,67,False,46,7.9,0
8,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,319,False,88,9.2,2
9,Clinton,Kimpton Ink48 Hotel,40.764505,-73.995987,200,False,93,8.8,0


# Let's see the hotels in the map of Manhattan

In [34]:
address = 'Manhattan, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, nei, ven, cluster in zip(manhattan_hotels_data['Venue Latitude'], manhattan_hotels_data['Venue Longitude'], manhattan_hotels_data['Neighborhood Name'], manhattan_hotels_data['Venue Name'], manhattan_hotels_data['Cluster Labels']):
    label = folium.Popup(str(ven) + ',' + str(nei) + ',' + ' Cluster = ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Let's analyze our clusters of hotels

## Cluster 1

In [36]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
0,Chinatown,Hotel 50 Bowery NYC,40.715936,-73.996789,74,False,10,9.1,0
1,Chinatown,CitizenM Bowery,40.720599,-73.993574,34,False,5,9.2,0
2,Chinatown,The Ludlow Hotel,40.721857,-73.987204,209,False,25,9.0,0
3,Upper East Side,The Mark Hotel,40.775201,-73.963351,88,False,31,8.6,0
4,Upper East Side,The Carlyle,40.774413,-73.963301,152,False,59,8.7,0
5,Lenox Hill,The Carlyle,40.774413,-73.963301,152,False,59,8.7,0
6,Lenox Hill,Loews Regency Hotel,40.76455,-73.969218,136,False,27,8.8,0
7,Roosevelt Island,Ravel Hotel,40.753989,-73.949244,67,False,46,7.9,0
9,Clinton,Kimpton Ink48 Hotel,40.764505,-73.995987,200,False,93,8.8,0
10,Clinton,EVEN Hotels New York - Times Square South,40.753467,-73.993898,39,False,8,9.0,0


## Cluster 2

In [37]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
12,Clinton,W New York - Times Square,40.759296,-73.985573,1002,False,222,9.0,1
17,Midtown,W New York - Times Square,40.759296,-73.985573,1002,False,222,9.0,1
25,Chelsea,Soho House,40.740537,-74.005685,1104,False,201,9.2,1
26,Chelsea,"The Standard, High Line",40.740854,-74.007952,1073,False,257,9.2,1
38,West Village,Soho House,40.740537,-74.005685,1104,False,201,9.2,1
39,West Village,"The Standard, High Line",40.740854,-74.007952,1073,False,257,9.2,1


## Cluster 3

In [38]:
manhattan_hotels_data.loc[manhattan_hotels_data['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
8,Lincoln Square,Mandarin Oriental,40.768987,-73.983017,319,False,88,9.2,2
13,Clinton,citizenM Hotel New York Times Square,40.761691,-73.984953,323,False,82,9.2,2
19,Midtown,citizenM Hotel New York Times Square,40.761691,-73.984953,323,False,82,9.2,2
22,Murray Hill,The NoMad Hotel,40.744981,-73.988819,366,False,91,9.4,2
24,Chelsea,The High Line Hotel,40.745924,-74.005389,252,False,47,9.1,2
27,Greenwich Village,The Bowery Hotel,40.726145,-73.991627,504,False,90,9.2,2
28,East Village,The Bowery Hotel,40.726145,-73.991627,504,False,90,9.2,2
42,Battery Park City,Conrad New York,40.714911,-74.015461,290,False,75,8.7,2
49,Noho,The Bowery Hotel,40.726145,-73.991627,504,False,90,9.2,2
55,Midtown South,The NoMad Hotel,40.744981,-73.988819,366,False,91,9.4,2


# Detailed Analysis :
|Cluster Labels|Analysis|
|:------------:|:------|
|0|The number of ***'Likes'*** and ***'Tips'*** are high but not as high as those in Cluster 2. The ratings of the hotels are mixed but they are mostly high.|
|1|The number of ***'Likes'*** and ***'Tips'*** are comparatively much lower than the hotels in the other clusters. The ratings are moderate and not as good as the hotels in the other clusters.|
|2|These hotels have a very high number of ***'Likes'*** and ***'Tips'***. Although the ratings of any of the hotels are not out of the charts they are really high.|

### Only take the necessary columns from the dataframe containing the hotel data of Staten Island. Remove the hotels that has not been rated.

In [39]:
staten_hotel_data1 = staten_hotel_data[staten_hotel_data['Rating'] != 'Not Rated yet']

In [40]:
staten_hotel_cluster = staten_hotel_data1.drop(['Neighborhood Name','Venue Name', 'Venue Latitude', 'Venue Longitude', 'Disliked'], axis = 1)

In [41]:
staten_hotel_cluster

Unnamed: 0,Like Counter,Tip Counter,Rating
1,0,0,6.6
2,4,11,4.9
4,7,4,6.2
5,7,4,6.2
8,0,0,6.6
9,4,11,4.9
10,67,19,8.4
11,14,4,7.0
12,18,9,6.7
13,7,4,6.2


# 6. Clustering the hotels of Staten Island!!

In [42]:
# set number of clusters
kclusters = 3

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(staten_hotel_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 2, 2, 2, 2, 2, 1, 0, 0, 2], dtype=int32)

In [43]:
staten_hotel_data1['Cluster Labels'] = kmeans.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [44]:
staten_hotel_data1

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
1,Travis,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6,2
2,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.9,2
4,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2
5,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2
8,Chelsea,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6,2
9,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.9,2
10,Bloomfield,Hilton Garden Inn Staten Island,40.614832,-74.176646,67,False,19,8.4,1
11,Bloomfield,Nicotra's Ballroom,40.614842,-74.1761,14,False,4,7.0,0
12,Bloomfield,Hampton Inn & Suites Staten Island,40.613095,-74.178888,18,False,9,6.7,0
13,Shore Acres,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2


In [45]:
address = 'Staten Island, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Manhattan are 40.5834557, -74.1496048.


In [46]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, nei, ven, cluster in zip(staten_hotel_data1['Venue Latitude'], staten_hotel_data1['Venue Longitude'], staten_hotel_data1['Neighborhood Name'], staten_hotel_data1['Venue Name'], staten_hotel_data1['Cluster Labels']):
    label = folium.Popup(str(ven) + ',' + str(nei) + ',' + ' Cluster = ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Cluster 1

In [47]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
11,Bloomfield,Nicotra's Ballroom,40.614842,-74.1761,14,False,4,7.0,0
12,Bloomfield,Hampton Inn & Suites Staten Island,40.613095,-74.178888,18,False,9,6.7,0


## Cluster 2

In [48]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
10,Bloomfield,Hilton Garden Inn Staten Island,40.614832,-74.176646,67,False,19,8.4,1


## Cluster 3

In [49]:
staten_hotel_data1.loc[staten_hotel_data1['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood Name,Venue Name,Venue Latitude,Venue Longitude,Like Counter,Disliked,Tip Counter,Rating,Cluster Labels
1,Travis,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6,2
2,Travis,Comfort Inn,40.586191,-74.190216,4,False,11,4.9,2
4,Arrochar,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2
5,Grasmere,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2
8,Chelsea,Fairfield Inn & Suites,40.586652,-74.19116,0,False,0,6.6,2
9,Chelsea,Comfort Inn,40.586191,-74.190216,4,False,11,4.9,2
13,Shore Acres,Staten Island Navy Lodge,40.598734,-74.062288,7,False,4,6.2,2


# Detailed Analysis :
|Cluster Labels|Analysis|
|:------------:|:------|
|0|These hotels have a higher number of ***'Likes'*** and ***'Tips'*** than other clusters. Although the ratings of any of the hotels are not out of the charts they are really high.|
|1|The number of ***'Likes'*** and ***'Tips'*** are comparatively much lower than the hotels in the other clusters. The ratings are moderate and not as good as the hotels in the other clusters.|
|2|The number of ***'Likes'*** and ***'Tips'*** are high but not as high as those in Cluster 0. The ratings of the hotels are mixed but they are mostly high.|