# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
# Installing necessary packages
!pip install bs4
!pip install lxml

Collecting bs4
  Downloading https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/a0/b0/b2/4f80b9456b87abedbc0bf2d52235414c3467d8889be38dd472
Successfully built bs4
Installing collected packages: bs4
Successfully installed bs4-0.0.1
[33mYou are using pip version 19.0.2, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
[33mYou are using pip version 19.0.2, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


### Import of necessary files

In [2]:
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /home/nbuser/anaconda3_420:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0        conda-forge
    geopy:         1.18.1-py_0      conda-forge
    readline:      7.0-ha6073c6_4              

The following packages will be UPDATED:

    conda:         4.3.31-py35_0                --> 4.5.11-py35_0        conda-forge
    pycosat:       0.6.1-py35_1                 --> 0.6.3-py35h470a237_1 conda-forge

The following packages will be SUPERSEDED by a higher-priority channel:

    conda-env:     2.6.0-h36134e3_1             --> 2.6.0-1              conda-forge

conda-env-2.6. 100% |################################| Time: 0:00:00 694.00 kB/s
readline-7.0-h 100% |################################| Time: 0:00:00   6.80 MB/s
geographiclib- 100% |################################| Time: 0:00:00   2.45 MB/s
pycosat-0.6.3- 100% |########################

### Parsing of wiki page

In [3]:
website_url = requests.get('https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/').text
soup = BeautifulSoup(website_url,'html.parser')
#print(soup.prettify())

### Accessing only Table Data

In [4]:
soup.table

<table><tr><th colspan="4" style="text-align: center;">Pincode Details</th></tr><tr><td><b>Location</b></td><td><b>Pincode </b></td><td><b>State </b></td><td><b>District </b></td></tr><tr><td><a href="https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/a-i-staff-colony.html">A I staff colony</a></td><td><b>400029 </b></td><td>Maharashtra</td><td>Mumbai </td></tr><tr><td><a href="https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/aareymilk-colony.html">Aareymilk Colony</a></td><td><b>400065 </b></td><td>Maharashtra</td><td>Mumbai </td></tr><tr><td><a href="https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/agripada.html">Agripada</a></td><td><b>400011 </b></td><td>Maharashtra</td><td>Mumbai </td></tr><tr><td><a href="https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/airport.html">Airport</a></td><td><b>400099 </b></td><td>Maharashtra</td><td>Mumbai </td></tr><tr><td><a href="https://www.mapsofindia.com/pincode/india/maharashtra/mumbai/ambewadi.htm

### Setting Dataframe columns

In [5]:
column_names = ['Postcode', 'Borough', 'Neighborhood'] 

### Setting Dataframe

In [6]:
df = pd.DataFrame(columns=column_names)

Postcode=[]
Borough=[]
Neighborhood=[]

### Assigning values in List

In [7]:
for row in soup.table.findAll('tr'):
    cells = row.findAll('td')
    if len(cells)==4: #Only extract table data with values
        if cells[0].find(text=True)=='Location':  #Only extract table body not heading
            continue;
        Postcode.append(cells[1].find(text=True))
        Neighborhood.append(cells[0].find(text=True))
        Borough.append(cells[3].find(text=True))
        

### Assinging list values into Dataframe

In [8]:
# print(Postcode)
df['Postcode']=Postcode
df['Neighborhood']=Neighborhood
df['Borough']=Borough


### Accessing the dataframe

In [9]:
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,400029,Mumbai,A I staff colony
1,400065,Mumbai,Aareymilk Colony
2,400011,Mumbai,Agripada
3,400099,Mumbai,Airport
4,400004,Mumbai,Ambewadi


### Viewing the shape of Dataframe

In [10]:
df.shape

(182, 3)

### Removing the Non assinged rows of Borough column

In [11]:
#Removing not assigned rows
df.drop(df[df['Borough'] == 'Not assigned'].index, inplace=True)

### Removing the non assigned values of Neighborhood and assigned borough values

In [12]:
df.loc[df.Neighborhood == 'Not assigned\n', "Neighborhood"] = df.Borough

### Accessing the dataframe

In [13]:
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,400029,Mumbai,A I staff colony
1,400065,Mumbai,Aareymilk Colony
2,400011,Mumbai,Agripada
3,400099,Mumbai,Airport
4,400004,Mumbai,Ambewadi


### New shape of dataframe as

In [14]:
df.shape

(182, 3)

### Count of duplicate values

In [15]:
#count of unique Postcode
len(df.Postcode.unique())

67

### Grouping and assinging the appended values of Neighborhood of same Postcode

In [16]:
df = df.groupby(['Postcode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()
df.columns = ['Postcode', 'Borough', 'Neighborhood']

### New DataFrame shape as

In [17]:
df.shape

(67, 3)

### Reading values of Latitude and Logitude from csv

In [18]:
temp = pd.read_csv('IN.txt', sep="\t")
temp = temp.loc[:,['Pincode','xyz','lat','log']]

df_latlon=temp.loc[temp['xyz'] == "Mumbai", ['Pincode','lat','log']]
df_latlon.drop_duplicates(subset=None, keep='first', inplace=True)

df_latlon = df_latlon.reset_index(drop=True)

df_latlon.columns = ['Postcode', 'Latitude', 'Longitude']
df_latlon.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,400001,18.6291,72.8919
1,400002,17.0509,73.291
2,400004,18.95,72.8167
3,400005,18.9069,72.8106
4,400006,18.95,72.7833


### merging of dataframe with Latitude and Longitude

In [64]:
# print(df.head())
# print(df_latlon.head())

# print(len(df))
# print(len(df_latlon))

# print (df[df.Postcode.isin(df_latlon.Postcode)])
df["Postcode"] = pd.to_numeric(df["Postcode"])

 
result = pd.merge(df, df_latlon, on='Postcode',how='inner')
print(result.head())


   Postcode  Borough                                       Neighborhood  \
0    400001  Mumbai   Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...   
1    400002  Mumbai   Kalbadevi, Ramwadi, S. c. court, Shroff Mahaja...   
2    400004  Mumbai   Ambewadi, Charni Road, Chaupati, Girgaon, Madh...   
3    400005  Mumbai              Asvini, Colaba, Holiday Camp, V.W.t.c.   
4    400006  Mumbai                                        Malabar Hill   

   Latitude  Longitude  
0   18.6291    72.8919  
1   17.0509    73.2910  
2   18.9500    72.8167  
3   18.9069    72.8106  
4   18.9500    72.7833  


In [65]:
result.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,400001,Mumbai,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",18.6291,72.8919
1,400002,Mumbai,"Kalbadevi, Ramwadi, S. c. court, Shroff Mahaja...",17.0509,73.291
2,400004,Mumbai,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",18.95,72.8167
3,400005,Mumbai,"Asvini, Colaba, Holiday Camp, V.W.t.c.",18.9069,72.8106
4,400006,Mumbai,Malabar Hill,18.95,72.7833


In [21]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(result['Borough'].unique()),
        result.shape[0]
    )
)

The dataframe has 1 boroughs and 51 neighborhoods.


### Use geopy library to get the latitude and longitude values of Mumbai.

In [22]:
address = 'Mumbai, India'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai are {}, {}.'.format(latitude, longitude))


  app.launch_new_instance()


The geograpical coordinate of Mumbai are 18.9387711, 72.8353355.


### Create a map of Mumbai with neighborhoods superimposed on top

In [23]:
# create map of Mumbai india using latitude and longitude values
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(result['Latitude'], result['Longitude'], result['Borough'], result['Neighborhood']):
    label = '{}, {}'.format(result, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_mumbai)  
    
map_mumbai

### However, for illustration purposes, let's simplify the above map and segment and cluster only the neighborhoods in East Toronto. So let's slice the original dataframe and create a new dataframe of the East Toronto data.

In [122]:
# result['Borough'] = str(result['Borough'])


mumbai_data = result.reset_index(drop=True)
mumbai_data.head(10)

# mumbai_data = result[result['Borough'] == 'Mumbai'].reset_index(drop=True)
# mumbai_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,400001,Mumbai,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",18.6291,72.8919
1,400002,Mumbai,"Kalbadevi, Ramwadi, S. c. court, Shroff Mahaja...",17.0509,73.291
2,400004,Mumbai,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",18.95,72.8167
3,400005,Mumbai,"Asvini, Colaba, Holiday Camp, V.W.t.c.",18.9069,72.8106
4,400006,Mumbai,Malabar Hill,18.95,72.7833
5,400007,Mumbai,"Bharat Nagar, Grant Road, N . s.patkar, S V ma...",18.9667,72.8167
6,400008,Mumbai,"Falkland Road, J.J.hospital, Kamathipura, M A ...",18.6291,72.8919
7,400009,Mumbai,"Chinchbunder, Princess Dock",18.6291,72.8919
8,400010,Mumbai,"Dockyard Road, Mazgaon, Mazgaon Dock, Mazgaon ...",18.3667,72.9333
9,400011,Mumbai,"Agripada, Chinchpokli, Haines Road, Jacob Circle",18.9833,72.8333


### Get the geographical coordinates of Mumbai

In [70]:
address = 'Mumbai, India'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of east_toronto are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of east_toronto are 18.9387711, 72.8353355.


### Let's visualizat Mumbai the neighborhoods in it

In [71]:
# create map of Mumbai using latitude and longitude values
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(mumbai_data['Latitude'], mumbai_data['Longitude'], mumbai_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_mumbai)  
    
map_mumbai

### Define Foursquare Credentials and Version�

In [72]:
CLIENT_ID = 'JMRORPLT5G4P1SHF3L1JRRXSFADNRKJT32PPH0FPCZSKUKW3' # your Foursquare ID
CLIENT_SECRET = 'LSVDKT4B2IDQWKYBI3YNJVSCJVUMHBKK30XZFBL4CS0KVV1V' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: JMRORPLT5G4P1SHF3L1JRRXSFADNRKJT32PPH0FPCZSKUKW3
CLIENT_SECRET:LSVDKT4B2IDQWKYBI3YNJVSCJVUMHBKK30XZFBL4CS0KVV1V


### Get the neighborhood's name.

In [73]:
mumbai_data.loc[0, 'Neighborhood']

'Bazargate, M.P.t., Mumbai., Stock Exchange, Tajmahal, Town Hall'

### Get the neighborhood's latitude and longitude values.

In [75]:
neighborhood_latitude = mumbai_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = mumbai_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = mumbai_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Bazargate, M.P.t., Mumbai., Stock Exchange, Tajmahal, Town Hall are 18.6291, 72.8919.


### top 100 venues that are in The Beaches within a radius of 500 meters

In [76]:
LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?&client_id=JMRORPLT5G4P1SHF3L1JRRXSFADNRKJT32PPH0FPCZSKUKW3&client_secret=LSVDKT4B2IDQWKYBI3YNJVSCJVUMHBKK30XZFBL4CS0KVV1V&v=20180605&ll=18.6291,72.8919&radius=500&limit=100'

### Send the GET request and examine the resutls

In [77]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c74db8c1ed2196e4a9ddfea'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-5b76f18bf96b2c002c97e093-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
          'suffix': '.png'},
         'id': '5453de49498eade8af355881',
         'name': 'Business Service',
         'pluralName': 'Business Services',
         'primary': True,
         'shortName': 'Business Services'}],
       'id': '5b76f18bf96b2c002c97e093',
       'location': {'cc': 'IN',
        'city': 'Mumbai',
        'country': 'India',
        'distance': 107,
        'formattedAddress': ['Mumbai 400001', 'Mahārāshtra', 'India'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 18.629999598704515,
          'lng': 72.89152979850769}],
     

### get_category_type function from the Foursquare lab.

In [78]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### clean the json and structure it into a pandas dataframe.

In [79]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,ACHI BIZ SERVICES PTE. LTD.,Business Service,18.63,72.89153
1,Royal Secret VIP,Night Market,18.62941,72.892592
2,INDIALAW LLP,Lawyer,18.629735,72.890382


### how many venues were returned by Foursquare?

In [80]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


### function to repeat the same process to all the neighborhoods in East Toronto

In [81]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### code to run the above function on each neighborhood and create a new dataframe

In [82]:
mumbai_data_venues = getNearbyVenues(names=mumbai_data['Neighborhood'],
                                   latitudes=mumbai_data['Latitude'],
                                   longitudes=mumbai_data['Longitude']
                                  )



Bazargate, M.P.t., Mumbai., Stock Exchange, Tajmahal, Town Hall
Kalbadevi, Ramwadi, S. c. court, Shroff Mahajan, Thakurdwar
Ambewadi, Charni Road, Chaupati, Girgaon, Madhavbaug, Opera House
Asvini, Colaba, Holiday Camp, V.W.t.c.
Malabar Hill
Bharat Nagar, Grant Road, N . s.patkar, S V marg, Tardeo
Falkland Road, J.J.hospital, Kamathipura, M A marg, Mumbai Central
Chinchbunder, Princess Dock
Dockyard Road, Mazgaon, Mazgaon Dock, Mazgaon Road, V K bhavan
Agripada, Chinchpokli, Haines Road, Jacob Circle
Best Staff colony, Chamarbaug, Haffkin Institute, Lal Baug, Parel, Parel Naka
C G s colony, Delisle Road
Dadar, Dadar Colony, Naigaon
Sewri
Kapad Bazar, Mahim, Mahim Bazar, Mahim East, Mori Road
Dharavi, Dharavi Road
Worli, Worli Naka
Matunga Railway workshop
Central Building, Churchgate, Marine Lines
Nariman Point, New Yogakshema
New Prabhadevi road, Prabhadevi
Cumballa Hill, Cumballa Sea face, Dr Deshmukh marg, Gowalia Tank
V J b udyan
Bhawani Shankar, Bhawani Shankar rd, Gokhale Road, R

### check the size of the resulting dataframe

In [83]:
print(mumbai_data_venues.shape)
mumbai_data_venues.head()

(508, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",18.6291,72.8919,ACHI BIZ SERVICES PTE. LTD.,18.63,72.89153,Business Service
1,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",18.6291,72.8919,Royal Secret VIP,18.62941,72.892592,Night Market
2,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",18.6291,72.8919,INDIALAW LLP,18.629735,72.890382,Lawyer
3,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",18.95,72.8167,Bachelors Juice House,18.950088,72.819185,Juice Bar
4,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",18.95,72.8167,Bachelorr's Ice Creams,18.954113,72.815396,Ice Cream Shop


### how many venues were returned for each neighborhood

In [85]:
mumbai_data_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"A I staff colony, Santacruz P&t; colony",9,9,9,9,9,9
"Agripada, Chinchpokli, Haines Road, Jacob Circle",4,4,4,4,4,4
"Airport, International Airport, Sahar P & t colony, Sahargaon",9,9,9,9,9,9
"Ambewadi, Charni Road, Chaupati, Girgaon, Madhavbaug, Opera House",18,18,18,18,18,18
"Andheri East, Nagardas Road",8,8,8,8,8,8
"Andheri Railway station, H.M.p. school",2,2,2,2,2,2
"Andheri, Azad Nagar",7,7,7,7,7,7
Antop Hill,19,19,19,19,19,19
"Asvini, Colaba, Holiday Camp, V.W.t.c.",2,2,2,2,2,2
"B.N. bhavan, Bandra(east), Government Colony, Kherwadi",2,2,2,2,2,2


### how many unique categories can be curated from all the returned venues

In [87]:
print('There are {} uniques categories.'.format(len(mumbai_data_venues['Venue Category'].unique())))

There are 100 uniques categories.


### Analyse Each Neighborhood

In [89]:
# one hot encoding
mumbai_onehot = pd.get_dummies(mumbai_data_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mumbai_onehot['Neighborhood'] = mumbai_data_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [mumbai_onehot.columns[-1]] + list(mumbai_onehot.columns[:-1])
mumbai_onehot = mumbai_onehot[fixed_columns]

mumbai_onehot.head()

Unnamed: 0,Neighborhood,Aquarium,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beach,Bengali Restaurant,Bike Rental / Bike Share,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bus Station,Business Service,Café,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Flower Shop,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Garden,Gastropub,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,History Museum,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Italian Restaurant,Japanese Restaurant,Juice Bar,Lawyer,Lounge,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Movie Theater,Multiplex,Music Venue,Night Market,Nightclub,Other Great Outdoors,Park,Performing Arts Venue,Pizza Place,Plaza,Pub,Racetrack,Recreation Center,Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Spa,Spanish Restaurant,Sports Club,Steakhouse,Supermarket,Surf Spot,Tea Room,Tennis Court,Theater,Train Station,Vegetarian / Vegan Restaurant,Wine Bar,Women's Store
0,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Bazargate, M.P.t., Mumbai., Stock Exchange, Ta...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Examine New Dataframe set

In [90]:
mumbai_onehot.shape

(508, 101)

### group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [123]:
mumbai_grouped = mumbai_onehot.groupby('Neighborhood').mean().reset_index()
mumbai_grouped.head()

Unnamed: 0,Neighborhood,Aquarium,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beach,Bengali Restaurant,Bike Rental / Bike Share,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bus Station,Business Service,Café,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Flower Shop,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Garden,Gastropub,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,History Museum,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indoor Play Area,Italian Restaurant,Japanese Restaurant,Juice Bar,Lawyer,Lounge,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Movie Theater,Multiplex,Music Venue,Night Market,Nightclub,Other Great Outdoors,Park,Performing Arts Venue,Pizza Place,Plaza,Pub,Racetrack,Recreation Center,Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Spa,Spanish Restaurant,Sports Club,Steakhouse,Supermarket,Surf Spot,Tea Room,Tennis Court,Theater,Train Station,Vegetarian / Vegan Restaurant,Wine Bar,Women's Store
0,"A I staff colony, Santacruz P&t; colony",0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agripada, Chinchpokli, Haines Road, Jacob Circle",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Airport, International Airport, Sahar P & t co...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.0,0.055556,0.111111,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
4,"Andheri East, Nagardas Road",0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0


### New Shape

In [92]:
mumbai_grouped.shape

(45, 101)

### each neighborhood along with the top 5 most common venues

In [93]:
num_top_venues = 5

for hood in mumbai_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = mumbai_grouped[mumbai_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----A I staff colony, Santacruz P&t; colony----
                        venue  freq
0      Furniture / Home Store  0.11
1  Modern European Restaurant  0.11
2            Asian Restaurant  0.11
3           Indian Restaurant  0.11
4                   Racetrack  0.11


----Agripada, Chinchpokli, Haines Road, Jacob Circle----
                  venue  freq
0                   Spa  0.25
1        History Museum  0.25
2  Fast Food Restaurant  0.25
3             Multiplex  0.25
4     Mobile Phone Shop  0.00


----Airport, International Airport, Sahar P & t colony, Sahargaon----
                             venue  freq
0                              Gym  0.22
1                   Ice Cream Shop  0.11
2                       Restaurant  0.11
3                             Café  0.11
4  Molecular Gastronomy Restaurant  0.11


----Ambewadi, Charni Road, Chaupati, Girgaon, Madhavbaug, Opera House----
               venue  freq
0  Indian Restaurant  0.11
1               Café  0.11
2           Aquarium  

### function to sort the venues in descending order.

In [94]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### new dataframe and display the top 10 venues for each neighborhood.

In [96]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = mumbai_grouped['Neighborhood']

for ind in np.arange(mumbai_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mumbai_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"A I staff colony, Santacruz P&t; colony",Coffee Shop,Asian Restaurant,Tea Room,Indian Restaurant,Furniture / Home Store,Racetrack,Modern European Restaurant,Spa,Café,Fast Food Restaurant
1,"Agripada, Chinchpokli, Haines Road, Jacob Circle",History Museum,Multiplex,Fast Food Restaurant,Spa,Flea Market,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop
2,"Airport, International Airport, Sahar P & t co...",Gym,Ice Cream Shop,Gourmet Shop,Dessert Shop,Italian Restaurant,Restaurant,Molecular Gastronomy Restaurant,Café,Women's Store,Fast Food Restaurant
3,"Ambewadi, Charni Road, Chaupati, Girgaon, Madh...",Indian Restaurant,Café,Aquarium,Smoke Shop,Harbor / Marina,Gastropub,Ice Cream Shop,Food Truck,Juice Bar,Fast Food Restaurant
4,"Andheri East, Nagardas Road",Hotel,Diner,Vegetarian / Vegan Restaurant,Asian Restaurant,Hotel Bar,Indian Restaurant,Flower Shop,Department Store,Dessert Shop,Donut Shop
5,"Andheri Railway station, H.M.p. school",Dance Studio,Business Service,Flower Shop,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
6,"Andheri, Azad Nagar",Pizza Place,Bus Station,Train Station,Athletics & Sports,Bakery,Gym,Bike Rental / Bike Share,Department Store,Dessert Shop,Diner
7,Antop Hill,Coffee Shop,Café,Indian Restaurant,Juice Bar,Convenience Store,Park,Gym,Pub,Farmers Market,Bus Station
8,"Asvini, Colaba, Holiday Camp, V.W.t.c.",Café,Garden,Women's Store,Flower Shop,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market
9,"B.N. bhavan, Bandra(east), Government Colony, ...",Train Station,Mobile Phone Shop,Women's Store,Flea Market,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store


### Run k-means to cluster the neighborhood into 3 clusters.

In [166]:
# set number of clusters
kclusters = 3

mumbai_grouped_clustering = mumbai_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

### New Dataframe of top 10 venues for each neighborhood

In [172]:
mumbai_merged = mumbai_data
mumbai_merged=mumbai_merged[6:]

# print(len(mumbai_merged))
# print(len(neighborhoods_venues_sorted))

# print(mumbai_merged)
# print("-------------")
# print(neighborhoods_venues_sorted)

# add clustering labels
mumbai_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
mumbai_merged = mumbai_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

mumbai_merged=mumbai_merged.dropna()
mumbai_merged.head(20) # check the last columns!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,400008,Mumbai,"Falkland Road, J.J.hospital, Kamathipura, M A ...",18.6291,72.8919,0,Night Market,Lawyer,Business Service,Women's Store,Flea Market,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store
7,400009,Mumbai,"Chinchbunder, Princess Dock",18.6291,72.8919,0,Night Market,Lawyer,Business Service,Women's Store,Flea Market,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store
9,400011,Mumbai,"Agripada, Chinchpokli, Haines Road, Jacob Circle",18.9833,72.8333,0,History Museum,Multiplex,Fast Food Restaurant,Spa,Flea Market,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop
10,400012,Mumbai,"Best Staff colony, Chamarbaug, Haffkin Institu...",19.0,72.8333,0,Café,Dessert Shop,Indian Restaurant,Brewery,Lounge,Cosmetics Shop,Cocktail Bar,Recreation Center,Seafood Restaurant,Mediterranean Restaurant
12,400014,Mumbai,"Dadar, Dadar Colony, Naigaon",19.0201,72.8381,0,Indian Restaurant,Movie Theater,Breakfast Spot,Grocery Store,Juice Bar,Electronics Store,Plaza,Clothing Store,Fast Food Restaurant,Restaurant
13,400015,Mumbai,Sewri,19.0,72.85,0,Flea Market,Train Station,Bus Station,Cosmetics Shop,Women's Store,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop
14,400016,Mumbai,"Kapad Bazar, Mahim, Mahim Bazar, Mahim East, M...",19.0333,72.85,0,Women's Store,Train Station,Ice Cream Shop,Tennis Court,Department Store,Chinese Restaurant,Flea Market,Dance Studio,Dessert Shop,Diner
15,400017,Mumbai,"Dharavi, Dharavi Road",19.05,72.8667,0,Train Station,Snack Place,Indian Restaurant,Supermarket,Bus Station,Breakfast Spot,Seafood Restaurant,Fish & Chips Shop,Dance Studio,Department Store
16,400018,Mumbai,"Worli, Worli Naka",19.0167,72.8167,1,Café,Nightclub,French Restaurant,Sports Club,Fish & Chips Shop,Women's Store,Flea Market,Department Store,Dessert Shop,Diner
17,400019,Mumbai,Matunga Railway workshop,19.0333,72.85,2,Women's Store,Train Station,Ice Cream Shop,Tennis Court,Department Store,Chinese Restaurant,Flea Market,Dance Studio,Dessert Shop,Diner


###  visualize the resulting clusters

In [168]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_merged['Latitude'], mumbai_merged['Longitude'], mumbai_merged['Neighborhood'], mumbai_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [169]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 0, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Mumbai,0,Night Market,Lawyer,Business Service,Women's Store,Flea Market,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store
7,Mumbai,0,Night Market,Lawyer,Business Service,Women's Store,Flea Market,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store
9,Mumbai,0,History Museum,Multiplex,Fast Food Restaurant,Spa,Flea Market,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop
10,Mumbai,0,Café,Dessert Shop,Indian Restaurant,Brewery,Lounge,Cosmetics Shop,Cocktail Bar,Recreation Center,Seafood Restaurant,Mediterranean Restaurant
12,Mumbai,0,Indian Restaurant,Movie Theater,Breakfast Spot,Grocery Store,Juice Bar,Electronics Store,Plaza,Clothing Store,Fast Food Restaurant,Restaurant
13,Mumbai,0,Flea Market,Train Station,Bus Station,Cosmetics Shop,Women's Store,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop
14,Mumbai,0,Women's Store,Train Station,Ice Cream Shop,Tennis Court,Department Store,Chinese Restaurant,Flea Market,Dance Studio,Dessert Shop,Diner
15,Mumbai,0,Train Station,Snack Place,Indian Restaurant,Supermarket,Bus Station,Breakfast Spot,Seafood Restaurant,Fish & Chips Shop,Dance Studio,Department Store
18,Mumbai,0,Coffee Shop,Asian Restaurant,Tea Room,Indian Restaurant,Furniture / Home Store,Racetrack,Modern European Restaurant,Spa,Café,Fast Food Restaurant
19,Mumbai,0,Restaurant,Italian Restaurant,Fast Food Restaurant,Indian Restaurant,Hotel,Coffee Shop,Chaat Place,Wine Bar,French Restaurant,Japanese Restaurant


### Cluser 3

In [170]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 1, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Mumbai,1,Café,Nightclub,French Restaurant,Sports Club,Fish & Chips Shop,Women's Store,Flea Market,Department Store,Dessert Shop,Diner
33,Mumbai,1,Dance Studio,Business Service,Flower Shop,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market,Fast Food Restaurant
35,Mumbai,1,Food & Drink Shop,Surf Spot,Flower Shop,Dance Studio,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Farmers Market
44,Mumbai,1,Indian Restaurant,Café,Chinese Restaurant,Coffee Shop,Bakery,Restaurant,Ice Cream Shop,Bar,Beach,Bengali Restaurant
45,Mumbai,1,Indian Restaurant,Smoke Shop,Fast Food Restaurant,Bookstore,Snack Place,Women's Store,Dance Studio,Department Store,Dessert Shop,Diner


### Cluster 3

In [171]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 2, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Mumbai,2,Women's Store,Train Station,Ice Cream Shop,Tennis Court,Department Store,Chinese Restaurant,Flea Market,Dance Studio,Dessert Shop,Diner
25,Mumbai,2,Coffee Shop,Café,Indian Restaurant,Juice Bar,Convenience Store,Park,Gym,Pub,Farmers Market,Bus Station
31,Mumbai,2,Coffee Shop,Café,Indian Restaurant,Juice Bar,Convenience Store,Park,Gym,Pub,Farmers Market,Bus Station
