# Opening a New Asian Restaurant in New Delhi, India

Build a dataframe of neighborhoods in New Delhi, India by web scraping the data from the Wikipedia page

Get the geographical coordinates of the neighborhoods

Obtain the venue data for the neighborhoods from Foursquare API

Explore and cluster the neighborhoods

Select the best cluster to open a new Asian Restaurant

### IMPORT LIBRARIES

In [22]:
# Numerical arrays (used further down to build the colour ramp of the cluster map).
import numpy as np 

# Tabular data handling; the two options below lift the display truncation so
# that every column and every row of a frame is shown.
import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# NOTE(review): `json` does not appear to be used in the visible cells - confirm.
import json 

# Geocoding: Nominatim for the city itself, geocoder (ArcGIS) per neighbourhood.
from geopy.geocoders import Nominatim 
import geocoder

# HTTP requests and HTML parsing for the Wikipedia scrape and the Foursquare calls.
import requests 
from bs4 import BeautifulSoup 

# NOTE(review): `json_normalize` does not appear to be used in the visible cells - confirm.
from pandas.io.json import json_normalize 

# Colour map and RGB -> hex conversion for the cluster map markers.
import matplotlib.cm as cm
import matplotlib.colors as colors

# k-means clustering of the neighbourhoods.
from sklearn.cluster import KMeans

# Interactive maps.
import folium 
print("Libraries imported.")

Libraries imported.


### Scrape data from the Wikipedia page into a DataFrame

In [7]:
# Download the Wikipedia category page listing the articles of the New Delhi district.
WIKI_CATEGORY_URL = "https://en.wikipedia.org/wiki/Category:New_Delhi_district"
response = requests.get(WIKI_CATEGORY_URL, timeout=30)  # never hang forever on a stalled connection
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
data = response.text
soup = BeautifulSoup(data, 'html.parser')

# The article links live in <li> items inside the first "mw-category" <div>.
category_blocks = soup.find_all("div", class_="mw-category")
if not category_blocks:
    raise RuntimeError('No "mw-category" block found on {}'.format(WIKI_CATEGORY_URL))

neighborhoodList = [row.text for row in category_blocks[0].find_all("li")]
ndl_df = pd.DataFrame({"Neighborhood": neighborhoodList})

ndl_df.head()

Unnamed: 0,Neighborhood
0,New Delhi district
1,New Delhi
2,Chanakyapuri
3,Chittaranjan Park
4,"Connaught Place, New Delhi"


In [8]:
# (rows, columns) of the scraped frame: one row per list entry, one column.
ndl_df.shape

(9, 1)

### Getting the Coordinates

In [9]:
# Geocode the city itself; latitude/longitude are used later to centre the maps.
address = 'New Delhi, India'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; report that clearly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Delhi are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New Delhi are 28.6141793, 77.2022662.


In [15]:
def get_latlng(neighborhood, max_retries=10):
    """Look up the coordinates of a neighborhood of New Delhi.

    The query ``'<neighborhood>, New Delhi, India'`` is sent to the ArcGIS
    provider of the ``geocoder`` package and repeated while the result carries
    no coordinates.

    Parameters
    ----------
    neighborhood : str
        Name of the neighborhood to look up.
    max_retries : int, optional
        Maximum number of lookups before giving up (default 10). This bounds
        what used to be an endless loop when the service never answers.

    Returns
    -------
    The ``latlng`` attribute of the first result that is not ``None``.

    Raises
    ------
    RuntimeError
        If no lookup returned coordinates within ``max_retries`` attempts.
    """
    query = '{}, New Delhi, India'.format(neighborhood)
    for _ in range(max_retries):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        'No coordinates returned for {!r} after {} attempts'.format(query, max_retries))

In [23]:
# One coordinate lookup per scraped neighbourhood, in frame order.
coords = list(map(get_latlng, ndl_df["Neighborhood"].tolist()))

In [24]:
# Show the values returned by get_latlng, one entry per neighbourhood.
coords

[[28.637200000000064, 77.28752000000003],
 [28.63095000000004, 77.21721000000008],
 [28.595060000000046, 77.18573000000004],
 [28.538400000000024, 77.24832000000004],
 [28.633940000000052, 77.21968000000004],
 [28.574350000000038, 77.22419000000008],
 [28.57815000000005, 77.20618000000007],
 [28.583620000000053, 77.16474000000005],
 [28.580996661117194, 77.18182278573488]]

In [25]:
# Turn the list of coordinate pairs into a two-column frame (first value ->
# 'Latitude', second value -> 'Longitude').
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [27]:
# Copy both coordinate columns onto the neighbourhood frame (aligned on the row index).
for coord_column in ('Latitude', 'Longitude'):
    ndl_df[coord_column] = df_coords[coord_column]
print(ndl_df.shape)
ndl_df

(9, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,New Delhi district,28.6372,77.28752
1,New Delhi,28.63095,77.21721
2,Chanakyapuri,28.59506,77.18573
3,Chittaranjan Park,28.5384,77.24832
4,"Connaught Place, New Delhi",28.63394,77.21968
5,Kotla Mubarakpur Complex,28.57435,77.22419
6,Laxmibai Nagar,28.57815,77.20618
7,Moti Bagh,28.58362,77.16474
8,New Moti Bagh,28.580997,77.181823


In [28]:
# Geocode the city; latitude/longitude set here are what the maps below are centred on.
address = 'New Delhi, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; report that clearly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Delhi, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New Delhi, India 28.6141793, 77.2022662.


In [31]:
# Base map centred on the city coordinates.
map_ndl = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per neighbourhood; its popup carries the neighbourhood name.
marker_rows = zip(ndl_df['Latitude'], ndl_df['Longitude'], ndl_df['Neighborhood'])
for lat, lng, neighborhood in marker_rows:
    label = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_ndl)

map_ndl

### Use the Foursquare API to explore the neighborhoods

In [32]:
import os

# Credentials come from the environment so that they are never stored in the
# notebook source or echoed into its saved output.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # sent as the `v` parameter of every Foursquare request

# Fail fast with an actionable message rather than on the first API call.
if not (CLIENT_ID and CLIENT_SECRET):
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment '
        'variables before running this notebook.')

print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


#### The top 100 venues that are within a radius of 2500 meters.

In [33]:
radius = 2500
LIMIT = 100

EXPLORE_URL = "https://api.foursquare.com/v2/venues/explore"

venues = []

for lat, long, neighborhood in zip(ndl_df['Latitude'], ndl_df['Longitude'], ndl_df['Neighborhood']):

    # Let requests build and URL-encode the query string instead of formatting it by hand.
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "v": VERSION,
        "ll": "{},{}".format(lat, long),
        "radius": radius,
        "limit": LIMIT,
    }

    # make the GET request; bound the wait and surface HTTP errors immediately
    response = requests.get(EXPLORE_URL, params=params, timeout=30)
    response.raise_for_status()

    # A location without recommendations contributes no venues instead of crashing.
    groups = response.json()["response"].get("groups") or []
    results = groups[0]["items"] if groups else []

    # keep only the relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # The analysis below is keyed on the category, so a venue without
            # one cannot be used (indexing [0] would raise IndexError).
            continue
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))

In [37]:
# Name the columns at construction time: with an empty `venues` list this
# yields an empty, correctly labelled frame, whereas assigning `.columns`
# afterwards raises a length-mismatch ValueError.
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print(venues_df.shape)
venues_df.head()

(788, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,New Delhi district,28.6372,77.28752,v3s mall,28.636856,77.285363,Arcade
1,New Delhi district,28.6372,77.28752,Classic Ice Cream Parlour,28.641731,77.293177,Arcade
2,New Delhi district,28.6372,77.28752,Pizza Hut,28.637009,77.286318,Pizza Place
3,New Delhi district,28.6372,77.28752,Bercos,28.639355,77.291209,Chinese Restaurant
4,New Delhi district,28.6372,77.28752,Shiv Tikki,28.648268,77.302475,Food Truck


In [38]:
# Count the non-null values of every column per neighbourhood, i.e. how many
# venues were collected for each one.
print("Venues returned for each district")
venues_df.groupby(["Neighborhood"]).count()

Venues returned for each district


Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Chanakyapuri,82,82,82,82,82,82
Chittaranjan Park,100,100,100,100,100,100
"Connaught Place, New Delhi",100,100,100,100,100,100
Kotla Mubarakpur Complex,100,100,100,100,100,100
Laxmibai Nagar,100,100,100,100,100,100
Moti Bagh,67,67,67,67,67,67
New Delhi,100,100,100,100,100,100
New Delhi district,39,39,39,39,39,39
New Moti Bagh,100,100,100,100,100,100


In [39]:
# Number of distinct venue categories across all collected venues.
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 112 uniques categories.


In [55]:
# Check that the category analysed below occurs at least once in the data.
"Asian Restaurant" in venues_df['VenueCategory'].unique()

True

### Analyse Each Neighborhood

In [51]:
# One indicator column per venue category (no prefix, so columns carry the bare category name).
ndl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# Label every venue row with its neighbourhood. The column is called
# 'Neighborhoods' (plural), which keeps it distinct from the venue category
# named 'Neighborhood' that shows up among the indicator columns.
ndl_onehot['Neighborhoods'] = venues_df['Neighborhood']

# Move the freshly appended label column to the front.
*category_columns, label_column = ndl_onehot.columns
fixed_columns = [label_column, *category_columns]
ndl_onehot = ndl_onehot[fixed_columns]

print(ndl_onehot.shape)
ndl_onehot.head()

(788, 113)


Unnamed: 0,Neighborhoods,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beer Garden,Bengali Restaurant,Bistro,Breakfast Spot,Burger Joint,Bus Station,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Deli / Bodega,Dessert Shop,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Garden Center,Gastropub,Gourmet Shop,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Karnataka Restaurant,Korean Restaurant,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Mosque,Movie Theater,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Northeast Indian Restaurant,Office,Park,Pharmacy,Pizza Place,Planetarium,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Racetrack,Restaurant,River,Salad Place,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Spa,Spiritual Center,Stadium,Tea Room,Temple,Thai Restaurant,Theater,Tibetan Restaurant,Trail,Train Station,University,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,New Delhi district,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,New Delhi district,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,New Delhi district,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,New Delhi district,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,New Delhi district,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [52]:
# Mean of each indicator column per neighbourhood = share of the
# neighbourhood's venues that fall into that category.
neighborhood_groups = ndl_onehot.groupby(["Neighborhoods"])
ndl_grouped = neighborhood_groups.mean().reset_index()

print(ndl_grouped.shape)
ndl_grouped

(9, 113)


Unnamed: 0,Neighborhoods,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beer Garden,Bengali Restaurant,Bistro,Breakfast Spot,Burger Joint,Bus Station,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Deli / Bodega,Dessert Shop,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Frozen Yogurt Shop,Garden Center,Gastropub,Gourmet Shop,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jazz Club,Karnataka Restaurant,Korean Restaurant,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Mosque,Movie Theater,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Northeast Indian Restaurant,Office,Park,Pharmacy,Pizza Place,Planetarium,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Racetrack,Restaurant,River,Salad Place,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,South Indian Restaurant,Spa,Spiritual Center,Stadium,Tea Room,Temple,Thai Restaurant,Theater,Tibetan Restaurant,Trail,Train Station,University,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,Chanakyapuri,0.0,0.0,0.012195,0.0,0.012195,0.0,0.012195,0.0,0.0,0.012195,0.012195,0.0,0.0,0.060976,0.036585,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.02439,0.0,0.060976,0.012195,0.0,0.0,0.0,0.182927,0.0,0.012195,0.012195,0.0,0.012195,0.0,0.012195,0.012195,0.012195,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.02439,0.0,0.0,0.036585,0.012195,0.02439,0.0,0.02439,0.0,0.012195,0.012195,0.0,0.012195,0.012195,0.0,0.012195,0.0,0.036585,0.0,0.0,0.0,0.012195,0.012195,0.012195,0.012195,0.02439,0.012195,0.012195,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0
1,Chittaranjan Park,0.0,0.0,0.02,0.0,0.03,0.03,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.06,0.04,0.01,0.0,0.07,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.02,0.0,0.0,0.0,0.05,0.01,0.01,0.0,0.02,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.07,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.02,0.05,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0
2,"Connaught Place, New Delhi",0.01,0.02,0.02,0.0,0.01,0.02,0.05,0.01,0.0,0.01,0.0,0.0,0.0,0.07,0.03,0.01,0.0,0.03,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.11,0.0,0.0,0.01,0.01,0.17,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Kotla Mubarakpur Complex,0.01,0.0,0.01,0.01,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.08,0.04,0.01,0.01,0.04,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.01,0.0,0.12,0.01,0.05,0.01,0.0,0.0,0.0,0.0,0.01,0.07,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.03,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.05,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0
4,Laxmibai Nagar,0.0,0.0,0.03,0.01,0.0,0.03,0.02,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.04,0.0,0.01,0.06,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.17,0.0,0.05,0.03,0.01,0.0,0.01,0.0,0.02,0.05,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.0,0.02,0.0,0.01,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
5,Moti Bagh,0.0,0.0,0.029851,0.0,0.0,0.014925,0.014925,0.0,0.0,0.014925,0.014925,0.0,0.0,0.074627,0.029851,0.0,0.014925,0.029851,0.014925,0.0,0.0,0.014925,0.0,0.0,0.014925,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029851,0.0,0.014925,0.0,0.014925,0.029851,0.014925,0.0,0.014925,0.0,0.0,0.044776,0.014925,0.014925,0.0,0.0,0.104478,0.0,0.014925,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.0,0.014925,0.014925,0.014925,0.0,0.029851,0.014925,0.014925,0.0,0.014925,0.014925,0.014925,0.0,0.0,0.0,0.029851,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.014925,0.014925,0.014925,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.0,0.0
6,New Delhi,0.01,0.02,0.02,0.0,0.01,0.02,0.05,0.01,0.0,0.01,0.0,0.0,0.0,0.08,0.03,0.01,0.0,0.04,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.11,0.0,0.0,0.01,0.01,0.15,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,New Delhi district,0.051282,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.076923,0.025641,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.128205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.025641,0.0,0.0,0.051282,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.025641
8,New Moti Bagh,0.0,0.0,0.04,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.07,0.04,0.0,0.01,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.05,0.01,0.01,0.0,0.0,0.17,0.0,0.03,0.01,0.0,0.01,0.0,0.0,0.03,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.04,0.01,0.01,0.0,0.01,0.0,0.02,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.03,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0


In [57]:
# How many neighbourhoods have at least one Asian restaurant among their venues.
int((ndl_grouped["Asian Restaurant"] > 0).sum())

8

#### Create a new DataFrame for Asian Restaurant data only

In [61]:
# Keep only the neighbourhood name and its mean 'Asian Restaurant' indicator
# (the share of that neighbourhood's venues in this category).
ndl_AR = ndl_grouped[["Neighborhoods","Asian Restaurant"]]
ndl_AR.head()

Unnamed: 0,Neighborhoods,Asian Restaurant
0,Chanakyapuri,0.012195
1,Chittaranjan Park,0.02
2,"Connaught Place, New Delhi",0.02
3,Kotla Mubarakpur Complex,0.01
4,Laxmibai Nagar,0.03


### Clustering the Neighborhoods

In [63]:
kclusters = 3

# Cluster on the Asian-restaurant share only. `columns=` is used because the
# positional axis form `drop(labels, 1)` is rejected by pandas >= 2.0.
ndl_clustering = ndl_AR.drop(columns=["Neighborhoods"])

# n_init is pinned to 10 (the long-standing default) so the labels do not
# change with the scikit-learn version; random_state fixes the seeding.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(ndl_clustering)

# Cluster label assigned to each row of ndl_AR (first ten shown).
kmeans.labels_[0:10]

array([0, 2, 2, 0, 1, 1, 2, 0, 1], dtype=int32)

In [64]:
# New frame: the Asian-restaurant shares plus the k-means label of each row.
ndl_merged = ndl_AR.assign(**{"Cluster Labels": kmeans.labels_})

In [65]:
# Use the same key column name as ndl_df so the two frames can be joined on it.
ndl_merged = ndl_merged.rename(columns={"Neighborhoods": "Neighborhood"})
ndl_merged.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels
0,Chanakyapuri,0.012195,0
1,Chittaranjan Park,0.02,2
2,"Connaught Place, New Delhi",0.02,2
3,Kotla Mubarakpur Complex,0.01,0
4,Laxmibai Nagar,0.03,1


In [66]:
# Bring in Latitude/Longitude by looking each neighbourhood up in ndl_df.
neighborhood_coords = ndl_df.set_index("Neighborhood")
ndl_merged = ndl_merged.join(neighborhood_coords, on="Neighborhood")

print(ndl_merged.shape)
ndl_merged.head()

(9, 5)


Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels,Latitude,Longitude
0,Chanakyapuri,0.012195,0,28.59506,77.18573
1,Chittaranjan Park,0.02,2,28.5384,77.24832
2,"Connaught Place, New Delhi",0.02,2,28.63394,77.21968
3,Kotla Mubarakpur Complex,0.01,0,28.57435,77.22419
4,Laxmibai Nagar,0.03,1,28.57815,77.20618


In [67]:
print(ndl_merged.shape)
# Order the rows by cluster label so that each cluster reads as one block.
ndl_merged = ndl_merged.sort_values(["Cluster Labels"])
ndl_merged

(9, 5)


Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels,Latitude,Longitude
0,Chanakyapuri,0.012195,0,28.59506,77.18573
3,Kotla Mubarakpur Complex,0.01,0,28.57435,77.22419
7,New Delhi district,0.0,0,28.6372,77.28752
4,Laxmibai Nagar,0.03,1,28.57815,77.20618
5,Moti Bagh,0.029851,1,28.58362,77.16474
8,New Moti Bagh,0.04,1,28.580997,77.181823
1,Chittaranjan Park,0.02,2,28.5384,77.24832
2,"Connaught Place, New Delhi",0.02,2,28.63394,77.21968
6,New Delhi,0.02,2,28.63095,77.21721


### Visualizing the dataframe

In [69]:
# Base map centred on the city coordinates.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One evenly spaced colour of the rainbow colormap per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# One circle per neighbourhood, coloured by its cluster; the popup shows name and cluster.
for lat, lon, poi, cluster in zip(ndl_merged['Latitude'], ndl_merged['Longitude'], ndl_merged['Neighborhood'], ndl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # Labels start at 0, so `cluster - 1` is -1 for cluster 0; the modulo spells
    # out the wrap-around: cluster 0 takes the last colour of the palette.
    marker_color = rainbow[(cluster - 1) % kclusters]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examining the Clusters

#### Cluster 0

In [71]:
# Neighbourhoods whose k-means label is 0.
ndl_merged.loc[ndl_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels,Latitude,Longitude
0,Chanakyapuri,0.012195,0,28.59506,77.18573
3,Kotla Mubarakpur Complex,0.01,0,28.57435,77.22419
7,New Delhi district,0.0,0,28.6372,77.28752


#### Cluster 1

In [72]:
# Neighbourhoods whose k-means label is 1.
ndl_merged.loc[ndl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels,Latitude,Longitude
4,Laxmibai Nagar,0.03,1,28.57815,77.20618
5,Moti Bagh,0.029851,1,28.58362,77.16474
8,New Moti Bagh,0.04,1,28.580997,77.181823


#### Cluster 2

In [74]:
# Neighbourhoods whose k-means label is 2.
ndl_merged.loc[ndl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Asian Restaurant,Cluster Labels,Latitude,Longitude
1,Chittaranjan Park,0.02,2,28.5384,77.24832
2,"Connaught Place, New Delhi",0.02,2,28.63394,77.21968
6,New Delhi,0.02,2,28.63095,77.21721


### Observations/Conclusion:

Cluster 1 has the highest share of Asian restaurants among the venues returned for the New Delhi area (about 3–4% of venues). Cluster 2 is not far behind (2%), while Cluster 0 has an almost negligible presence in the Asian restaurant market (roughly 1% or less). This project therefore recommends that my friend capitalize on these findings and open the new Asian restaurant either in a Cluster 0 neighborhood, where there is little to no competition, or in a Cluster 2 neighborhood, where competition is moderate.
Even though the market for Asian restaurants does not appear to be saturated in Cluster 1, the other two clusters present a better opportunity at this point.