# ----- Best Place for Groceries Warehouse -----

## Let's first import the libraries that we need in this project

In [1]:
import pandas as pd 
import numpy as np
import folium
import requests
from geopy.geocoders import Nominatim
import json
from pandas.io.json import json_normalize
import bs4 as bs
import lxml.html as lh
import urllib.request

In [2]:
# URL for Postal Codes of Canada
postal_code_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
page = urllib.request.urlopen(postal_code_url).read()
soup = bs.BeautifulSoup(page, 'lxml')
table = soup.find('table', class_='wikitable')

## Extracting data for Postal Codes Table from website

In [4]:
# extracting header and cells data from the table
header = [head.findAll(text=True)[0].strip() for head in table.findAll('th')]
data = [[td.findAll(text=True)[0].strip() for td in tr.findAll('td')] for tr in table.findAll('tr')]
data = [row for row in data if len(row) == 3]

# Make DataFrame
raw_data = pd.DataFrame(data,columns=header)
raw_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Data Cleaning
Remove the rows that don't have any Borough assigned and also make the borough as neighbourhood whose neighbourhood has not assigned.

In [5]:
raw_data = raw_data[raw_data['Borough'] != 'Not assigned'].reset_index(drop=True)
raw_data.loc[raw_data['Neighbourhood'] == 'Not assigned', ['Neighbourhood']] = raw_data['Borough']
raw_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


## Postal Codes of Canada

In [6]:
toronto_codes = raw_data.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
toronto_codes.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## * ------------------------------------------------------------------------------------------------------------------------- *

## Download geolocation data set for the Postal Codes

In [7]:
url = 'https://cocl.us/Geospatial_data'
!wget -q -O 'toronto_m.geospatial_data.csv' url
locs = pd.read_csv(url)
locs.head()

'wget' is not recognized as an internal or external command,
operable program or batch file.


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
toronto_codes = toronto_codes.set_index('Postcode') # set Postcode as index
toronto_codes.rename_axis('Postal Code', axis='index', inplace=True) # Change the name of index to Postal Code
toronto_codes.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [9]:
locs.set_index('Postal Code', inplace=True) # set Postal Code as Index
locs.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [10]:
# Join both dataframe; Toronto Postal Codes geolocation data with Borough and neighbourhoods of Toronto
toronto = toronto_codes.join(locs) 
toronto.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [11]:
toronto = toronto.reset_index()
toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## * ------------------------------------------------------------------------------------------------------------------------- *

In [12]:
toronto['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

## By using Geopy library Nominatim function, find coordinates of Toronto

In [13]:
address = 'Toronto, Canada'
geolocator = Nominatim(user_agent='t_agent')
t_location = geolocator.geocode(address)
t_latitude = t_location.latitude
t_longitude = t_location.longitude
print(f'Toronto lat: {t_latitude}, long: {t_longitude}')

Toronto lat: 43.653963, long: -79.387207


In [14]:
# Visualize Toronto Map showing all neighbourhoods with boroughs.

map_toronto = folium.Map(location=[t_latitude, t_longitude], zoom_start=10.5)

for lat, lng, pc, borough, neigh in zip(toronto['Latitude'],
                                       toronto['Longitude'],
                                       toronto['Postal Code'],
                                       toronto['Borough'],
                                       toronto['Neighbourhood']):
    label = '{}, {}, {}'.format(pc, neigh, borough)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=4,
                       popup=label,
                       color='blue',
                       fill=True,
                       fill_color='lightblue',
                       fill_opacity=0.7).add_to(map_toronto)
    
map_toronto

## Make Scarborough dataframe with all scarborough neighbourhood's information as the contractor made his mind to build warehouse in Scarborough only.

In [15]:
scarborough = toronto[toronto['Borough'] == 'Scarborough']
scarborough

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Get the Scarborough geolocation coordinates to visualize the map of Scarborough with all its neighbourhoods.

In [16]:
s_address = 'Scarborough, Canada'
geolocator = Nominatim(user_agent='s_agent')
location = geolocator.geocode(s_address)
s_lat = location.latitude
s_lng = location.longitude
print(f'Scarborough lat: {s_lat}, long: {s_lng}')

Scarborough lat: 43.773077, long: -79.257774


In [17]:
# Visualize Scraborough map showing all its neighbourhoods

map_scarborough = folium.Map(location=[s_lat, s_lng], zoom_start=12)

for lat, lng, label in zip(scarborough['Latitude'],
                          scarborough['Longitude'],
                          scarborough['Neighbourhood']):
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=4,
                       popup=label,
                       color='blue',
                       fill=True,
                       fill_color='royalblue',
                       fill_opacity=0.7).add_to(map_scarborough)
    
map_scarborough 

## Foursquare:

In [18]:
# Defining details for Foursquare method through which we will get all the venues details from each neighbourhood.

CLIENT_ID = '** hidden **'
CLIENT_SECRET = '** hidden **'
VERSION = '20180605'

In [19]:
result_ds = []
radius = 1000
LIMIT = 500

for pc, lat, lng, neigh in zip(list(scarborough['Postal Code']),
                               list(scarborough['Latitude']),
                               list(scarborough['Longitude']),
                               list(scarborough['Neighbourhood'])):
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID,
                                                                                                                               CLIENT_SECRET,
                                                                                                                               VERSION,
                                                                                                                               lat,
                                                                                                                               lng,
                                                                                                                               radius,
                                                                                                                               LIMIT)
    results = requests.get(url).json()['response']['groups'][0]['items']
    temp = {}
    temp['Postal Code'] = pc
    temp['Latitude'] = lat
    temp['Longitude'] = lng
    temp['Neughbourhood'] = neigh
    temp['Results'] = results
    result_ds.append(temp)
    
result_ds

[{'Postal Code': 'M1B',
  'Latitude': 43.806686299999996,
  'Longitude': -79.19435340000001,
  'Neughbourhood': 'Rouge, Malvern',
  'Results': [{'reasons': {'count': 0,
     'items': [{'summary': 'This spot is popular',
       'type': 'general',
       'reasonName': 'globalInteractionReason'}]},
    'venue': {'id': '4d669cba83865481c948fa53',
     'name': 'Images Salon & Spa',
     'location': {'address': '8130 Sheppard Ave E',
      'crossStreet': 'Morningside Ave',
      'lat': 43.80228301948931,
      'lng': -79.19856472801668,
      'labeledLatLngs': [{'label': 'display',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668}],
      'distance': 595,
      'postalCode': 'M1B 3W3',
      'cc': 'CA',
      'city': 'Toronto',
      'state': 'ON',
      'country': 'Canada',
      'formattedAddress': ['8130 Sheppard Ave E (Morningside Ave)',
       'Toronto ON M1B 3W3',
       'Canada']},
     'categories': [{'id': '4bf58dd8d48988d1ed941735',
       'name': 'Spa',
       '

In [20]:
# Make new DataFrame that will store information that we extracted from the Foursquare.

scar_data = pd.DataFrame(columns=['Postal Code', 'Neighbourhood', 'Latitude', 'Longitude', 'Venue', 'Venue Lat', 'Venue Long', 'Venue Category', 'Venue Summary', 'Distance'])
scar_data

Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude,Venue,Venue Lat,Venue Long,Venue Category,Venue Summary,Distance


In [21]:
# Inserting all the values into the Scarborough DataFrame extracted from the Foursquare.

for neigh in result_ds:
    pc = neigh['Postal Code']
    neighb = neigh['Neughbourhood']
    lat = neigh['Latitude']
    lng = neigh['Longitude']
    
    for venues in neigh['Results']:
        name = venues['venue']['name']
        cat = venues['venue']['categories'][0]['name']
        summ = venues['reasons']['items'][0]['summary']
        dist = venues['venue']['location']['distance']
        v_lat = venues['venue']['location']['lat']
        v_lng = venues['venue']['location']['lng']
        
        scar_data = scar_data.append({'Postal Code': pc, 'Neighbourhood': neighb, 'Latitude': lat, 'Longitude': lng,
                          'Venue': name, 'Venue Lat': v_lat, 'Venue Long': v_lng, 'Venue Category': cat, 'Venue Summary': summ, 'Distance': dist}, ignore_index=True)
        
scar_data.head()

Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude,Venue,Venue Lat,Venue Long,Venue Category,Venue Summary,Distance
0,M1B,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa,This spot is popular,595
1,M1B,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant,This spot is popular,912
2,M1B,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store,This spot is popular,735
3,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant,This spot is popular,600
4,M1B,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant,This spot is popular,387


In [22]:
scar_data.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Postal Code,Latitude,Longitude,Venue,Venue Lat,Venue Long,Venue Category,Venue Summary,Distance
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Agincourt,50,50,50,50,50,50,50,50,50
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",30,30,30,30,30,30,30,30,30
"Birch Cliff, Cliffside West",14,14,14,14,14,14,14,14,14
Cedarbrae,27,27,27,27,27,27,27,27,27
"Clairlea, Golden Mile, Oakridge",29,29,29,29,29,29,29,29,29
"Clarks Corners, Sullivan, Tam O'Shanter",34,34,34,34,34,34,34,34,34
"Cliffcrest, Cliffside, Scarborough Village West",12,12,12,12,12,12,12,12,12
"Dorset Park, Scarborough Town Centre, Wexford Heights",42,42,42,42,42,42,42,42,42
"East Birchmount Park, Ionview, Kennedy Park",23,23,23,23,23,23,23,23,23
"Guildwood, Morningside, West Hill",22,22,22,22,22,22,22,22,22


## One Hot Encoding

In [23]:
# Make dummies of Venue Category
dum = pd.get_dummies(scar_data['Venue Category'])
dum.head()

Unnamed: 0,African Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Tennis Court,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Insert Neighbourhood column
dum['Neighbourhood'] = scar_data['Neighbourhood']
dum.head()

Unnamed: 0,African Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio,Neighbourhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Rouge, Malvern"
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Rouge, Malvern"
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Rouge, Malvern"
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Rouge, Malvern"
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Rouge, Malvern"


In [25]:
# Make the Neighbourhood column, column 1.

feat = [dum.columns[-1]] + list(dum.columns[:-1])

In [26]:
# Creating OneHot Encoding DataFrame.
onehot = dum[feat]
onehot.head()

Unnamed: 0,Neighbourhood,African Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Badminton Court,Bakery,...,Tennis Court,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
onehot = onehot.groupby('Neighbourhood').sum()
onehot.head()

Unnamed: 0_level_0,African Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,...,Tennis Court,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wings Joint,Yoga Studio
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0,0,0,0,0,0,1,0,2,1,...,0,0,0,0,0,0,0,1,0,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,0,0,0,0,0,1,0,2,0,...,0,0,0,0,0,1,0,0,0,0
"Birch Cliff, Cliffside West",0,1,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
Cedarbrae,0,1,1,0,0,0,0,0,3,1,...,0,1,0,0,0,0,0,0,1,1
"Clairlea, Golden Mile, Oakridge",0,0,0,0,0,0,0,0,2,1,...,0,0,0,1,0,0,0,0,0,0


## List of Venue Categories where Goceries Contractor supplies his groceries
By using this list we can find the venues of these categories and then check how many are there in Scarborough and then find the best neighbourhood with most Venues like these, that can profit contractor.

In [28]:
# Making a list of places where Groceries contractor supplies his groceries.

important_features = [
 'Asian Restaurant',  
 'Bakery',     
 'Breakfast Spot',
 'Burger Joint',   
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant', 
 'Diner',
 'Fast Food Restaurant',
 'Fish Market',
 'Food & Drink Shop',
 'Fried Chicken Joint',
 'Fruit & Vegetable Store', 
 'Greek Restaurant',
 'Grocery Store', 
 'Hakka Restaurant', 
 'Hong Kong Restaurant',
 'Hotpot Restaurant', 
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',
 'Malay Restaurant', 
 'Mediterranean Restaurant', 
 'Mexican Restaurant',
 'Middle Eastern Restaurant', 
 'Noodle House', 
 'Pizza Place', 
 'Restaurant',
 'Sandwich Place',
 'Seafood Restaurant',
 'Shanghai Restaurant', 
 'Sushi Restaurant',
 'Taiwanese Restaurant', 
 'Thai Restaurant', 
 'Vegetarian / Vegan Restaurant', 
 'Vietnamese Restaurant',
 'Wings Joint']

In [29]:
# Make new DataFrame of important categories to whom contractor supplies
scar_onehot = onehot[important_features]
scar_onehot.head()

Unnamed: 0_level_0,Asian Restaurant,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Diner,Fast Food Restaurant,...,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0,2,1,0,0,1,2,8,0,0,...,2,2,1,1,1,0,0,0,1,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,2,0,0,0,0,1,6,0,2,...,0,0,0,0,0,0,0,1,0,0
"Birch Cliff, Cliffside West",1,0,0,0,0,0,0,0,1,1,...,1,0,0,0,0,0,1,0,0,0
Cedarbrae,1,3,0,1,0,0,1,1,0,1,...,0,0,0,0,0,0,1,0,0,1
"Clairlea, Golden Mile, Oakridge",0,2,0,0,0,0,0,0,2,2,...,0,1,0,0,0,0,0,0,0,0


In [30]:
feat_list = list(scar_onehot.columns.values)
feat_list

['Asian Restaurant',
 'Bakery',
 'Breakfast Spot',
 'Burger Joint',
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant',
 'Diner',
 'Fast Food Restaurant',
 'Fish Market',
 'Food & Drink Shop',
 'Fried Chicken Joint',
 'Fruit & Vegetable Store',
 'Greek Restaurant',
 'Grocery Store',
 'Hakka Restaurant',
 'Hong Kong Restaurant',
 'Hotpot Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',
 'Malay Restaurant',
 'Mediterranean Restaurant',
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 'Noodle House',
 'Pizza Place',
 'Restaurant',
 'Sandwich Place',
 'Seafood Restaurant',
 'Shanghai Restaurant',
 'Sushi Restaurant',
 'Taiwanese Restaurant',
 'Thai Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Vietnamese Restaurant',
 'Wings Joint']

In [31]:
# Making list of restaurants only from all the venues

restaurant_list = []
for num, value in enumerate(feat_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
restaurant_list       

['Asian Restaurant',
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant',
 'Fast Food Restaurant',
 'Greek Restaurant',
 'Hakka Restaurant',
 'Hong Kong Restaurant',
 'Hotpot Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',
 'Malay Restaurant',
 'Mediterranean Restaurant',
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 'Restaurant',
 'Seafood Restaurant',
 'Shanghai Restaurant',
 'Sushi Restaurant',
 'Taiwanese Restaurant',
 'Thai Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Vietnamese Restaurant']

In [32]:
# Sum the values of all the restaurants and insert it into new column name 'Total Restaurants'
scar_onehot['Total Restaurants'] = scar_onehot[restaurant_list].sum(axis=1)
scar_onehot.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0_level_0,Asian Restaurant,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Diner,Fast Food Restaurant,...,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Total Restaurants
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0,2,1,0,0,1,2,8,0,0,...,2,1,1,1,0,0,0,1,0,21
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",0,2,0,0,0,0,1,6,0,2,...,0,0,0,0,0,0,1,0,0,13
"Birch Cliff, Cliffside West",1,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,1,0,0,0,4
Cedarbrae,1,3,0,1,0,0,1,1,0,1,...,0,0,0,0,0,1,0,0,1,8
"Clairlea, Golden Mile, Oakridge",0,2,0,0,0,0,0,0,2,2,...,1,0,0,0,0,0,0,0,0,3


In [33]:
# Drop Restaurant columns as we already inserted 'Total Restaurant' column in the dataframe
scar_onehot = scar_onehot.drop(restaurant_list, axis=1)
scar_onehot

Unnamed: 0_level_0,Bakery,Breakfast Spot,Burger Joint,Diner,Fish Market,Food & Drink Shop,Fried Chicken Joint,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Wings Joint,Total Restaurants
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Agincourt,2,1,0,0,0,0,0,0,1,1,2,2,0,21
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,0,0,0,0,0,0,0,0,2,2,0,0,13
"Birch Cliff, Cliffside West",0,0,0,1,0,0,0,0,0,0,0,0,0,4
Cedarbrae,3,0,1,0,0,0,1,0,1,0,1,0,1,8
"Clairlea, Golden Mile, Oakridge",2,0,0,2,0,0,0,0,1,0,1,1,0,3
"Clarks Corners, Sullivan, Tam O'Shanter",1,0,0,0,0,0,1,0,1,1,2,2,0,12
"Cliffcrest, Cliffside, Scarborough Village West",0,0,1,0,0,0,0,0,0,0,3,0,0,3
"Dorset Park, Scarborough Town Centre, Wexford Heights",1,0,2,0,0,0,1,0,1,0,1,1,1,11
"East Birchmount Park, Ionview, Kennedy Park",0,0,1,0,0,0,0,0,2,0,1,1,0,4
"Guildwood, Morningside, West Hill",0,0,1,0,0,1,1,0,0,0,4,1,0,3


In [34]:
# Now make the list of all the joints from the venues

joint_list = []
for num, value in enumerate(list(scar_onehot.columns.values)):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
joint_list

['Burger Joint', 'Fried Chicken Joint', 'Wings Joint']

In [35]:
# Add new column 'Total joints' with sum of all the joint values. 
# and drop other joint columns from the dataframe as we already inserted 'Total joint' column.

scar_onehot['Total Joints'] = scar_onehot[joint_list].sum(axis=1)
scar_onehot = scar_onehot.drop(joint_list, axis=1)
scar_onehot

Unnamed: 0_level_0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Agincourt,2,1,0,0,0,0,1,1,2,2,21,0
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,0,0,0,0,0,0,2,2,0,13,0
"Birch Cliff, Cliffside West",0,0,1,0,0,0,0,0,0,0,4,0
Cedarbrae,3,0,0,0,0,0,1,0,1,0,8,3
"Clairlea, Golden Mile, Oakridge",2,0,2,0,0,0,1,0,1,1,3,0
"Clarks Corners, Sullivan, Tam O'Shanter",1,0,0,0,0,0,1,1,2,2,12,1
"Cliffcrest, Cliffside, Scarborough Village West",0,0,0,0,0,0,0,0,3,0,3,1
"Dorset Park, Scarborough Town Centre, Wexford Heights",1,0,0,0,0,0,1,0,1,1,11,4
"East Birchmount Park, Ionview, Kennedy Park",0,0,0,0,0,0,2,0,1,1,4,1
"Guildwood, Morningside, West Hill",0,0,0,0,1,0,0,0,4,1,3,2


## * ------------------------------------------------------------------------------------------------------------------------- *

##  Predicting the best neighbourhood in Scarborough for Groceries Warehouse using KMeans Clustering

In [36]:
from sklearn.cluster import KMeans # import KMeans Clustering

In [37]:
kmean = KMeans(n_clusters=5, random_state=0).fit(scar_onehot)

In [38]:
# Make new Dataframe with Kmeans cluster centers
means = pd.DataFrame(kmean.cluster_centers_)
means

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,2.333333,0.333333,2.333333,1.333333
1,1.666667,0.333333,0.0,0.333333,0.0,0.0,1.333333,0.0,1.333333,0.333333,9.333333,3.0
2,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,21.0,0.0
3,0.714286,0.142857,0.428571,0.0,0.0,0.142857,0.571429,0.142857,0.571429,0.714286,4.285714,0.142857
4,1.5,0.0,0.0,0.0,0.0,0.0,0.5,1.5,2.0,1.0,12.5,0.5


In [39]:
# Inserting Columns
means.columns = scar_onehot.columns
means

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,2.333333,0.333333,2.333333,1.333333
1,1.666667,0.333333,0.0,0.333333,0.0,0.0,1.333333,0.0,1.333333,0.333333,9.333333,3.0
2,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,21.0,0.0
3,0.714286,0.142857,0.428571,0.0,0.0,0.142857,0.571429,0.142857,0.571429,0.714286,4.285714,0.142857
4,1.5,0.0,0.0,0.0,0.0,0.0,0.5,1.5,2.0,1.0,12.5,0.5


## Naming the Rows with Group numbers.
## Group number with highest value is the best neighbourhood to build warehouse.
Beacause Group number (G1 or G2 or G3 or G4 or G5) with highest 'Total Sum' value represents that there are more venues near to the neighbourhood as compare to other group numbers.

In [40]:
# Naming the groups

means.index = ['G0','G1','G2','G3','G4']

# Adding all the values and store it in 'Total Sum' column
means['Total Sum'] = means.sum(axis=1)
means

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
G0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,2.333333,0.333333,2.333333,1.333333,7.0
G1,1.666667,0.333333,0.0,0.333333,0.0,0.0,1.333333,0.0,1.333333,0.333333,9.333333,3.0,17.666667
G2,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,21.0,0.0,30.0
G3,0.714286,0.142857,0.428571,0.0,0.0,0.142857,0.571429,0.142857,0.571429,0.714286,4.285714,0.142857,7.857143
G4,1.5,0.0,0.0,0.0,0.0,0.0,0.5,1.5,2.0,1.0,12.5,0.5,19.5


## Sorting the Dataframe according to the 'Total Sum' column.

In [41]:
means = means.sort_values(by='Total Sum', axis=0, ascending=False)
means

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
G2,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,2.0,21.0,0.0,30.0
G4,1.5,0.0,0.0,0.0,0.0,0.0,0.5,1.5,2.0,1.0,12.5,0.5,19.5
G1,1.666667,0.333333,0.0,0.333333,0.0,0.0,1.333333,0.0,1.333333,0.333333,9.333333,3.0,17.666667
G3,0.714286,0.142857,0.428571,0.0,0.0,0.142857,0.571429,0.142857,0.571429,0.714286,4.285714,0.142857,7.857143
G0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,2.333333,0.333333,2.333333,1.333333,7.0


## Summary DataFrame

In [42]:
summary = pd.DataFrame([scar_onehot.index,kmean.labels_]).T

In [43]:
summary.columns=['Neighbourhood', 'Group']
print(kmean.labels_)
summary

[2 4 3 1 3 4 0 1 3 0 0 3 1 3 3 3]


Unnamed: 0,Neighbourhood,Group
0,Agincourt,2
1,"Agincourt North, L'Amoreaux East, Milliken, St...",4
2,"Birch Cliff, Cliffside West",3
3,Cedarbrae,1
4,"Clairlea, Golden Mile, Oakridge",3
5,"Clarks Corners, Sullivan, Tam O'Shanter",4
6,"Cliffcrest, Cliffside, Scarborough Village West",0
7,"Dorset Park, Scarborough Town Centre, Wexford ...",1
8,"East Birchmount Park, Ionview, Kennedy Park",3
9,"Guildwood, Morningside, West Hill",0


## The Best Result is : G2

In [52]:
best = summary[summary['Group'] == 2]['Neighbourhood'][0]
print('Best Neighbourhood for building Groceries Warehouse is : ',best)

Best Neighbourhood for building Groceries Warehouse is :  Agincourt


## Different Groups or Clusters : 

In [45]:
def group_result(group):
    a = group['Neighbourhood']
    g_data = pd.DataFrame(columns= ['Neighbourhood', 'Latitude', 'Longitude'])
    for i in a:
        v = scar_data[scar_data['Neighbourhood'] == i]
        v_neigh = v['Neighbourhood'].iloc[0]
        v_lat = v['Latitude'].iloc[0]
        v_lon =v['Longitude'].iloc[0]
        g_data = g_data.append({'Neighbourhood': v_neigh, 'Latitude': v_lat, 'Longitude': v_lon}, ignore_index=True)
    return g_data

## G2 (1st Best) : 'Green'

In [46]:
g2 = summary[summary['Group'] == 2]
g2_data = group_result(g2)
print('G2')
g2_data

G2


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,Agincourt,43.7942,-79.262029


## G4 (2nd Best) : 'Blue'

In [47]:
g4 = summary[summary['Group'] == 4]
g4_data = group_result(g4)
print('G4')
g4_data

G4


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,"Agincourt North, L'Amoreaux East, Milliken, St...",43.815252,-79.284577
1,"Clarks Corners, Sullivan, Tam O'Shanter",43.781638,-79.304302


## G1 (3rd Best) : 'Yellow'

In [48]:
g1 = summary[summary['Group'] == 1]
g1_data = group_result(g1)
print('G1')
g1_data

G1


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,Cedarbrae,43.773136,-79.239476
1,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304
2,"Maryvale, Wexford",43.750072,-79.295849


## G3 (4th Best) : 'Black'

In [49]:
g3 = summary[summary['Group'] == 3]
g3_data = group_result(g3)
print('G3')
g3_data

G3


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,"Birch Cliff, Cliffside West",43.692657,-79.264848
1,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
2,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
3,L'Amoreaux West,43.799525,-79.318389
4,"Rouge, Malvern",43.806686,-79.194353
5,Scarborough Village,43.744734,-79.239476
6,Woburn,43.770992,-79.216917


## G0 (5th Best) : 'Red'

In [50]:
g0 = summary[summary['Group'] == 0]
g0_data = group_result(g0)
print('G0')
g0_data

G0


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
1,"Guildwood, Morningside, West Hill",43.763573,-79.188711
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497


## Visualize Map with all the neighbourhoods with different colors representing different clusters or groups
Locations for Groceries Warehouse highlighted with different colors on the Map.
#### 1st Best Location(s) : 'Green' color
#### 2nd Best Location(s) : 'Blue' color
#### 3rd Best Location(s) : 'Yellow' color
#### 4th Best Location(s) : 'Black' color
#### 5th Best Location(s) : 'Red' color

In [55]:
map_finale = folium.Map(location=[s_lat, s_lng], zoom_start=12)
for lat, lng, neigh in zip(g2_data['Latitude'],g2_data['Longitude'],g2_data['Neighbourhood']):
    label = 'G2, {}'.format(neigh)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='green',
                       fill=True,
                       fill_color='green',
                       fill_opacity=0.7).add_to(map_finale)
    
for lat, lng, neigh in zip(g4_data['Latitude'],g4_data['Longitude'],g4_data['Neighbourhood']):
    label = 'G4, {}'.format(neigh)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='blue',
                       fill=True,
                       fill_color='blue',
                       fill_opacity=0.7).add_to(map_finale)
    
for lat, lng, neigh in zip(g1_data['Latitude'],g1_data['Longitude'],g1_data['Neighbourhood']):
    label = 'G1, {}'.format(neigh)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='darkorange',
                       fill=True,
                       fill_color='darkorange',
                       fill_opacity=0.7).add_to(map_finale)
    
for lat, lng, neigh in zip(g3_data['Latitude'],g3_data['Longitude'],g3_data['Neighbourhood']):
    label = 'G3, {}'.format(neigh)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='black',
                       fill=True,
                       fill_color='black',
                       fill_opacity=0.7).add_to(map_finale)
    
for lat, lng, neigh in zip(g0_data['Latitude'],g0_data['Longitude'],g0_data['Neighbourhood']):
    label = 'G0, {}'.format(neigh)
    label = folium.Popup(label)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='red',
                       fill=True,
                       fill_color='red',
                       fill_opacity=0.7).add_to(map_finale)
    
map_finale