# Step 1: Retrive info from internet and create a panda data frame including all needed data

In [5]:
# importing necessary libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

# retrieve data from internet
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text

# using beautiful soup to parse the HTML/XML codes.
soup = BeautifulSoup(raw_wikipedia_page,'lxml')
# print(soup.prettify())

In [6]:
# extracting the raw table inside that webpage
table = soup.find('table')

Postcode      = []
Borough       = []
Neighbourhood = []

# print(table)

# extracting a clean form of the table
for tr_cell in table.find_all('tr'):
    
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighbourhood_var = -1
    
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_var = td_cell.text
        if counter == 2: 
            Borough_var = td_cell.text
            tag_a_Borough = td_cell.find('a')
            
        if counter == 3: 
            Neighbourhood_var = str(td_cell.text).strip()
            tag_a_Neighbourhood = td_cell.find('a')
            
        counter +=1
        
    if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighbourhood_var == 'Not assigned'): 
        continue
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):
            continue
    except:
        pass
    if(Postcode_var == -1 or Borough_var == -1 or Neighbourhood_var == -1):
        continue
        
    Postcode.append(Postcode_var)
    Borough.append(Borough_var)
    Neighbourhood.append(Neighbourhood_var)

In [7]:
unique_p = set(Postcode)

print('Number of unique Postal codes:', len(unique_p))

Postcode_u      = []
Borough_u       = []
Neighbourhood_u = []


for postcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighbourhood_u.append(n_var)

Number of unique Postal codes: 84


In [16]:
## get all geographical coordinates of the neighborhoods.
geo_data_df = pd.read_csv('https://cocl.us/Geospatial_data')
geo_data_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [19]:

toronto_dict = {'Postcode':Postcode_u, 'Borough':Borough_u, 'Neighbourhood':Neighbourhood_u}
toronto_base = pd.DataFrame.from_dict(toronto_dict)
# df_toronto.to_csv('toronto_part1.csv')
toronto_base.head()

Unnamed: 0,Borough,Neighbourhood,Postcode
0,Downtown Toronto,Ryerson,M5B
1,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",M1P
2,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Sou...",M5V
3,Etobicoke,"Islington, Princess Gardens, West Deane Park",M9B
4,West Toronto,Dovercourt Village,M6H


In [21]:
# Merge the coordinates info in Postalcode/Borough/Neighborhood dataframe.
df_toronto = pd.merge(toronto_base, geo_data_df,left_on='Postcode',right_on='Postal Code',how='left').drop('Postal Code', axis=1)
df_toronto.head()

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,Downtown Toronto,Ryerson,M5B,43.657162,-79.378937
1,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",M1P,43.75741,-79.273304
2,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Sou...",M5V,43.628947,-79.39442
3,Etobicoke,"Islington, Princess Gardens, West Deane Park",M9B,43.650943,-79.554724
4,West Toronto,Dovercourt Village,M6H,43.669005,-79.442259


# Step2:  Creat Toronto and Scarborough's map and find all venues info, Clusting

In [23]:
# importing new libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  24.74 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00  36.82 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  55.16 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  34.15 MB/s
vincent-0.4.4- 100% |###################

In [25]:

# Create a map of toronto
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

In [26]:
## Define Foursquare Credentials and Version
CLIENT_ID = 'BTDVXGUZAWHIDRCOFFDJO1EH3L5HA3Y2BLE4BSWNDYVYLM03' # your Foursquare ID
CLIENT_SECRET = '4LGW4PEZ1HIXT3MNGK4ZDNNX53T4GP5G0CNVCJMRGHZ0BMIT' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BTDVXGUZAWHIDRCOFFDJO1EH3L5HA3Y2BLE4BSWNDYVYLM03
CLIENT_SECRET:4LGW4PEZ1HIXT3MNGK4ZDNNX53T4GP5G0CNVCJMRGHZ0BMIT


In [31]:
# Create a new data frame with neighborhoods in Scarborough
scarborough_data = df_toronto[df_toronto['Borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head(10)

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",M1P,43.75741,-79.273304
1,Scarborough,"Agincourt North, Milliken",M1V,43.815252,-79.284577
2,Scarborough,Upper Rouge,M1X,43.836125,-79.205636
3,Scarborough,Woburn,M1G,43.770992,-79.216917
4,Scarborough,Steeles West,M1W,43.799525,-79.318389
5,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577
6,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
7,Scarborough,Tam O'Shanter,M1T,43.781638,-79.304302
8,Scarborough,Agincourt,M1S,43.7942,-79.262029
9,Scarborough,"Cliffcrest, Cliffside",M1M,43.716316,-79.239476


In [29]:
# Create a map of Scarborough and its neighbourhoods
address_scar = 'Scarborough,Toronto'
latitude_scar = 43.773077
longitude_scar = -79.257774
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude_scar, longitude_scar))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [100]:
map_scarb = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=12)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarb)  
    
map_scarb

In [32]:
neighborhood_latitude = scarborough_data.loc[4, 'Latitude'] # neighbourhood latitude value
neighborhood_longitude = scarborough_data.loc[4, 'Longitude'] # neighbourhood longitude value

neighborhood_name = scarborough_data.loc[4, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of "{}" are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of "Steeles West" are 43.799525200000005, -79.3183887.


In [37]:
LIMIT = 100
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_scar, longitude_scar, VERSION, radius, LIMIT)

results = requests.get(url).json()
# results

In [38]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [39]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Disney Store,Toy / Game Store,43.775537,-79.256833
1,Canyon Creek Chophouse,Steakhouse,43.776959,-79.261694
2,DAVIDsTEA,Tea Room,43.776613,-79.258516
3,Tommy Hilfiger Company Store,Clothing Store,43.776015,-79.257369
4,American Eagle Outfitters,Clothing Store,43.775908,-79.258352
5,Chipotle Mexican Grill,Mexican Restaurant,43.77641,-79.258069
6,SEPHORA,Cosmetics Shop,43.775592,-79.258242
7,Coliseum Scarborough Cinemas,Movie Theater,43.775995,-79.255649
8,Shoppers Drug Mart,Pharmacy,43.772747,-79.251123
9,CANBE Foods Inc,Indian Restaurant,43.773546,-79.246082


In [40]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

63 venues were returned by Foursquare.


In [41]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [42]:
# Get venues for each neighborhood in Scarborough

scarborough_venues = getNearbyVenues(names=scarborough_data['Neighbourhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longitude']
                                  )

Dorset Park, Scarborough Town Centre, Wexford Heights
Agincourt North, Milliken
Upper Rouge
Woburn
Steeles West
Clairlea, Golden Mile, Oakridge
Cedarbrae
Tam O'Shanter
Agincourt
Cliffcrest, Cliffside
Ionview, Kennedy Park
Rouge, Malvern
Maryvale, Wexford
Scarborough Village
Highland Creek, Rouge Hill, Port Union
Birch Cliff
Morningside, West Hill


In [47]:
scarborough_venues.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kairali,43.754768,-79.277199,Indian Restaurant
1,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kim Kim restaurant,43.753833,-79.276611,Chinese Restaurant
2,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,El Pulgarcito,43.75479,-79.277064,Latin American Restaurant
3,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Karaikudi Chettinad South Indian Restaurant,43.756042,-79.276276,Indian Restaurant
4,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Big Al's Pet Supercentre,43.759279,-79.278325,Pet Store
5,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Pho Vietnam,43.75777,-79.278572,Vietnamese Restaurant
6,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Mill St. Brewery,43.759472,-79.271121,Brewery
7,"Agincourt North, Milliken",43.815252,-79.284577,Port Royal Park,43.815477,-79.289773,Park
8,"Agincourt North, Milliken",43.815252,-79.284577,Jun Yue Foods,43.814737,-79.289824,Asian Restaurant
9,"Agincourt North, Milliken",43.815252,-79.284577,Milliken Public School Playground,43.815383,-79.289867,Playground


In [48]:
scarborough_venues.tail(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
75,Birch Cliff,43.692657,-79.264848,The Birchcliff,43.691666,-79.264532,Café
76,Birch Cliff,43.692657,-79.264848,Birchmount Community Centre,43.695175,-79.262161,General Entertainment
77,Birch Cliff,43.692657,-79.264848,Scarborough Gardens,43.694647,-79.26223,Skating Rink
78,Birch Cliff,43.692657,-79.264848,Birchmount Stadium,43.695323,-79.261293,College Stadium
79,"Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
80,"Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
81,"Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
82,"Morningside, West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
83,"Morningside, West Hill",43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
84,"Morningside, West Hill",43.763573,-79.188711,Eggsmart,43.7678,-79.190466,Breakfast Spot


In [50]:
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Agincourt North, Milliken",3,3,3,3,3,3
Birch Cliff,4,4,4,4,4,4
Cedarbrae,7,7,7,7,7,7
"Clairlea, Golden Mile, Oakridge",10,10,10,10,10,10
"Cliffcrest, Cliffside",2,2,2,2,2,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",7,7,7,7,7,7
"Highland Creek, Rouge Hill, Port Union",3,3,3,3,3,3
"Ionview, Kennedy Park",7,7,7,7,7,7
"Maryvale, Wexford",4,4,4,4,4,4


In [51]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 55 uniques categories.


In [55]:
# one hot encoding
scarb_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarb_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarb_onehot.columns[-1]] + list(scarb_onehot.columns[:-1])
scarb_onehot = scarb_onehot[fixed_columns]

scarb_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Brewery,Bus Line,Bus Station,Business Service,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Grocery Store,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Moving Target,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,"Dorset Park, Scarborough Town Centre, Wexford ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Dorset Park, Scarborough Town Centre, Wexford ...",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Dorset Park, Scarborough Town Centre, Wexford ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Dorset Park, Scarborough Town Centre, Wexford ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Dorset Park, Scarborough Town Centre, Wexford ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [56]:
scarb_onehot.shape

(85, 56)

In [57]:
scarb_grouped = scarb_onehot.groupby('Neighborhood').mean().reset_index()
scarb_grouped.head(7)

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Brewery,Bus Line,Bus Station,Business Service,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,General Entertainment,Grocery Store,Hakka Restaurant,Hobby Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Moving Target,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, Milliken",0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Birch Cliff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0
5,"Cliffcrest, Cliffside",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Dorset Park, Scarborough Town Centre, Wexford ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857


In [58]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [59]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarb_grouped['Neighborhood']

for ind in np.arange(scarb_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarb_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
1,"Agincourt North, Milliken",Asian Restaurant,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
2,Birch Cliff,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
3,Cedarbrae,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Hakka Restaurant,Coffee Shop,General Entertainment,Fast Food Restaurant
4,"Clairlea, Golden Mile, Oakridge",Bakery,Bus Line,Metro Station,Bus Station,Fast Food Restaurant,Soccer Field,Intersection,Park,Coffee Shop,College Stadium
5,"Cliffcrest, Cliffside",American Restaurant,Motel,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,Convenience Store
6,"Dorset Park, Scarborough Town Centre, Wexford ...",Indian Restaurant,Vietnamese Restaurant,Brewery,Latin American Restaurant,Pet Store,Chinese Restaurant,Clothing Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
7,"Highland Creek, Rouge Hill, Port Union",Moving Target,Bar,Construction & Landscaping,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
8,"Ionview, Kennedy Park",Hobby Shop,Bus Station,Discount Store,Department Store,Convenience Store,Coffee Shop,Train Station,Breakfast Spot,Construction & Landscaping,Grocery Store
9,"Maryvale, Wexford",Middle Eastern Restaurant,Smoke Shop,Bakery,Breakfast Spot,Vietnamese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


In [60]:

# import k-means from clustering stage
from sklearn.cluster import KMeans

scarb_data = scarborough_data.drop(16)
# set number of clusters
kclusters = 5

scarb_grouped_clustering = scarb_grouped.drop('Neighborhood', 1)


# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarb_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
#len(kmeans.labels_)#=16
#scarborough_data.shape

array([1, 1, 1, 1, 1, 4, 1, 3, 1, 1], dtype=int32)

In [61]:

scarb_merged = scarb_data

# add clustering labels
scarb_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarb_merged = scarb_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

scarb_merged

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",M1P,43.75741,-79.273304,1,Indian Restaurant,Vietnamese Restaurant,Brewery,Latin American Restaurant,Pet Store,Chinese Restaurant,Clothing Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
1,Scarborough,"Agincourt North, Milliken",M1V,43.815252,-79.284577,1,Asian Restaurant,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
2,Scarborough,Upper Rouge,M1X,43.836125,-79.205636,1,,,,,,,,,,
3,Scarborough,Woburn,M1G,43.770992,-79.216917,1,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
4,Scarborough,Steeles West,M1W,43.799525,-79.318389,1,Grocery Store,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Sandwich Place,Discount Store
5,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577,4,Bakery,Bus Line,Metro Station,Bus Station,Fast Food Restaurant,Soccer Field,Intersection,Park,Coffee Shop,College Stadium
6,Scarborough,Cedarbrae,M1H,43.773136,-79.239476,1,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Hakka Restaurant,Coffee Shop,General Entertainment,Fast Food Restaurant
7,Scarborough,Tam O'Shanter,M1T,43.781638,-79.304302,3,Pizza Place,Fried Chicken Joint,Italian Restaurant,Noodle House,Pharmacy,Chinese Restaurant,Fast Food Restaurant,Thai Restaurant,Department Store,Discount Store
8,Scarborough,Agincourt,M1S,43.7942,-79.262029,1,Skating Rink,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
9,Scarborough,"Cliffcrest, Cliffside",M1M,43.716316,-79.239476,1,American Restaurant,Motel,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,Convenience Store


In [62]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location = [latitude_scar, longitude_scar], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarb_merged['Latitude'], scarb_merged['Longitude'], scarb_merged['Neighbourhood'], scarb_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [63]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 0, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,"Rouge, Malvern",0,Fast Food Restaurant,Vietnamese Restaurant,Chinese Restaurant,General Entertainment,Fried Chicken Joint,Electronics Store,Discount Store,Department Store,Convenience Store,Construction & Landscaping


In [64]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 1, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Dorset Park, Scarborough Town Centre, Wexford ...",1,Indian Restaurant,Vietnamese Restaurant,Brewery,Latin American Restaurant,Pet Store,Chinese Restaurant,Clothing Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
1,"Agincourt North, Milliken",1,Asian Restaurant,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
2,Upper Rouge,1,,,,,,,,,,
3,Woburn,1,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store
4,Steeles West,1,Grocery Store,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Sandwich Place,Discount Store
6,Cedarbrae,1,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Bakery,Bank,Fried Chicken Joint,Hakka Restaurant,Coffee Shop,General Entertainment,Fast Food Restaurant
8,Agincourt,1,Skating Rink,Breakfast Spot,Lounge,Clothing Store,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
9,"Cliffcrest, Cliffside",1,American Restaurant,Motel,Hakka Restaurant,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,Convenience Store
10,"Ionview, Kennedy Park",1,Hobby Shop,Bus Station,Discount Store,Department Store,Convenience Store,Coffee Shop,Train Station,Breakfast Spot,Construction & Landscaping,Grocery Store
12,"Maryvale, Wexford",1,Middle Eastern Restaurant,Smoke Shop,Bakery,Breakfast Spot,Vietnamese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store


In [65]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 2, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Birch Cliff,2,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store


In [66]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 3, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Tam O'Shanter,3,Pizza Place,Fried Chicken Joint,Italian Restaurant,Noodle House,Pharmacy,Chinese Restaurant,Fast Food Restaurant,Thai Restaurant,Department Store,Discount Store


In [67]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 4, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,"Clairlea, Golden Mile, Oakridge",4,Bakery,Bus Line,Metro Station,Bus Station,Fast Food Restaurant,Soccer Field,Intersection,Park,Coffee Shop,College Stadium


In [68]:
def foursquare_crawler (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    result_ds = []
    counter = 0
    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighborhood(s)'] = neighborhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds;

In [69]:
print('Crawling different neighborhoods inside "Scarborough"')
Scarborough_foursquare_dataset = foursquare_crawler(list(scarborough_data['Postcode']),
                                                   list(scarborough_data['Neighbourhood']),
                                                   list(scarborough_data['Latitude']),
                                                   list(scarborough_data['Longitude']),)

Crawling different neighborhoods inside "Scarborough"
1.
Data is Obtained, for the Postal Code M1P (and Neighborhoods Dorset Park, Scarborough Town Centre, Wexford Heights) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M1V (and Neighborhoods Agincourt North, Milliken) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M1X (and Neighborhoods Upper Rouge) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M1G (and Neighborhoods Woburn) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M1W (and Neighborhoods Steeles West) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M1L (and Neighborhoods Clairlea, Golden Mile, Oakridge) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M1H (and Neighborhoods Cedarbrae) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M1T (and Neighborhoods Tam O'Shanter) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M1S (and Neighborhoods Agincourt) SUCCESSFULLY.
10.
Data is Obtained, for the Postal Code M1M (and Neighb

In [70]:

import pickle
with open("Scarborough_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(Scarborough_foursquare_dataset, fp)
print('Received Data from Internet is Saved to Computer.')


Received Data from Internet is Saved to Computer.


In [72]:
with open("Scarborough_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    Scarborough_foursquare_dataset = pickle.load(fp)
# print(type(Scarborough_foursquare_dataset))
# Scarborough_foursquare_dataset

In [73]:
# This function is created to connect to the saved list which is the received database. It will extract each venue 
# for every neighborhood inside the database

def get_venue_dataset(foursquare_dataset):
    result_df = pd.DataFrame(columns = ['Postal Code', 'Neighborhood', 
                                           'Neighborhood Latitude', 'Neighborhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    # print(result_df)
    
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']; neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']; lng = neigh_dict['Longitude']
        print('Number of Venuse in Coordination "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        print(len(neigh_dict['Crawling_result']))
        
        for venue_dict in neigh_dict['Crawling_result']:
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            dist = venue_dict['venue']['location']['distance']
            cat =  venue_dict['venue']['categories'][0]['name']
            
            
            # print({'Postal Code': postal_code, 'Neighborhood': neigh, 
            #                   'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
            #                   'Venue': name, 'Venue Summary': summary, 
            #                   'Venue Category': cat, 'Distance': dist})
            
            result_df = result_df.append({'Postal Code': postal_code, 'Neighborhood': neigh, 
                              'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': cat, 'Distance': dist}, ignore_index = True)
            # print(result_df)
    
    return(result_df)

In [74]:
scarborough_venues = get_venue_dataset(Scarborough_foursquare_dataset)

Number of Venuse in Coordination "M1P" Posal Code and "Dorset Park, Scarborough Town Centre, Wexford Heights" Negihborhood(s) is:
45
Number of Venuse in Coordination "M1V" Posal Code and "Agincourt North, Milliken" Negihborhood(s) is:
28
Number of Venuse in Coordination "M1X" Posal Code and "Upper Rouge" Negihborhood(s) is:
0
Number of Venuse in Coordination "M1G" Posal Code and "Woburn" Negihborhood(s) is:
8
Number of Venuse in Coordination "M1W" Posal Code and "Steeles West" Negihborhood(s) is:
26
Number of Venuse in Coordination "M1L" Posal Code and "Clairlea, Golden Mile, Oakridge" Negihborhood(s) is:
28
Number of Venuse in Coordination "M1H" Posal Code and "Cedarbrae" Negihborhood(s) is:
30
Number of Venuse in Coordination "M1T" Posal Code and "Tam O'Shanter" Negihborhood(s) is:
30
Number of Venuse in Coordination "M1S" Posal Code and "Agincourt" Negihborhood(s) is:
48
Number of Venuse in Coordination "M1M" Posal Code and "Cliffcrest, Cliffside" Negihborhood(s) is:
13
Number of Ve

In [75]:
scarborough_venues.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Venue Category,Distance
0,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kim Kim restaurant,This spot is popular,Chinese Restaurant,478
1,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kairali,This spot is popular,Indian Restaurant,429
2,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Starbucks,This spot is popular,Coffee Shop,600
3,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Harry's Drive-In,This spot is popular,Burger Joint,965
4,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Spicy Dragon,This spot is popular,Asian Restaurant,922


In [76]:
scarborough_venues.to_csv('scarborough_venues.csv')
scarborough_venues = pd.read_csv('scarborough_venues.csv')

neigh_list = list(scarborough_venues['Neighborhood'].unique())
print('Number of Neighborhoods inside Scarborough:')
print(len(neigh_list))
print('List of Neighborhoods inside Scarborough:')
neigh_list

Number of Neighborhoods inside Scarborough:
16
List of Neighborhoods inside Scarborough:


['Dorset Park, Scarborough Town Centre, Wexford Heights',
 'Agincourt North, Milliken',
 'Woburn',
 'Steeles West',
 'Clairlea, Golden Mile, Oakridge',
 'Cedarbrae',
 "Tam O'Shanter",
 'Agincourt',
 'Cliffcrest, Cliffside',
 'Ionview, Kennedy Park',
 'Rouge, Malvern',
 'Maryvale, Wexford',
 'Scarborough Village',
 'Highland Creek, Rouge Hill, Port Union',
 'Birch Cliff',
 'Morningside, West Hill']

In [77]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

print('Here is the list of different categories:')
list(scarborough_venues['Venue Category'].unique())

There are 115 uniques categories.
Here is the list of different categories:


['Chinese Restaurant',
 'Indian Restaurant',
 'Coffee Shop',
 'Burger Joint',
 'Asian Restaurant',
 'Electronics Store',
 'Latin American Restaurant',
 'Bakery',
 'Gym / Fitness Center',
 'Pet Store',
 'Italian Restaurant',
 'Vietnamese Restaurant',
 'Pharmacy',
 'Grocery Store',
 'Bank',
 'Beer Store',
 'Sandwich Place',
 'Fast Food Restaurant',
 'Fried Chicken Joint',
 'Brewery',
 'Wings Joint',
 'Pizza Place',
 'Wine Shop',
 'Bowling Alley',
 'Furniture / Home Store',
 'American Restaurant',
 'Event Service',
 'Light Rail Station',
 'Plaza',
 'Intersection',
 'Restaurant',
 'Automotive Shop',
 'Dessert Shop',
 'Caribbean Restaurant',
 'Noodle House',
 'Vegetarian / Vegan Restaurant',
 'Event Space',
 'Korean Restaurant',
 'Malay Restaurant',
 'Hobby Shop',
 'Park',
 'Bubble Tea Shop',
 'Gym',
 'Shop & Service',
 'Shopping Mall',
 'Hong Kong Restaurant',
 'Hotpot Restaurant',
 'Japanese Restaurant',
 'Breakfast Spot',
 'Discount Store',
 'Other Great Outdoors',
 'Auto Garage',
 'Tenn

In [78]:
# one hot encoding
scarborough_onehot = pd.get_dummies(data = scarborough_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
scarborough_onehot.head()

Unnamed: 0.1,Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,American Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Automotive Shop,Badminton Court,Bakery,Bank,Bar,Beach,Beer Store,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,Café,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store,Event Service,Event Space,Fast Food Restaurant,Filipino Restaurant,Fish Market,Flea Market,Food & Drink Shop,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,General Entertainment,German Restaurant,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Hobby Shop,Hong Kong Restaurant,Hookah Bar,Hotpot Restaurant,Indian Chinese Restaurant,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Malay Restaurant,Martial Arts Dojo,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motorcycle Shop,Music Store,Noodle House,Other Great Outdoors,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Print Shop,Pub,Rental Car Location,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Shop & Service,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tennis Court,Thai Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Shop,Wings Joint,Yoga Studio
0,0,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kim Kim restaurant,This spot is popular,478,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Kairali,This spot is popular,429,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Starbucks,This spot is popular,600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Harry's Drive-In,This spot is popular,965,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",43.75741,-79.273304,Spicy Dragon,This spot is popular,922,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [81]:
# This list is created manually 
important_list_of_features = [
 
 'Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',

 'American Restaurant',
 'Asian Restaurant',
 
 'Bakery',
 
 'Breakfast Spot',

 'Burger Joint',
 
  
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant',

 'Fast Food Restaurant',
 'Filipino Restaurant',
 'Fish Market',
 'Food & Drink Shop',
 'Fried Chicken Joint',

 'Greek Restaurant',
 
 'Hakka Restaurant',
 
 'Hong Kong Restaurant',

 'Hotpot Restaurant',
 
 'Indian Restaurant',

 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',

 'Malay Restaurant',
 
 'Mediterranean Restaurant',
 
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 
 'Noodle House',
 
 'Pizza Place',
 
 'Restaurant',
 'Sandwich Place',
 'Seafood Restaurant',
 'Shanghai Restaurant',
    
 'Sri Lankan Restaurant',
 'Sushi Restaurant',
 'Taiwanese Restaurant',
 
 'Thai Restaurant',
 
 'Vietnamese Restaurant',
 'Wings Joint']

In [82]:
scarborough_onehot = scarborough_onehot[important_list_of_features].drop(
    columns = ['Neighborhood Latitude', 'Neighborhood Longitude']).groupby(
    'Neighborhood').sum()


scarborough_onehot.head()

Unnamed: 0_level_0,American Restaurant,Asian Restaurant,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish Market,Food & Drink Shop,Fried Chicken Joint,Greek Restaurant,Hakka Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Noodle House,Pizza Place,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Sri Lankan Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vietnamese Restaurant,Wings Joint
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
Agincourt,1,1,2,1,0,0,1,2,6,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,0,1,1,2,2,1,1,1,1,0,0,1,0
"Agincourt North, Milliken",0,0,1,0,0,0,0,1,6,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0
Birch Cliff,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
Cedarbrae,0,0,4,0,1,0,0,1,1,1,0,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1
"Clairlea, Golden Mile, Oakridge",0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0


In [83]:
feat_name_list = list(scarborough_onehot.columns)
restaurant_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
        
scarborough_onehot['Total Restaurants'] = scarborough_onehot[restaurant_list].sum(axis = 1)
scarborough_onehot = scarborough_onehot.drop(columns = restaurant_list)


feat_name_list = list(scarborough_onehot.columns)
joint_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
scarborough_onehot['Total Joints'] = scarborough_onehot[joint_list].sum(axis = 1)
scarborough_onehot = scarborough_onehot.drop(columns = joint_list)

In [84]:
scarborough_onehot

Unnamed: 0_level_0,Bakery,Breakfast Spot,Fish Market,Food & Drink Shop,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Agincourt,2,1,0,0,1,1,2,22,0
"Agincourt North, Milliken",1,0,0,0,2,2,0,11,0
Birch Cliff,0,0,0,0,0,0,0,3,0
Cedarbrae,4,0,0,0,0,1,0,7,3
"Clairlea, Golden Mile, Oakridge",2,0,0,0,0,1,1,2,0
"Cliffcrest, Cliffside",0,0,0,0,0,3,0,3,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",2,0,0,0,0,1,1,13,4
"Highland Creek, Rouge Hill, Port Union",0,1,0,0,0,0,0,1,1
"Ionview, Kennedy Park",0,0,0,0,0,2,1,5,1
"Maryvale, Wexford",1,1,1,0,0,2,0,9,1


In [85]:

# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(scarborough_onehot)

In [86]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = scarborough_onehot.columns
means_df.index = ['G1','G2','G3','G4','G5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Bakery,Breakfast Spot,Fish Market,Food & Drink Shop,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
G3,2.0,1.0,0.0,0.0,1.0,1.0,2.0,22.0,0.0,29.0
G1,1.0,0.0,0.0,0.0,1.0,1.666667,1.0,12.333333,1.666667,18.666667
G4,2.333333,0.666667,0.333333,0.0,0.0,1.333333,0.333333,8.0,1.333333,14.333333
G2,0.2,0.0,0.0,0.2,0.0,1.8,0.8,4.4,1.0,8.4
G5,0.5,0.25,0.0,0.0,0.0,0.25,0.25,2.25,0.25,3.75


## Result: best group is G3

In [89]:
neigh_summary = pd.DataFrame([scarb_grouped.Neighborhood, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighborhood', 'Group']
neigh_summary

Unnamed: 0,Neighborhood,Group
0,Agincourt,3
1,"Agincourt North, Milliken",1
2,Birch Cliff,5
3,Cedarbrae,4
4,"Clairlea, Golden Mile, Oakridge",5
5,"Cliffcrest, Cliffside",2
6,"Dorset Park, Scarborough Town Centre, Wexford ...",1
7,"Highland Creek, Rouge Hill, Port Union",5
8,"Ionview, Kennedy Park",2
9,"Maryvale, Wexford",4


# Final results:

## Best Neighborhood Are...¶

In [94]:
neigh_summary[neigh_summary['Group'] == 3]

Unnamed: 0,Neighborhood,Group
0,Agincourt,3


In [99]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 3]['Neighborhood'])[0]
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Neighborhood': 'Agincourt',
 'Neighborhood Latitude': 43.7942003,
 'Neighborhood Longitude': -79.262029400000017,
 'Postal Code': 'M1S'}

# second best Neighborhoods:

In [95]:
neigh_summary[neigh_summary['Group'] == 1]

Unnamed: 0,Neighborhood,Group
1,"Agincourt North, Milliken",1
6,"Dorset Park, Scarborough Town Centre, Wexford ...",1
14,Tam O'Shanter,1


In [97]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 1]['Neighborhood'])[0]
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Neighborhood': 'Agincourt North, Milliken',
 'Neighborhood Latitude': 43.815252200000003,
 'Neighborhood Longitude': -79.284577200000001,
 'Postal Code': 'M1V'}

## Third Best Neighborhood¶


In [96]:
neigh_summary[neigh_summary['Group'] == 4]

Unnamed: 0,Neighborhood,Group
3,Cedarbrae,4
9,"Maryvale, Wexford",4
13,Steeles West,4


In [98]:
name_of_neigh = list(neigh_summary[neigh_summary['Group'] == 4]['Neighborhood'])[0]
scarborough_venues[scarborough_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()

{'Neighborhood': 'Cedarbrae',
 'Neighborhood Latitude': 43.773136000000001,
 'Neighborhood Longitude': -79.239476099999976,
 'Postal Code': 'M1H'}