# Initialize the DataFrame for Clustering

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Build the Toronto neighborhood table: one row per postal code holding its
# borough, a comma-separated list of its neighborhoods, and its coordinates.
from bs4 import BeautifulSoup

# The dataframe consists of five columns: PostalCode, Borough, Neighborhood, Latitude and Longitude
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Download and parse the Wikipedia list of "M" postal codes.
# The timeout and status check make a network problem fail here with a clear error.
wiki_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
wiki_response.raise_for_status()
website_url = wiki_response.text
soup = BeautifulSoup(website_url, 'lxml')
My_table = soup.find('table', {'class': 'wikitable sortable'})
if My_table is None:
    raise ValueError("No 'wikitable sortable' table found on the postal code page")

# Accumulate one record per postal code. A dict keyed by postal code replaces the
# row-by-row DataFrame.append (removed in pandas 2.0) and the iterrows() write-back,
# which modified copies of the rows rather than the dataframe itself.
postal_records = {}
for table_row in My_table.find_all('tr'):
    cells = table_row.find_all('td')
    # Only rows with exactly three data cells (postal code, borough, neighborhood) are used
    if len(cells) != 3:
        continue
    pc, bo, nb = (cell.get_text().replace('\n', '').strip() for cell in cells)

    # Ignore cells with a borough that is Not assigned
    if bo == 'Not assigned':
        continue

    # A cell with a borough but a Not assigned neighborhood takes the borough as its neighborhood
    if nb.startswith('Not assigned'):
        nb = bo

    if pc in postal_records:
        # Several neighborhoods can share one postal code: combine them, separated by a comma
        postal_records[pc]['Neighborhood'] += ',' + nb
    else:
        postal_records[pc] = {'PostalCode': pc, 'Borough': bo, 'Neighborhood': nb}

toronto_neighborhoods = pd.DataFrame(list(postal_records.values()), columns=column_names[:3])

# Attach the coordinates with a left join on the postal code, so every scraped
# postal code is kept and those missing from the CSV get NaN coordinates.
latlon = pd.read_csv('http://cocl.us/Geospatial_data')
coordinates = latlon.rename(columns={'Postal Code': 'PostalCode'})[['PostalCode', 'Latitude', 'Longitude']]
toronto_neighborhoods = (
    toronto_neighborhoods
    .merge(coordinates, on='PostalCode', how='left')
    .reindex(columns=column_names)
)
            

# Get Latitude and Longitude for Toronto

In [6]:
address = 'Toronto, Canada'

# Nominatim needs an identifying user agent; geopy 2.x raises a
# ConfigurationError when the default one is used.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


## Create neighborhood map of Toronto

In [9]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Drop one circle marker per row, with a "<neighborhood>, <borough>" popup
marker_fields = zip(
    toronto_neighborhoods['Latitude'],
    toronto_neighborhoods['Longitude'],
    toronto_neighborhoods['Borough'],
    toronto_neighborhoods['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_fields:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

## Cluster neighborhoods of North York

In [23]:
# Keep only the North York rows and renumber them from 0
is_north_york = toronto_neighborhoods['Borough'] == 'North York'
northyork_data = toronto_neighborhoods.loc[is_north_york].reset_index(drop=True)
northyork_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M6A,North York,"Lawrence Heights,Lawrence Manor",43.7185,-79.4648
3,M3B,North York,Don Mills North,43.7459,-79.3522
4,M6B,North York,Glencairn,43.7096,-79.4451


In [24]:
address = 'North York, Canada'

# Nominatim needs an identifying user agent; geopy 2.x raises a
# ConfigurationError when the default one is used.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of North York are 43.7709163, -79.4124102.


In [27]:
# Base map centred on the geocoded North York coordinates
map_northyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# Drop one circle marker per North York row, with the neighborhood name as popup
marker_fields = zip(
    northyork_data['Latitude'],
    northyork_data['Longitude'],
    northyork_data['Neighborhood'],
)
for lat, lng, label in marker_fields:
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_northyork)

map_northyork

## Explore a neighborhood in North York using Foursquare (the client ID and secret are not displayed)

In [26]:
# Name of the first North York neighborhood (row label 0)
northyork_data.at[0, 'Neighborhood']

'Parkwoods'

In [28]:
# Read the name and coordinates of the first North York row
first_row = 0
neighborhood_name = northyork_data.loc[first_row, 'Neighborhood']
neighborhood_latitude, neighborhood_longitude = (
    northyork_data.loc[first_row, column] for column in ('Latitude', 'Longitude')
)

summary = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighborhood_name, neighborhood_latitude, neighborhood_longitude
)
print(summary)

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


## Top Venues near Parkwoods

In [29]:
# Ask the Foursquare "explore" endpoint for venues around the selected neighborhood.
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # search radius sent to Foursquare (presumably meters - confirm against the API docs)

# create URL
# CLIENT_ID, CLIENT_SECRET and VERSION are not defined in the displayed cells;
# they must already exist in the kernel before this cell runs.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# A timeout stops an unresponsive API from hanging the notebook, and
# raise_for_status() reports HTTP errors here instead of as a KeyError later on.
foursquare_response = requests.get(url, timeout=30)
foursquare_response.raise_for_status()
results = foursquare_response.json()


In [30]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row, or None.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide either a 'categories' entry or a 'venue.categories'
        entry holding a list of category dicts, each with a 'name' key.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [31]:
# Venue items of the first group in the Foursquare response
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only the name, categories and coordinates
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes so that only the last component remains as column name
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,TTC stop #8380,Bus Stop,43.752672,-79.326351
3,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [32]:
# Report how many venue rows came back
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

4 venues were returned by Foursquare.
