# Capstone Project — The Battle of Neighborhoods (Week 2)

In [1]:
import pandas as pd
import numpy as np

In [3]:
# !conda install -c conda-forge folium=0.5.0 --yes
import folium

# !conda install -c conda-forge geopy
from geopy.geocoders import Nominatim

import json
from pandas.io.json import json_normalize
import requests
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported.')

Libraries imported.


## Introduction

Toronto, the most populous city in Canada, is an international center of business, finance, arts, and culture. Its economy is highly diversified with strengths, such as technology, financial services, education, art, and tourism. [1] In the city of Toronto, booksellers could enjoy being part of a community, encouraging the pleasure of reading in adults, and helping to make lifelong readers out of children. For someone who is looking to open a bookstore, it is vital to choose the neighborhood and retail location. The goal of this project is to figure out where a bookstore should be set up for success with data analysis.


##  Data Acquiring

In [4]:
url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050.'

dfs = pd.read_html(url)

In [5]:
df = dfs[0]

## Data Cleaning

#### 1. drop each cell with a borough that is Not assigned.

In [6]:
df_tmp = df.loc[df['Borough']!= 'Not assigned']

#### 2. for each cell having a borough but a Not assigned neighborhood, change its neighborhood to borough.

In [7]:
for i in range(210):
    if df_tmp.iloc[i,2]=='Not assigned':
        df_tmp.iloc[i,2] = df_tmp.iloc[i,1]

#### 3. combine rows with same postal code. separate neighbourhoods with commas.

In [8]:
df_new = df_tmp

df_new = df_new.groupby(['Postcode', 'Borough']).apply(lambda x: ', '.join(x['Neighbourhood']))

df_new = df_new.to_frame()

df_new = df_new.reset_index()

df_new = df_new.rename(columns = {0: 'Neighbourhood'})

df_new.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [9]:
df_new.shape

(103, 3)

#### 4. obtain the data of latitude and the longitude coordinates in Toronto

In [10]:
url2 ='http://cocl.us/Geospatial_data'

df2 = pd.read_csv(url2)

df2 = df2.rename(columns = {'Postal Code': 'Postcode'})

#### 5. add the latitude and the longitude coordinates of each neighborhood

In [11]:
data = pd.merge(df_new, df2, how = 'left', on= 'Postcode')

#### 6. boroughs that contain the word Toronto

In [12]:
data= data[data['Borough'].str.contains("Toronto")]
data = data.rename(columns = {'Neighbourhood': 'Neighborhood'})
data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


## Data Exploring

#### 1. Define Foursquare Credentials and Version

In [13]:
# Foursquare information
CLIENT_ID = 'SXXI11BGUK3P54RYICC3UADXYDR41ZDLBMIMXRUC0HDJVXME' 
CLIENT_SECRET = 'GVBTLDDBYAUI3VFBHMWQ4EVRXAIPH1O20BMRVZXFUZJJEQ0M' 
VERSION = '20200317'

#### 2. Explore Neighborhoods in Toronto

In [14]:
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
toronto_venues = getNearbyVenues(names= data['Neighborhood'],
                                   latitudes= data['Latitude'],
                                   longitudes= data['Longitude']
                                  )
toronto_venues.head()

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [16]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton, Exhibition Place, Parkdale Village",24,24,24,24,24,24
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,83,83,83,83,83,83
"Chinatown, Grange Park, Kensington Market",87,87,87,87,87,87
Christie,18,18,18,18,18,18
Church and Wellesley,87,87,87,87,87,87


#### 3. Analyze Each Neighborhood

In [17]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot.drop(['Neighborhood'],axis=1,inplace=True) 
toronto_onehot.insert(loc=0, column='Neighborhood', value=toronto_venues['Neighborhood'] )
toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Transportation Service,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## results

#### 1. Information of bookstores in Toronto

In [23]:
# sort the neighborhoods with the bookstores they have

toronto_bookstore = pd.merge(toronto_onehot[['Neighborhood', 'Bookstore']], toronto_venues[['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude']], how = 'left', on = 'Neighborhood')

toronto_bookstore = toronto_bookstore.groupby(['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude']).sum()

toronto_bookstore = toronto_bookstore.sort_values(by=['Bookstore'], ascending = False).reset_index()

toronto_bookstore.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Bookstore
0,"Adelaide, King, Richmond",43.650571,-79.384568,200
1,"Ryerson, Garden District",43.657162,-79.378937,200
2,St. James Town,43.651494,-79.375418,100
3,"First Canadian Place, Underground city",43.648429,-79.38228,100
4,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,100


In [19]:
# Neighborhoods without a bookstore
NeighborhoodList = toronto_bookstore[toronto_bookstore['Bookstore'] == 0]
NeighborhoodList

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Bookstore
15,Christie,43.669542,-79.422564,0
16,"The Beaches West, India Bazaar",43.668999,-79.315572,0
17,The Beaches,43.676357,-79.293031,0
18,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0
19,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191,0
20,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,0
21,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0
22,Roselawn,43.711695,-79.416936,0
23,Rosedale,43.679563,-79.377529,0
24,Queen's Park,43.662301,-79.389494,0


In [20]:
# Neighborhoods with a bookstore
Neighborhoods = toronto_bookstore[toronto_bookstore['Bookstore'] != 0]
Neighborhoods

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Bookstore
0,"Adelaide, King, Richmond",43.650571,-79.384568,200
1,"Ryerson, Garden District",43.657162,-79.378937,200
2,St. James Town,43.651494,-79.375418,100
3,"First Canadian Place, Underground city",43.648429,-79.38228,100
4,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,100
5,"Commerce Court, Victoria Hotel",43.648198,-79.379817,100
6,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,95
7,Church and Wellesley,43.66586,-79.38316,87
8,Central Bay Street,43.657952,-79.387383,83
9,"The Danforth West, Riverdale",43.679557,-79.352188,82


#### 2.Visualize the result

In [21]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [22]:
# create map showing the neighborhoods without a bookstore
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lon, poi in zip(NeighborhoodList['Neighborhood Latitude'], NeighborhoodList['Neighborhood Longitude'], NeighborhoodList['Neighborhood']):
    label = folium.Popup(str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters    