# Data Science Capstone (Toronto Neighborhoods)

## Part 3
### Retrieve data from Foursquare
In this part, we use the data available from Foursquare to identify the venues near each neighborhood.

### First, let's plot the neighborhoods on a map

In [537]:
# Centre the map on the first neighborhood listed in df_2.
lat, long = df_2['Latitude'][0], df_2['Longitude'][0]

map_toronto = folium.Map(location=[lat, long], zoom_start=10)

# Add one circle marker per neighborhood. The loop uses its own variable names
# so that it does not overwrite the `lat`/`long` map-centre values set above.
for hood_lat, hood_lng, hood_name in zip(df_2['Latitude'], df_2['Longitude'], df_2['Neighborhood']):
    popup = folium.Popup(hood_name, parse_html=True)
    folium.CircleMarker(
        [hood_lat, hood_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is a Popup option; confirm CircleMarker uses it.
        parse_html=False).add_to(map_toronto)

map_toronto

### Next, let's use the Foursquare API to retrieve information about our first neighborhood

In [372]:
import os
import warnings

# Foursquare credentials are read from the environment so that they are never
# stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# The key pair that used to be hardcoded here should be treated as leaked and
# rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
# Sent as the `v` query parameter of every Foursquare request in this notebook.
VERSION = '20180605'

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.'
    )

In [373]:
# Name and coordinates of the row labelled 0 in df_2; used by the request below.
neighborhood = df_2.loc[0, 'Neighborhood']
lat = df_2.loc[0, 'Latitude']
long = df_2.loc[0, 'Longitude']
print(neighborhood, lat, long)

Parkwoods 43.7532586 -79.3296565


In [516]:
radius = 650
LIMIT = 100

# Endpoint and query parameters are kept separate so that `requests` builds and
# encodes the query string, instead of formatting the secrets into the URL by hand.
url = 'https://api.foursquare.com/v2/venues/explore'
explore_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(lat, long),
    'v': VERSION,
    'radius': radius,
    'limit': LIMIT,
}

# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# errors here rather than as a confusing KeyError when the JSON is unpacked later.
explore_response = requests.get(url, params=explore_params, timeout=30)
explore_response.raise_for_status()
results = explore_response.json()

In [407]:
# List of venue items from the first group of the explore response.
# This assignment has to run: the cell that builds `nearby_venues` reads `venues`,
# so leaving it commented out breaks the notebook on a fresh kernel.
venues = results['response']['groups'][0]['items']

In [517]:
def get_category_name(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping or pandas.Series
        A venue record. The category list is read from the ``'categories'``
        key, falling back to ``'venue.categories'`` when that key is absent.

    Returns
    -------
    str or None
        ``categories[0]['name']``, or ``None`` when the category list is
        empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [518]:
# Flatten the nested venue records into a table with dotted column names.
nearby_venues = pd.json_normalize(venues)

filtered_columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
    'venue.location.distance',
]

# Replace each row's category list by the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_name, axis=1)
nearby_venues[filtered_columns]

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng,venue.location.distance
0,Brookbanks Park,Park,43.751976,-79.33214,245
1,PetSmart,Pet Store,43.748639,-79.333488,599
2,Variety Store,Food & Drink Shop,43.751974,-79.333114,312
3,Ranchdale Park,Park,43.751388,-79.322138,639
4,Joey,Burger Joint,43.753441,-79.32164,644


### Get venues for each neighborhood

In [523]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Reads ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT`` from
    module scope and prints each neighborhood name as it is processed.

    Parameters
    ----------
    names : iterable
        Neighborhood names, aligned with ``latitudes`` and ``longitudes``.
    latitudes, longitudes : iterable
        Coordinates sent as the ``ll`` query parameter.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 1000).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing is returned.
        'Venue Category' is None for a venue with no categories.

    Raises
    ------
    requests.HTTPError
        If a request returns an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string; the timeout keeps a
        # single stalled request from hanging the whole loop.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        items = response.json()['response']['groups'][0]['items']

        # Keep only the relevant fields of each nearby venue.
        for item in items:
            venue = item['venue']
            # A venue may have an empty category list; indexing [0] on it
            # would raise IndexError, so record None instead.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works when `rows` is
    # empty, where assigning `.columns` afterwards raises a length mismatch.
    return pd.DataFrame(rows, columns=columns)

In [524]:
# Collect the venues around every neighborhood in df_2 (no radius is passed,
# so getNearbyVenues uses its default).
toronto_venues = getNearbyVenues(
    names=df_2['Neighborhood'],
    latitudes=df_2['Latitude'],
    longitudes=df_2['Longitude'],
)

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

### Show the number of venues by neighborhood

In [525]:
# Non-null count of every column, per neighborhood.
toronto_venues.groupby(by='Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,44,44,44,44,44,44
"Alderwood, Long Branch",27,27,27,27,27,27
"Bathurst Manor, Wilson Heights, Downsview North",30,30,30,30,30,30
Bayview Village,16,16,16,16,16,16
"Bedford Park, Lawrence Manor East",40,40,40,40,40,40
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",30,30,30,30,30,30
Woburn,10,10,10,10,10,10
Woodbine Heights,32,32,32,32,32,32
York Mills West,22,22,22,22,22,22


### Show how many unique categories there are

In [526]:
# Number of distinct values in the 'Venue Category' column.
n_unique_categories = len(toronto_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(n_unique_categories))

There are 331 uniques categories.


### Analyzing each neighborhood

In [527]:
# One-hot encode the venue category: one indicator column per category, named
# after the category itself (empty prefix and separator).
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# NOTE(review): if a venue category is itself called 'Neighborhood', this
# assignment overwrites that indicator column — confirm against the data.
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
# Column order with the last column moved to the front; not used in this cell.
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
# Mean of the indicator columns per neighborhood, i.e. the share of that
# neighborhood's venues falling in each category.
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
num_top_venues = 5

# Print the most frequent categories of every neighborhood.
for hood in toronto_grouped['Neighborhood']:
    print("-------"+hood+"-------")
    # Transpose the neighborhood's row so that categories become rows.
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    # The first row holds the 'Neighborhood' label itself, not a category.
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print("\n")    

-------Agincourt-------
                  venue  freq
0    Chinese Restaurant  0.16
1         Shopping Mall  0.07
2  Caribbean Restaurant  0.05
3                Bakery  0.05
4           Supermarket  0.02


-------Alderwood, Long Branch-------
            venue  freq
0        Pharmacy  0.11
1  Discount Store  0.11
2     Pizza Place  0.07
3            Park  0.07
4             Pub  0.04


-------Bathurst Manor, Wilson Heights, Downsview North-------
         venue  freq
0  Pizza Place  0.07
1         Bank  0.07
2  Coffee Shop  0.07
3  Supermarket  0.03
4  Bridal Shop  0.03


-------Bayview Village-------
                 venue  freq
0                 Bank  0.12
1        Grocery Store  0.12
2  Japanese Restaurant  0.12
3          Gas Station  0.12
4                 Park  0.06


-------Bedford Park, Lawrence Manor East-------
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.08
2                Bank  0.05
3                Park  0.05
4      Sandwich Place  0.05

### Return the top 10 categories for each neighborhood

In [545]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped. The remaining entries are sorted
    by value in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [610]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # Ranks 1-3 take 'st'/'nd'/'rd'; every later rank uses 'th'. An explicit
    # bounds check replaces the former bare `except:` used as control flow.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per neighborhood in toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill the rank columns of each row with that neighborhood's top categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

### Group neighborhoods based on categories

In [611]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
# kmeans.labels_

In [612]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = pd.merge(left=neighborhoods_venues_sorted, right=df_2, left_on='Neighborhood', right_on='Neighborhood').drop(['PostalCode', 'Borough'], 1)
toronto_merged.set_index(['Neighborhood'], drop=True)

Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Agincourt,0,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Skating Rink,Dim Sum Restaurant,Discount Store,Bank,Japanese Restaurant,Café,43.794200,-79.262029
"Alderwood, Long Branch",0,Discount Store,Pharmacy,Park,Pizza Place,Coffee Shop,Garden Center,Donut Shop,Liquor Store,Shopping Mall,Intersection,43.602414,-79.543484
"Bathurst Manor, Wilson Heights, Downsview North",0,Pizza Place,Bank,Coffee Shop,Ski Chalet,Ice Cream Shop,Supermarket,Fried Chicken Joint,Sandwich Place,Diner,Restaurant,43.754328,-79.442259
Bayview Village,2,Bank,Japanese Restaurant,Gas Station,Grocery Store,Shopping Mall,Chinese Restaurant,Café,Trail,Intersection,Restaurant,43.786947,-79.385975
"Bedford Park, Lawrence Manor East",3,Italian Restaurant,Coffee Shop,Park,Sandwich Place,Bank,Restaurant,Sushi Restaurant,Pub,Thai Restaurant,Bagel Shop,43.733283,-79.419750
...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Willowdale, Newtonbrook",3,Korean Restaurant,Café,Pizza Place,Diner,Middle Eastern Restaurant,Coffee Shop,Indian Restaurant,Shopping Mall,Sandwich Place,Supermarket,43.789053,-79.408493
Woburn,2,Coffee Shop,Park,Indian Restaurant,Chinese Restaurant,Business Service,Pharmacy,Mobile Phone Shop,Fast Food Restaurant,Farm,Elementary School,43.770992,-79.216917
Woodbine Heights,2,Park,Coffee Shop,Pizza Place,Skating Rink,Sandwich Place,Thai Restaurant,Café,Pharmacy,Farmers Market,Bus Stop,43.695344,-79.318389
York Mills West,2,Park,Restaurant,Coffee Shop,Convenience Store,Dentist's Office,Gas Station,Bank,Tennis Court,Chinese Restaurant,French Restaurant,43.752758,-79.400049


In [614]:
# Centre the map on the first neighborhood of df_2. The coordinates are read
# directly from the frame, so the cell does not depend on whatever `lat`/`long`
# happen to hold from earlier cells.
map_clusters = folium.Map(
    location=[df_2.loc[0, 'Latitude'], df_2.loc[0, 'Longitude']],
    zoom_start=10,
)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# The loop variables have their own names so they do not overwrite `lat`.
for marker_lat, marker_lon, poi, cluster in zip(
        toronto_merged['Latitude'],
        toronto_merged['Longitude'],
        toronto_merged['Neighborhood'],
        toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is -1 for label 0, so label 0 takes the last palette entry and
    # every other label takes the entry before its own.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [marker_lat, marker_lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine clusters

#### Cluster 1

In [615]:
# "Cluster 1" is k-means label 0; show the columns at positions 1-11.
in_cluster = toronto_merged['Cluster Labels'] == 0
toronto_merged.loc[in_cluster, toronto_merged.columns[1:12]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Caribbean Restaurant,Skating Rink,Dim Sum Restaurant,Discount Store,Bank,Japanese Restaurant,Café
1,"Alderwood, Long Branch",Discount Store,Pharmacy,Park,Pizza Place,Coffee Shop,Garden Center,Donut Shop,Liquor Store,Shopping Mall,Intersection
2,"Bathurst Manor, Wilson Heights, Downsview North",Pizza Place,Bank,Coffee Shop,Ski Chalet,Ice Cream Shop,Supermarket,Fried Chicken Joint,Sandwich Place,Diner,Restaurant
10,Caledonia-Fairbanks,Pizza Place,Pharmacy,Park,Hostel,Japanese Restaurant,Coffee Shop,Discount Store,Café,Bus Stop,Falafel Restaurant
12,Cedarbrae,Bakery,Coffee Shop,Indian Restaurant,Gas Station,Bank,Pharmacy,Fast Food Restaurant,Caribbean Restaurant,Burger Joint,Thai Restaurant
16,"Clarks Corners, Tam O'Shanter, Sullivan",Coffee Shop,Bank,Pizza Place,Sandwich Place,Fast Food Restaurant,Thai Restaurant,Deli / Bodega,Seafood Restaurant,Intersection,Italian Restaurant
17,"Cliffside, Cliffcrest, Scarborough Village West",Pizza Place,Ice Cream Shop,Beach,Sports Bar,Cajun / Creole Restaurant,Burger Joint,Hardware Store,Park,Event Space,Dumpling Restaurant
21,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Furniture / Home Store,Grocery Store,Restaurant,Video Store,Sandwich Place,Dessert Shop,Italian Restaurant,Bar,Gas Station,Convenience Store
24,"Dorset Park, Wexford Heights, Scarborough Town...",Electronics Store,Coffee Shop,Restaurant,Pharmacy,Chinese Restaurant,Indian Restaurant,Asian Restaurant,Bakery,Fast Food Restaurant,Sandwich Place
25,Downsview,Coffee Shop,Vietnamese Restaurant,Pizza Place,Hotel,Park,Gas Station,Grocery Store,Turkish Restaurant,Discount Store,Fast Food Restaurant


#### Cluster 2

In [616]:
# "Cluster 2" is k-means label 1; show the columns at positions 1-11.
in_cluster = toronto_merged['Cluster Labels'] == 1
toronto_merged.loc[in_cluster, toronto_merged.columns[1:12]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
72,"Rouge Hill, Port Union, Highland Creek",Breakfast Spot,Playground,Burger Joint,Park,Italian Restaurant,Zoo,Ethiopian Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant


#### Cluster 3

In [617]:
# "Cluster 3" is k-means label 2; show the columns at positions 1-11.
in_cluster = toronto_merged['Cluster Labels'] == 2
toronto_merged.loc[in_cluster, toronto_merged.columns[1:12]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Bayview Village,Bank,Japanese Restaurant,Gas Station,Grocery Store,Shopping Mall,Chinese Restaurant,Café,Trail,Intersection,Restaurant
6,"Birch Cliff, Cliffside West",Park,General Entertainment,Gym Pool,Diner,Thai Restaurant,Dessert Shop,Café,Restaurant,Skating Rink,Gym
34,Forest Hill North & West,Park,Sushi Restaurant,Coffee Shop,Café,Bank,Pharmacy,Bakery,Trail,Italian Restaurant,Japanese Restaurant
43,"Humberlea, Emery",Business Service,Storage Facility,Gas Station,Discount Store,Golf Course,Park,Intersection,Bakery,Convenience Store,Farm
46,Islington Avenue,Pharmacy,Grocery Store,Bakery,Café,Shopping Mall,Golf Course,Skating Rink,Park,Bank,Convenience Store
51,Lawrence Park,Café,Bookstore,Park,College Quad,College Gym,Coffee Shop,Trail,Gym / Fitness Center,Zoo,Ethiopian Restaurant
58,"New Toronto, Mimico South, Humber Bay Shores",Park,Bakery,Grocery Store,Mexican Restaurant,Skating Rink,Café,Liquor Store,Fried Chicken Joint,Dessert Shop,Italian Restaurant
63,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Park,Italian Restaurant,Shopping Mall,Eastern European Restaurant,Ice Cream Shop,Gym / Fitness Center,Zoo,Elementary School,Doner Restaurant,Donut Shop
66,Parkwoods,Park,Bus Stop,Convenience Store,Pharmacy,Shopping Mall,Café,Discount Store,Caribbean Restaurant,Tennis Court,Supermarket
70,Rosedale,Park,Coffee Shop,Grocery Store,Breakfast Spot,Trail,Pie Shop,Sandwich Place,Candy Store,Filipino Restaurant,Metro Station


#### Cluster 4

In [618]:
# "Cluster 4" is k-means label 3; show the columns at positions 1-11.
in_cluster = toronto_merged['Cluster Labels'] == 3
toronto_merged.loc[in_cluster, toronto_merged.columns[1:12]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Park,Sandwich Place,Bank,Restaurant,Sushi Restaurant,Pub,Thai Restaurant,Bagel Shop
5,Berczy Park,Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Park,Gym,Bakery,Grocery Store,Liquor Store
7,"Brockton, Parkdale Village, Exhibition Place",Café,Restaurant,Coffee Shop,Bar,Bakery,Furniture / Home Store,Gift Shop,Tibetan Restaurant,Performing Arts Venue,Park
8,Business reply mail Processing Centre,Park,Coffee Shop,Brewery,Pizza Place,Sushi Restaurant,Bakery,Italian Restaurant,Fast Food Restaurant,Gym / Fitness Center,Thai Restaurant
9,"CN Tower, King and Spadina, Railway Lands, Har...",Harbor / Marina,Café,Coffee Shop,Dance Studio,Garden,Sculpture Garden,Sushi Restaurant,Track,Park,Scenic Lookout
11,Canada Post Gateway Processing Centre,Coffee Shop,Middle Eastern Restaurant,Hotel,Bakery,Mexican Restaurant,Gym,Burrito Place,Sushi Restaurant,Indian Restaurant,Fried Chicken Joint
13,Central Bay Street,Coffee Shop,Café,Gastropub,Sushi Restaurant,Park,Clothing Store,Ramen Restaurant,Japanese Restaurant,Theater,Yoga Studio
14,Christie,Korean Restaurant,Café,Coffee Shop,Grocery Store,Ice Cream Shop,Mexican Restaurant,Cocktail Bar,Japanese Restaurant,Park,Bar
15,Church and Wellesley,Coffee Shop,Sushi Restaurant,Park,Japanese Restaurant,Diner,Café,Thai Restaurant,Pizza Place,Men's Store,Ramen Restaurant
18,"Commerce Court, Victoria Hotel",Coffee Shop,Hotel,Café,Japanese Restaurant,Restaurant,Seafood Restaurant,Concert Hall,Plaza,Deli / Bodega,Thai Restaurant


#### Cluster 5

In [619]:
# "Cluster 5" is k-means label 4; show the columns at positions 1-11.
in_cluster = toronto_merged['Cluster Labels'] == 4
toronto_merged.loc[in_cluster, toronto_merged.columns[1:12]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
101,"York Mills, Silver Hills",Park,Pool,Zoo,Event Space,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
