# IBM Data Science Capstone 

### Week 5: Final Report

1.Import Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ###############################

2. Scrap Data from Wiki Page into a Data Frame

In [2]:
#Send the get request
data = requests.get("https://en.wikipedia.org/wiki/Category:Regions_of_Sydney").text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# create a list to store region data
regionList = []

In [5]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    regionList.append(row.text)
# create a new DataFrame from the list
kl_df = pd.DataFrame({"Region": regionList})
k2=kl_df.drop(labels=0,axis=0)
k2.reset_index()
k2.head()


Unnamed: 0,Region
1,Blue Mountains (New South Wales)
2,Canterbury-Bankstown
3,Dee Why
4,Eastern Suburbs (Sydney)
5,Forest District (Sydney)


In [6]:
# print the number of rows of the dataframe
k2.shape

(18, 1)

In [7]:
k3=pd.DataFrame({"Region":k2['Region'].unique()})
k3


Unnamed: 0,Region
0,Blue Mountains (New South Wales)
1,Canterbury-Bankstown
2,Dee Why
3,Eastern Suburbs (Sydney)
4,Forest District (Sydney)
5,Greater Western Sydney
6,Hills District
7,Inner West
8,Long Reef (New South Wales)
9,"Macarthur, New South Wales"


### Get the geographical coordinates

In [8]:
!pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 7.2MB/s ta 0:00:011
[?25hCollecting click (from geocoder)
[?25l  Downloading https://files.pythonhosted.org/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl (82kB)
[K     |████████████████████████████████| 92kB 6.2MB/s  eta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Collecting future (from geocoder)
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 6.7MB/s eta 0:00:01
Building wheels

In [9]:
import geocoder

In [10]:
# define a function to get coordinates
def get_latlng(region):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Sydney,Australia'.format(region))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [11]:
coords = [ get_latlng(region) for region in k3["Region"].tolist() ]

In [12]:
coords

[[-33.71330999999998, 150.3306500000001],
 [-33.93404173160261, 151.0302294397121],
 [-33.753619999999955, 151.2853500000001],
 [-33.869599999999934, 151.2069100000001],
 [-33.86885127099998, 151.209332248],
 [-33.91343999971678, 151.16112899946776],
 [-33.72091999999998, 150.9883000000001],
 [-33.78388282976996, 151.27150516674274],
 [-34.040582646524456, 150.82380975342596],
 [-34.058460059437614, 150.82150005669635],
 [-33.83991998167807, 151.20789006275598],
 [-33.83920999999998, 151.20721000000003],
 [-33.86559999999997, 151.20865000000003],
 [-33.926139, 150.87346200000002],
 [-33.95259999999996, 151.2312700000001],
 [-33.91343999971678, 151.16112899946776],
 [-33.86481999999995, 151.20773000000008],
 [-33.91343999971678, 151.16112899946776]]

In [13]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [14]:
# merge the coordinates into the original dataframe
k3['Latitude'] = df_coords['Latitude']
k3['Longitude'] = df_coords['Longitude']

In [15]:
# check the neighborhoods and the coordinates
print(k3.shape)
k3

(18, 3)


Unnamed: 0,Region,Latitude,Longitude
0,Blue Mountains (New South Wales),-33.71331,150.33065
1,Canterbury-Bankstown,-33.934042,151.030229
2,Dee Why,-33.75362,151.28535
3,Eastern Suburbs (Sydney),-33.8696,151.20691
4,Forest District (Sydney),-33.868851,151.209332
5,Greater Western Sydney,-33.91344,151.161129
6,Hills District,-33.72092,150.9883
7,Inner West,-33.783883,151.271505
8,Long Reef (New South Wales),-34.040583,150.82381
9,"Macarthur, New South Wales",-34.05846,150.8215


### Create a map of Sydney with regions superimposed on top

In [16]:
# get the coordinates of Sydney
address = 'Sydney, Australia'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Sydney,Australia {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Sydney,Australia -33.8548157, 151.2164539.


In [17]:
# create map of Toronto using latitude and longitude values
map_kl = folium.Map(location=[latitude, longitude], zoom_start=11)
region=k3['Region']
# add markers to map
for lat, lng, neighborhood in zip(k3['Latitude'], k3['Longitude'], k3['Region']):
    label = '{}'.format(region)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_kl)  
    
map_kl

### Use the Foursquare API to explore the neighborhoods

In [18]:
# define Foursquare Credentials and Version
CLIENT_ID = 'YYEYFMMLWD1VOWL3SOM3IPFH2X5T0PJWQHCALJM35LQ3OSIG' # your Foursquare ID
CLIENT_SECRET = '1IZAA3UKBBFJREKRRG5K5DUU3VX0TJV1DHPAA3S5AMCRHZNI' # your Foursquare Secret
VERSION = '20200523' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: YYEYFMMLWD1VOWL3SOM3IPFH2X5T0PJWQHCALJM35LQ3OSIG
CLIENT_SECRET:1IZAA3UKBBFJREKRRG5K5DUU3VX0TJV1DHPAA3S5AMCRHZNI


Top 100 venues within a radius of 3000 meters

In [19]:
radius = 3000
LIMIT = 100

venues = []

for lat, long, region in zip(k3['Latitude'],k3['Longitude'],k3['Region']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
           venues.append((
            region,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [20]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Region', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1571, 7)


Unnamed: 0,Region,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Blue Mountains (New South Wales),-33.71331,150.33065,Josophan's Chocolate Shop,-33.712735,150.331198,Chocolate Shop
1,Blue Mountains (New South Wales),-33.71331,150.33065,Leura Garage,-33.71247,150.331677,Café
2,Blue Mountains (New South Wales),-33.71331,150.33065,Flemish Flavours at the Hillcrest,-33.711626,150.331028,Belgian Restaurant
3,Blue Mountains (New South Wales),-33.71331,150.33065,Leura Cascades,-33.719447,150.322309,Waterfall
4,Blue Mountains (New South Wales),-33.71331,150.33065,Everglades,-33.721451,150.337886,Garden


#### How many venues returned for each region

In [21]:
venues_df.groupby(["Region"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Blue Mountains (New South Wales),55,55,55,55,55,55
Canterbury-Bankstown,95,95,95,95,95,95
Dee Why,100,100,100,100,100,100
Eastern Suburbs (Sydney),100,100,100,100,100,100
Forest District (Sydney),100,100,100,100,100,100
Greater Western Sydney,100,100,100,100,100,100
Hills District,87,87,87,87,87,87
Inner West,100,100,100,100,100,100
Long Reef (New South Wales),42,42,42,42,42,42
"Macarthur, New South Wales",75,75,75,75,75,75


In [21]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 173 uniques categories.


In [22]:
venues_df['VenueCategory'].unique()[:50]

array(['Chocolate Shop', 'Café', 'Belgian Restaurant', 'Waterfall',
       'Garden', 'Tea Room', 'Scenic Lookout', 'Liquor Store', 'Bakery',
       'Asian Restaurant', 'Restaurant', 'Candy Store', 'Bar',
       'Malay Restaurant', 'Hotel', 'Resort', 'Cultural Center', 'Town',
       'Australian Restaurant', 'Fruit & Vegetable Store',
       'Italian Restaurant', 'Hostel', 'Department Store', 'Multiplex',
       'Trail', 'Supermarket', 'Coffee Shop', 'French Restaurant',
       'Gourmet Shop', 'Park', 'RV Park', 'Pool', 'Motel',
       'Kebab Restaurant', 'Middle Eastern Restaurant',
       'Vietnamese Restaurant', 'Lebanese Restaurant', 'Sports Club',
       'Electronics Store', 'Sports Bar', 'Burger Joint',
       'Gym / Fitness Center', 'Japanese Restaurant', 'Sandwich Place',
       'Buffet', 'Gym', 'Pizza Place', 'Grocery Store',
       'Portuguese Restaurant', 'Fast Food Restaurant'], dtype=object)

In [23]:
# Check if the bar is in the result
"Bar" in venues_df['VenueCategory'].unique()

True

#### Analyze each neighbourhood 

In [24]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Region'] = venues_df['Region'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()

(1571, 174)


Unnamed: 0,Region,Airport Lounge,American Restaurant,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bakery,Bar,Baseball Field,Basketball Stadium,Bay,Beach,Beach Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bistro,Bookstore,Botanical Garden,Bowling Alley,Breakfast Spot,Brewery,Bridge,Buffet,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Candy Store,Cantonese Restaurant,Chinese Restaurant,Chocolate Shop,Churrascaria,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Costume Shop,Cultural Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doctor's Office,Dog Run,Dumpling Restaurant,Egyptian Restaurant,Electronics Store,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food Court,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,General Entertainment,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,History Museum,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Lebanese Restaurant,Liquor Store,Lounge,Malay Restaurant,Martial Arts Dojo,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Motel,Movie Theater,Multiplex,Music Store,National Park,Noodle House,Opera House,Other Repair Shop,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pie Shop,Pier,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,RV Park,Ramen Restaurant,Record Shop,Rental Car Location,Resort,Restaurant,Rock Club,Rugby Pitch,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Soccer Field,South Indian Restaurant,Souvlaki Shop,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Surf Spot,Sushi Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tiki Bar,Tour Provider,Town,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfall,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Blue Mountains (New South Wales),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Blue Mountains (New South Wales),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Blue Mountains (New South Wales),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Blue Mountains (New South Wales),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,Blue Mountains (New South Wales),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Group Rows by Region and the mean of the frequency of occurence of each category 

In [25]:
kl_grouped = kl_onehot.groupby(["Region"]).mean().reset_index()

print(kl_grouped.shape)
kl_grouped

(18, 174)


Unnamed: 0,Region,Airport Lounge,American Restaurant,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bakery,Bar,Baseball Field,Basketball Stadium,Bay,Beach,Beach Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bistro,Bookstore,Botanical Garden,Bowling Alley,Breakfast Spot,Brewery,Bridge,Buffet,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Candy Store,Cantonese Restaurant,Chinese Restaurant,Chocolate Shop,Churrascaria,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Costume Shop,Cultural Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doctor's Office,Dog Run,Dumpling Restaurant,Egyptian Restaurant,Electronics Store,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food Court,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,General Entertainment,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,History Museum,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Lebanese Restaurant,Liquor Store,Lounge,Malay Restaurant,Martial Arts Dojo,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Monument / Landmark,Motel,Movie Theater,Multiplex,Music Store,National Park,Noodle House,Opera House,Other Repair Shop,Pakistani Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Pie Shop,Pier,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,RV Park,Ramen Restaurant,Record Shop,Rental Car Location,Resort,Restaurant,Rock Club,Rugby Pitch,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Soccer Field,South Indian Restaurant,Souvlaki Shop,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Surf Spot,Sushi Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park,Tiki Bar,Tour Provider,Town,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfall,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Blue Mountains (New South Wales),0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.036364,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.145455,0.018182,0.0,0.0,0.036364,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.036364,0.036364,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.036364,0.036364,0.0,0.0,0.0,0.0,0.145455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.036364,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.0
1,Canterbury-Bankstown,0.0,0.010204,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.020408,0.020408,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.112245,0.0,0.010204,0.010204,0.0,0.0,0.0,0.0,0.0,0.030612,0.0,0.030612,0.0,0.0,0.0,0.030612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.05102,0.020408,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.020408,0.0,0.010204,0.030612,0.0,0.0,0.010204,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.010204,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.010204,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.010204,0.0,0.010204,0.040816,0.0,0.0,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,0.010204,0.0,0.091837,0.0,0.0,0.0,0.0,0.0
2,Dee Why,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.03,0.02,0.0,0.0,0.0,0.07,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.04,0.01,0.0,0.0,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.05,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.03,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.03,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Eastern Suburbs (Sydney),0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.1,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.04,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.04,0.02,0.03,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.04,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01
4,Forest District (Sydney),0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.1,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.04,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.04,0.02,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01
5,Greater Western Sydney,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.06,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.01,0.02,0.01,0.0,0.0,0.01,0.0,0.05,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.01,0.0
6,Hills District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.011111,0.011111,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.088889,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.011111,0.0,0.0,0.0,0.033333,0.011111,0.0,0.0,0.0,0.0,0.0,0.011111,0.011111,0.0,0.022222,0.0,0.055556,0.011111,0.0,0.0,0.011111,0.022222,0.0,0.0,0.0,0.011111,0.0,0.0,0.022222,0.0,0.0,0.0,0.011111,0.0,0.011111,0.0,0.011111,0.022222,0.033333,0.011111,0.0,0.0,0.0,0.022222,0.0,0.0,0.011111,0.0,0.0,0.033333,0.011111,0.022222,0.0,0.0,0.011111,0.0,0.0,0.0,0.022222,0.0,0.011111,0.0,0.0,0.022222,0.011111,0.011111,0.0,0.011111,0.0,0.011111,0.0,0.011111,0.011111,0.0,0.0,0.022222,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.011111,0.0,0.0,0.011111,0.0,0.044444,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Inner West,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.05,0.06,0.0,0.0,0.01,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.03,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0
8,Long Reef (New South Wales),0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.0,0.025,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.075,0.0,0.0,0.0,0.025,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Macarthur, New South Wales",0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.025641,0.012821,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.128205,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064103,0.012821,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.012821,0.012821,0.0,0.012821,0.0,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.012821,0.0,0.0,0.0,0.012821,0.025641,0.0,0.0,0.0,0.0,0.0,0.012821,0.012821,0.0,0.038462,0.0,0.0,0.0,0.0,0.064103,0.0,0.012821,0.0,0.0,0.0,0.0,0.012821,0.0,0.012821,0.012821,0.038462,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0


In [26]:
len(kl_grouped[kl_grouped["Bar"] > 0])

15

#### New DataFrame for bar data only

In [27]:
kl_bar = kl_grouped[["Region","Bar"]]
kl_bar.head()

Unnamed: 0,Region,Bar
0,Blue Mountains (New South Wales),0.018182
1,Canterbury-Bankstown,0.020408
2,Dee Why,0.02
3,Eastern Suburbs (Sydney),0.01
4,Forest District (Sydney),0.01


#### Cluster Neighborhoods

Run k-means to cluster the neighborhoods in Sydney

In [29]:
#set number of clusters
kclusters = 3

kl_clustering = kl_bar.drop(["Region"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 0, 0, 1, 0, 1, 0, 0], dtype=int32)

In [30]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_bar.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [31]:
kl_merged.rename(columns={"Region": "Region"}, inplace=True)
kl_merged.head()

Unnamed: 0,Region,Bar,Cluster Labels
0,Blue Mountains (New South Wales),0.018182,2
1,Canterbury-Bankstown,0.020408,2
2,Dee Why,0.02,2
3,Eastern Suburbs (Sydney),0.01,0
4,Forest District (Sydney),0.01,0


In [32]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(k3.set_index("Region"), on="Region")

print(kl_merged.shape)
kl_merged.head() # check the last columns

(18, 5)


Unnamed: 0,Region,Bar,Cluster Labels,Latitude,Longitude
0,Blue Mountains (New South Wales),0.018182,2,-33.71331,150.33065
1,Canterbury-Bankstown,0.020408,2,-33.934042,151.030229
2,Dee Why,0.02,2,-33.75362,151.28535
3,Eastern Suburbs (Sydney),0.01,0,-33.8696,151.20691
4,Forest District (Sydney),0.01,0,-33.868851,151.209332


In [33]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(18, 5)


Unnamed: 0,Region,Bar,Cluster Labels,Latitude,Longitude
8,Long Reef (New South Wales),0.0,0,-34.040583,150.82381
15,St George (Sydney),0.01,0,-33.8656,151.20865
3,Eastern Suburbs (Sydney),0.01,0,-33.8696,151.20691
4,Forest District (Sydney),0.01,0,-33.868851,151.209332
13,South-Eastern Sydney,0.0,0,-33.9526,151.23127
6,Hills District,0.011111,0,-33.72092,150.9883
9,"Macarthur, New South Wales",0.0,0,-34.05846,150.8215
16,Sydney central business district,0.01,0,-33.86482,151.20773
14,Southern Sydney,0.06,1,-33.91344,151.161129
17,Western Sydney,0.06,1,-33.91344,151.161129


#### Visualize the resulting clusters

In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Region'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examine Clusters

Cluster 0

In [40]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]


Unnamed: 0,Region,Bar,Cluster Labels,Latitude,Longitude
8,Long Reef (New South Wales),0.0,0,-34.040583,150.82381
15,St George (Sydney),0.01,0,-33.8656,151.20865
3,Eastern Suburbs (Sydney),0.01,0,-33.8696,151.20691
4,Forest District (Sydney),0.01,0,-33.868851,151.209332
13,South-Eastern Sydney,0.0,0,-33.9526,151.23127
6,Hills District,0.011111,0,-33.72092,150.9883
9,"Macarthur, New South Wales",0.0,0,-34.05846,150.8215
16,Sydney central business district,0.01,0,-33.86482,151.20773


Cluster 1

In [36]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Region,Bar,Cluster Labels,Latitude,Longitude
14,Southern Sydney,0.06,1,-33.91344,151.161129
17,Western Sydney,0.06,1,-33.91344,151.161129
5,Greater Western Sydney,0.06,1,-33.91344,151.161129
7,Inner West,0.06,1,-33.783883,151.271505


Cluster 2

In [37]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Region,Bar,Cluster Labels,Latitude,Longitude
10,North Shore (Sydney),0.02,2,-33.83992,151.20789
11,Northern Sydney,0.02,2,-33.83921,151.20721
12,South Western Sydney,0.032258,2,-33.926139,150.873462
2,Dee Why,0.02,2,-33.75362,151.28535
1,Canterbury-Bankstown,0.020408,2,-33.934042,151.030229
0,Blue Mountains (New South Wales),0.018182,2,-33.71331,150.33065
