In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Get Singapore LatLong

In [2]:
address = 'Singapore'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Singapore are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Singapore are 1.3408528, 103.878446863736.


In [3]:
import urllib.request
url = "https://en.wikipedia.org/wiki/Planning_Areas_of_Singapore"
page = urllib.request.urlopen(url)
page

<http.client.HTTPResponse at 0x10c8562e8>

In [4]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(page, "lxml")

In [5]:
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Planning Areas of Singapore - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Planning_Areas_of_Singapore","wgTitle":"Planning Areas of Singapore","wgCurRevisionId":911917670,"wgRevisionId":911917670,"wgArticleId":2224605,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Pages using deprecated image syntax","Urban planning in Singapore","Subdivisions of Singapore"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","

# find the exact table using soup.find #

In [6]:
right_table=soup.find('table', class_='wikitable sortable')
right_table

<table class="wikitable sortable">
<tbody><tr>
<th>Name <small>(<a href="/wiki/English_language" title="English language">English</a>)</small>
</th>
<th><a href="/wiki/Malay_language" title="Malay language">Malay</a>
</th>
<th><a href="/wiki/Chinese_language" title="Chinese language">Chinese</a>
</th>
<th><a href="/wiki/Pinyin" title="Pinyin">Pinyin</a>
</th>
<th><a href="/wiki/Tamil_language" title="Tamil language">Tamil</a>
</th>
<th>Region
</th>
<th>Area (km2)
</th>
<th>Population<sup class="reference" id="cite_ref-6"><a href="#cite_note-6">[6]</a></sup>
</th>
<th>Density (/km2)
</th></tr>
<tr>
<td><a href="/wiki/Ang_Mo_Kio" title="Ang Mo Kio">Ang Mo Kio</a>
</td>
<td>
</td>
<td>宏茂桥
</td>
<td>Hóng mào qiáo
</td>
<td>ஆங் மோ கியோ
</td>
<td><a href="/wiki/North-East_Region,_Singapore" title="North-East Region, Singapore">North-East</a>
</td>
<td>13.94
</td>
<td>165,710
</td>
<td>12,000
</td></tr>
<tr>
<td><a href="/wiki/Bedok" title="Bedok">Bedok</a>
</td>
<td>*
</td>
<td>勿洛
</td>
<td>

# Scrap data and store in 9 lists and then convert only corresponding columns to dataframe

In [7]:
A=[]
B=[]
C=[]
D=[]
E=[]
F=[]
G=[]
H=[]
I=[]


for row in right_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==9:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))
        D.append(cells[3].find(text=True))
        E.append(cells[4].find(text=True))
        F.append(cells[5].find(text=True))
        G.append(cells[6].find(text=True))
        H.append(cells[7].find(text=True))
        I.append(cells[8].find(text=True))

In [8]:
import pandas as pd
df=pd.DataFrame(A,columns=['Name'])
df['Region']=F

df = df.loc[df['Region'] =='Central'].reset_index(drop=True) #Keeps only Areas in Central region 
df = df.drop(columns= 'Region')

In [9]:
df.head()

Unnamed: 0,Name
0,Bishan
1,Bukit Merah
2,Bukit Timah
3,Downtown Core
4,Geylang


In [10]:
!pip install geocoder
import geocoder



In [11]:
# define a function to get coordinates
def get_latlng(df):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Singapore'.format(df))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [12]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(df) for df in df["Name"].tolist() ]

In [13]:
coords

[[1.3507900000000745, 103.85110000000009],
 [1.2841700000000742, 103.82306000000005],
 [1.3404100000000199, 103.77221000000009],
 [1.3771599483526997, 103.95552993392594],
 [1.3114700000000425, 103.88218000000006],
 [1.3333300000000463, 103.86667000000006],
 [1.2957900000000677, 103.89544000000006],
 [1.2785700000000588, 103.85762000000005],
 [1.3030600000000732, 103.90778000000006],
 [1.2967200000000503, 103.84900000000005],
 [1.312180000000069, 103.83912000000004],
 [1.3191000000000486, 103.84372000000008],
 [1.3011200000000258, 103.83955000000009],
 [1.2892410000000072, 103.83500249999999],
 [1.299660000000074, 103.80172000000005],
 [1.293136591078245, 103.8278319656131],
 [1.3041300000000433, 103.85029000000009],
 [1.286670000000072, 103.85250000000008],
 [1.366670000000056, 103.80000000000007],
 [1.2798626945358378, 103.85359472544839],
 [1.3166700000000446, 103.81667000000004],
 [1.3344800000000419, 103.85108000000008]]

In [15]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude']) #create a table using latlong from above
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude'] #add latlong to df table 
print(df.shape)
df

(22, 3)


Unnamed: 0,Name,Latitude,Longitude
0,Bishan,1.35079,103.8511
1,Bukit Merah,1.28417,103.82306
2,Bukit Timah,1.34041,103.77221
3,Downtown Core,1.37716,103.95553
4,Geylang,1.31147,103.88218
5,Kallang,1.33333,103.86667
6,Marina East,1.29579,103.89544
7,Marina South,1.27857,103.85762
8,Marine Parade,1.30306,103.90778
9,Museum,1.29672,103.849


In [16]:
# save the DataFrame as CSV file
df.to_csv("df.csv", index=False)

## 4. Create a map of Singapore Central Region with neighborhoods superimposed on top¶

In [17]:
# create map of Singapore using latitude and longitude values
map_cbd = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, name in zip(df['Latitude'], df['Longitude'], df['Name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_cbd)  
    
map_cbd

 # Use the Foursquare API to explore the region

In [18]:
# define Foursquare Credentials and Version
CLIENT_ID = 'TYCFDFYBZGGNCM2DYJW5EZX0HML3P3T21OUP0YVKZXCAK0QI' # your Foursquare ID
CLIENT_SECRET = 'QUBZ1JEMQ0OXXTUPVFVEW2BKAKZJ1WQHLBG2HOH42353MJIH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TYCFDFYBZGGNCM2DYJW5EZX0HML3P3T21OUP0YVKZXCAK0QI
CLIENT_SECRET:QUBZ1JEMQ0OXXTUPVFVEW2BKAKZJ1WQHLBG2HOH42353MJIH


### Search venues with query=gym

In [55]:
radius = 500
LIMIT = 50

venues = []

for lat, long, name in zip(df['Latitude'], df['Longitude'], df['Name']):
    url = "https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&v={}&ll={},{}&intent=browse&radius={}&query=gym&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['venues']
    
    for venue in results:
        venues.append((
            name,
            lat, 
            long, 
            venue['name'], 
            venue['location']['lat'], 
            venue['location']['lng'],  
            #venue['categories']['name']
        ))

In [62]:
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Name', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude']

print(venues_df.shape)
venues_df

(246, 6)


Unnamed: 0,Name,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
0,Bishan,1.35079,103.8511,Bishan ActiveSG Gym,1.355259,103.851376
1,Bishan,1.35079,103.8511,Bishan Loft Gym,1.347484,103.852884
2,Bishan,1.35079,103.8511,Bishan Sports Complex Gym,1.350117,103.849018
3,Bishan,1.35079,103.8511,ITE College Central Bishan Gym,1.351662,103.853405
4,Bishan,1.35079,103.8511,Gym®,1.348367,103.851661
5,Bishan,1.35079,103.8511,ITE Bishan Campus GYM,1.351533,103.854019
6,Bishan,1.35079,103.8511,Bishan Gym,1.354366,103.850161
7,Bishan,1.35079,103.8511,Bishan 8 Gym Centre,1.34971,103.847211
8,Bishan,1.35079,103.8511,Gymm Boxx XL,1.349909,103.850689
9,Bishan,1.35079,103.8511,GymmBoxx XL,1.349905,103.850778


In [98]:
cbd_grouped = venues_df.groupby(["Name"], as_index=False).count()
cbd_gym = cbd_grouped.loc[:,['Name', 'VenueName']]

In [99]:
cbd_gym

Unnamed: 0,Name,VenueName
0,Bishan,12
1,Bukit Merah,6
2,Bukit Timah,9
3,Downtown Core,12
4,Geylang,9
5,Kallang,10
6,Marina East,3
7,Marina South,4
8,Marine Parade,12
9,Museum,17


# Clustering areas

In [109]:
# set number of clusters
kclusters = 4

cbd_grouped_clustering = cbd_grouped.drop(["Name"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cbd_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 2, 2, 2, 2, 0, 0, 2, 1], dtype=int32)

In [110]:
# create a new dataframe that includes the cluster as well as the top 5 venues for each neighborhood.
cbd_merged = cbd_gym.copy()

# add clustering labels
cbd_merged["Cluster Labels"] = kmeans.labels_

cbd_merged.rename(columns={"Names": "Name"}, inplace=True)
cbd_merged.head()

Unnamed: 0,Name,VenueName,Cluster Labels
0,Bishan,12,2
1,Bukit Merah,6,0
2,Bukit Timah,9,2
3,Downtown Core,12,2
4,Geylang,9,2


In [111]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
cbd_merged = cbd_merged.join(df.set_index("Name"), on="Name")

print(cbd_merged.shape)
cbd_merged.head()

(21, 5)


Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
0,Bishan,12,2,1.35079,103.8511
1,Bukit Merah,6,0,1.28417,103.82306
2,Bukit Timah,9,2,1.34041,103.77221
3,Downtown Core,12,2,1.37716,103.95553
4,Geylang,9,2,1.31147,103.88218


In [112]:
# sort the results by Cluster Labels
print(cbd_merged.shape)
cbd_merged.sort_values(["Cluster Labels"], inplace=True)
cbd_merged

(21, 5)


Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
1,Bukit Merah,6,0,1.28417,103.82306
6,Marina East,3,0,1.29579,103.89544
7,Marina South,4,0,1.27857,103.85762
19,Tanglin,3,0,1.31667,103.81667
10,Newton,14,1,1.31218,103.83912
18,Straits View,14,1,1.279863,103.853595
17,Singapore River,15,1,1.28667,103.8525
16,Rochor,16,1,1.30413,103.85029
15,River Valley,13,1,1.293137,103.827832
9,Museum,17,1,1.29672,103.849


In [113]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cbd_merged['Latitude'], cbd_merged['Longitude'], cbd_merged['Name'], cbd_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [114]:
cbd_merged.loc[cbd_merged['Cluster Labels'] == 0]

Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
1,Bukit Merah,6,0,1.28417,103.82306
6,Marina East,3,0,1.29579,103.89544
7,Marina South,4,0,1.27857,103.85762
19,Tanglin,3,0,1.31667,103.81667


In [115]:
cbd_merged.loc[cbd_merged['Cluster Labels'] == 1]

Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
10,Newton,14,1,1.31218,103.83912
18,Straits View,14,1,1.279863,103.853595
17,Singapore River,15,1,1.28667,103.8525
16,Rochor,16,1,1.30413,103.85029
15,River Valley,13,1,1.293137,103.827832
9,Museum,17,1,1.29672,103.849
11,Novena,14,1,1.3191,103.84372
13,Outram,14,1,1.289241,103.835002


In [116]:
cbd_merged.loc[cbd_merged['Cluster Labels'] == 2]

Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
14,Queenstown,8,2,1.29966,103.80172
0,Bishan,12,2,1.35079,103.8511
8,Marine Parade,12,2,1.30306,103.90778
5,Kallang,10,2,1.33333,103.86667
4,Geylang,9,2,1.31147,103.88218
3,Downtown Core,12,2,1.37716,103.95553
2,Bukit Timah,9,2,1.34041,103.77221
20,Toa Payoh,12,2,1.33448,103.85108


In [117]:
cbd_merged.loc[cbd_merged['Cluster Labels'] == 3]

Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
12,Orchard,29,3,1.30112,103.83955


In [118]:
cbd_merged.loc[cbd_merged['Cluster Labels'] == 4]

Unnamed: 0,Name,VenueName,Cluster Labels,Latitude,Longitude
