In [1]:
# Importing libraries
from bs4 import BeautifulSoup
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
# Web scraping using BeautifulSoup
wiki_link = 'https://www.hdb.gov.sg/cs/infoweb/residential/buying-a-flat/resale/resale-statistics'
source = requests.get(wiki_link)
page_content = BeautifulSoup(source.content, "html.parser")
table = page_content.find_all('table')[0] 
df_raw = pd.read_html(str(table))[0]

In [3]:
df_raw

Unnamed: 0,Town,1-Room,2-Room,3-Room,4-Room,5-Room,Executive
0,Ang Mo Kio,-,*,"$290,000","$440,000","$699,900",*
1,Bedok,-,*,"$290,000","$405,000","$578,000",*
2,Bishan,-,-,*,"$558,000","$741,500",*
3,Bukit Batok,-,-,"$258,000","$378,400","$520,000","$623,500"
4,Bukit Merah,*,*,"$355,000","$660,000","$810,000",-
5,Bukit Panjang,-,*,*,"$368,000","$450,000","$650,000"
6,Bukit Timah,-,-,*,*,*,*
7,Central,-,*,"$404,000",*,*,-
8,Choa Chu Kang,-,*,"$268,000","$338,500","$400,000","$538,500"
9,Clementi,-,*,"$310,500","$504,000","$635,500",*


In [4]:
df = df_raw[['Town', '4-Room']]
df = df[df['4-Room']!='*']
df.columns = ['region','room_price']

In [5]:
df.head()

Unnamed: 0,region,room_price
0,Ang Mo Kio,"$440,000"
1,Bedok,"$405,000"
2,Bishan,"$558,000"
3,Bukit Batok,"$378,400"
4,Bukit Merah,"$660,000"


In [6]:
geo_loc = pd.DataFrame(columns = ['region','latitude','longitude'])
geo_loc

Unnamed: 0,region,latitude,longitude


In [7]:
for place in df['region']:
    geolocator = Nominatim()
    place_tmp = place + ' Singapore'
    location = geolocator.geocode(place_tmp,timeout=10)
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(place,latitude, longitude))
    geo_loc = geo_loc.append({'region': place,
        'latitude' : latitude,
        'longitude' : longitude}, ignore_index = True)

  


The geograpical coordinate of Ang Mo Kio are 1.369842, 103.8466086.
The geograpical coordinate of Bedok are 1.3239765, 103.930216.
The geograpical coordinate of Bishan are 1.3514551, 103.8482628.
The geograpical coordinate of Bukit Batok are 1.3490572, 103.7495906.
The geograpical coordinate of Bukit Merah are 1.2806275, 103.8305915.
The geograpical coordinate of Bukit Panjang are 1.377921, 103.7718658.
The geograpical coordinate of Choa Chu Kang are 1.3892601, 103.743728.
The geograpical coordinate of Clementi are 1.3140256, 103.7624098.
The geograpical coordinate of Geylang are 1.3181862, 103.8870563.
The geograpical coordinate of Hougang are 1.3733601, 103.8860907.
The geograpical coordinate of Jurong East are 1.333115, 103.7422968.
The geograpical coordinate of Jurong West are 1.3396365, 103.7073387.
The geograpical coordinate of Kallang/Whampoa are 1.3244514, 103.8676547.
The geograpical coordinate of Pasir Ris are 1.37598905, 103.954359749213.
The geograpical coordinate of Punggo

In [8]:
geo_loc.head()

Unnamed: 0,region,latitude,longitude
0,Ang Mo Kio,1.369842,103.846609
1,Bedok,1.323976,103.930216
2,Bishan,1.351455,103.848263
3,Bukit Batok,1.349057,103.749591
4,Bukit Merah,1.280628,103.830591


In [9]:
df_merged = df.merge(geo_loc,left_on='region', right_on='region',how='left')
df_merged.head()
singapore_data = df_merged

In [10]:
# create map of Singapore using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_merged['latitude'], df_merged['longitude'], df_merged['region']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_singapore)  
    
map_singapore

In [11]:
CLIENT_ID = '3G1IFCLDTG0AK3CD2K0UNM1Q1EH5SNQGURVEJKJIK1AON3FY' # your Foursquare ID
CLIENT_SECRET = 'MHS4Y3C4EGUYMI51EGP2J0TMSM1LG0NHRHVBYTX4R1AFHXEY' # your Foursquare Secret
VERSION = '20190101' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3G1IFCLDTG0AK3CD2K0UNM1Q1EH5SNQGURVEJKJIK1AON3FY
CLIENT_SECRET:MHS4Y3C4EGUYMI51EGP2J0TMSM1LG0NHRHVBYTX4R1AFHXEY


In [12]:
# type your answer here
limit = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=3G1IFCLDTG0AK3CD2K0UNM1Q1EH5SNQGURVEJKJIK1AON3FY&client_secret=MHS4Y3C4EGUYMI51EGP2J0TMSM1LG0NHRHVBYTX4R1AFHXEY&v=20190101&ll=1.428136,103.8336942&radius=500&limit=100'

In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [14]:
# type your answer here
singapore_venues = getNearbyVenues(names=singapore_data['region'],
                                   latitudes=singapore_data['latitude'],
                                   longitudes=singapore_data['longitude']
                                  )

Ang Mo Kio
Bedok
Bishan
Bukit Batok
Bukit Merah
Bukit Panjang
Choa Chu Kang
Clementi
Geylang
Hougang
Jurong East
Jurong West
Kallang/Whampoa
Pasir Ris
Punggol
Queenstown
Sembawang
Sengkang
Serangoon
Tampines
Toa Payoh
Woodlands
Yishun


In [15]:
print(singapore_venues.shape)
singapore_venues.head()

(908, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ang Mo Kio,1.369842,103.846609,MOS Burger,1.36917,103.847831,Burger Joint
1,Ang Mo Kio,1.369842,103.846609,Malaysia Boleh!,1.369669,103.8489,Food Court
2,Ang Mo Kio,1.369842,103.846609,FairPrice Xtra,1.369556,103.848503,Supermarket
3,Ang Mo Kio,1.369842,103.846609,NTUC FairPrice,1.371507,103.847082,Supermarket
4,Ang Mo Kio,1.369842,103.846609,Old Chang Kee,1.369094,103.848389,Snack Place


In [16]:
singapore_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ang Mo Kio,48,48,48,48,48,48
Bedok,57,57,57,57,57,57
Bishan,40,40,40,40,40,40
Bukit Batok,25,25,25,25,25,25
Bukit Merah,41,41,41,41,41,41
Bukit Panjang,8,8,8,8,8,8
Choa Chu Kang,15,15,15,15,15,15
Clementi,57,57,57,57,57,57
Geylang,36,36,36,36,36,36
Hougang,26,26,26,26,26,26


In [17]:
print('There are {} uniques categories.'.format(len(singapore_venues['Venue Category'].unique())))

There are 140 uniques categories.


In [18]:
# one hot encoding
singapore_onehot = pd.get_dummies(singapore_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
singapore_onehot['Neighborhood'] = singapore_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [singapore_onehot.columns[-1]] + list(singapore_onehot.columns[:-1])
singapore_onehot = singapore_onehot[fixed_columns]

singapore_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Arcade,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bakery,Bank,Basketball Court,Betting Shop,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Buffet,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Cafeteria,Café,Cantonese Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fast Food Restaurant,Fish & Chips Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,General Entertainment,German Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hainan Restaurant,Halal Restaurant,High School,Hobby Shop,Hong Kong Restaurant,Hostel,Hotel,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Malay Restaurant,Market,Medical Center,Mediterranean Restaurant,Metro Station,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Movie Theater,Multiplex,Noodle House,Optical Shop,Park,Pastry Shop,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Post Office,Ramen Restaurant,Recreation Center,Resort,Restaurant,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,Spa,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Water Park,Wings Joint,Yoga Studio
0,Ang Mo Kio,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Ang Mo Kio,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Ang Mo Kio,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Ang Mo Kio,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Ang Mo Kio,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [19]:
singapore_grouped = singapore_onehot.groupby('Neighborhood').mean().reset_index()
singapore_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Arcade,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bakery,Bank,Basketball Court,Betting Shop,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Buffet,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Cafeteria,Café,Cantonese Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Cafeteria,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fast Food Restaurant,Fish & Chips Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,General Entertainment,German Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hainan Restaurant,Halal Restaurant,High School,Hobby Shop,Hong Kong Restaurant,Hostel,Hotel,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Kids Store,Korean Restaurant,Malay Restaurant,Market,Medical Center,Mediterranean Restaurant,Metro Station,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Movie Theater,Multiplex,Noodle House,Optical Shop,Park,Pastry Shop,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Post Office,Ramen Restaurant,Recreation Center,Resort,Restaurant,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,Spa,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Water Park,Wings Joint,Yoga Studio
0,Ang Mo Kio,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.020833,0.0,0.104167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.020833,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0
1,Bedok,0.0,0.017544,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.035088,0.0,0.052632,0.017544,0.070175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.035088,0.0,0.0,0.0,0.052632,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.017544,0.017544,0.017544,0.0,0.052632,0.0,0.017544,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.035088,0.052632,0.0,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.017544,0.0
2,Bishan,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.075,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.05,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.025,0.0,0.025,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.025,0.0,0.025,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bukit Batok,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.16,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0
4,Bukit Merah,0.0,0.02439,0.0,0.0,0.0,0.121951,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.04878,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.097561,0.0,0.073171,0.0,0.073171,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.121951,0.0,0.04878,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439
5,Bukit Panjang,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Choa Chu Kang,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Clementi,0.0,0.017544,0.0,0.017544,0.0,0.052632,0.0,0.017544,0.0,0.017544,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.035088,0.0,0.035088,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.017544,0.035088,0.0,0.0,0.0,0.0,0.035088,0.0,0.035088,0.0,0.0,0.0,0.070175,0.0,0.0,0.017544,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.035088,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.017544,0.0,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.052632,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,0.035088,0.0,0.017544,0.0,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.035088,0.0,0.0,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0
8,Geylang,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.083333,0.0,0.0,0.0,0.0,0.0
9,Hougang,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.038462,0.038462,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0


In [20]:
num_top_venues = 5

for hood in singapore_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = singapore_grouped[singapore_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ang Mo Kio----
                  venue  freq
0           Coffee Shop  0.10
1            Food Court  0.08
2  Fast Food Restaurant  0.08
3       Bubble Tea Shop  0.06
4          Dessert Shop  0.06


----Bedok----
                 venue  freq
0          Coffee Shop  0.07
1   Chinese Restaurant  0.05
2     Sushi Restaurant  0.05
3           Food Court  0.05
4  Japanese Restaurant  0.05


----Bishan----
                 venue  freq
0          Coffee Shop  0.10
1      Bubble Tea Shop  0.08
2  Japanese Restaurant  0.05
3                 Café  0.05
4   Chinese Restaurant  0.05


----Bukit Batok----
                venue  freq
0         Coffee Shop  0.20
1  Chinese Restaurant  0.16
2          Food Court  0.08
3    Department Store  0.04
4       Bowling Alley  0.04


----Bukit Merah----
                venue  freq
0          Food Court  0.12
1    Asian Restaurant  0.12
2                Café  0.10
3         Coffee Shop  0.07
4  Chinese Restaurant  0.07


----Bukit Panjang----
           venue

In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [22]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = singapore_grouped['Neighborhood']

for ind in np.arange(singapore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(singapore_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,Coffee Shop,Fast Food Restaurant,Food Court,Bubble Tea Shop,Dessert Shop,Japanese Restaurant,Supermarket,Sandwich Place,Bus Stop,Modern European Restaurant
1,Bedok,Coffee Shop,Chinese Restaurant,Food Court,Sushi Restaurant,Japanese Restaurant,Café,Dessert Shop,Sandwich Place,Supermarket,Fast Food Restaurant
2,Bishan,Coffee Shop,Bubble Tea Shop,Supermarket,Food Court,Ice Cream Shop,Chinese Restaurant,Japanese Restaurant,Café,Department Store,Shopping Mall
3,Bukit Batok,Coffee Shop,Chinese Restaurant,Food Court,Department Store,Frozen Yogurt Shop,Fast Food Restaurant,Skate Park,Malay Restaurant,Soccer Field,Bowling Alley
4,Bukit Merah,Food Court,Asian Restaurant,Café,Chinese Restaurant,Coffee Shop,Noodle House,Food Truck,Japanese Restaurant,Bookstore,Cafeteria
5,Bukit Panjang,Food Court,Park,Noodle House,Market,Miscellaneous Shop,Grocery Store,Department Store,Fish & Chips Shop,Fast Food Restaurant,English Restaurant
6,Choa Chu Kang,Fast Food Restaurant,Asian Restaurant,Park,Food Court,Thai Restaurant,Playground,Shopping Mall,Sandwich Place,Bakery,Coffee Shop
7,Clementi,Food Court,Noodle House,Asian Restaurant,Electronics Store,Chinese Restaurant,Shopping Mall,Snack Place,Indian Restaurant,Fast Food Restaurant,Japanese Restaurant
8,Geylang,Chinese Restaurant,Noodle House,Food Court,Vegetarian / Vegan Restaurant,Grocery Store,Asian Restaurant,Dim Sum Restaurant,Department Store,Steakhouse,Bus Station
9,Hougang,Coffee Shop,Café,Food Court,Pool,Indian Restaurant,Pharmacy,Noodle House,Gym,Miscellaneous Shop,Sandwich Place


In [26]:
# set number of clusters
kclusters = 3

singapore_grouped_clustering = singapore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(singapore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 2, 1, 0, 0, 2, 0], dtype=int32)

In [27]:
singapore_merged = singapore_data

# add clustering labels
singapore_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
singapore_merged = singapore_merged.merge(neighborhoods_venues_sorted,left_on='region', right_on='Neighborhood',how='left')
singapore_merged.head() # check the last columns!

Unnamed: 0,region,room_price,latitude,longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,"$440,000",1.369842,103.846609,0,Ang Mo Kio,Coffee Shop,Fast Food Restaurant,Food Court,Bubble Tea Shop,Dessert Shop,Japanese Restaurant,Supermarket,Sandwich Place,Bus Stop,Modern European Restaurant
1,Bedok,"$405,000",1.323976,103.930216,0,Bedok,Coffee Shop,Chinese Restaurant,Food Court,Sushi Restaurant,Japanese Restaurant,Café,Dessert Shop,Sandwich Place,Supermarket,Fast Food Restaurant
2,Bishan,"$558,000",1.351455,103.848263,0,Bishan,Coffee Shop,Bubble Tea Shop,Supermarket,Food Court,Ice Cream Shop,Chinese Restaurant,Japanese Restaurant,Café,Department Store,Shopping Mall
3,Bukit Batok,"$378,400",1.349057,103.749591,0,Bukit Batok,Coffee Shop,Chinese Restaurant,Food Court,Department Store,Frozen Yogurt Shop,Fast Food Restaurant,Skate Park,Malay Restaurant,Soccer Field,Bowling Alley
4,Bukit Merah,"$660,000",1.280628,103.830591,2,Bukit Merah,Food Court,Asian Restaurant,Café,Chinese Restaurant,Coffee Shop,Noodle House,Food Truck,Japanese Restaurant,Bookstore,Cafeteria


In [28]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(singapore_merged['latitude'], singapore_merged['longitude'], singapore_merged['Neighborhood'], singapore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [63]:
tmp = []
for x in singapore_merged['room_price']:
    int_tmp = int(x[1:].replace(',',''))
    tmp.append(int_tmp)

In [64]:
singapore_merged['room_price1'] = tmp

In [68]:
singapore_merged

Unnamed: 0,region,room_price,latitude,longitude,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,room_price1
0,Ang Mo Kio,"$440,000",1.369842,103.846609,0,Ang Mo Kio,Coffee Shop,Fast Food Restaurant,Food Court,Bubble Tea Shop,Dessert Shop,Japanese Restaurant,Supermarket,Sandwich Place,Bus Stop,Modern European Restaurant,440000
1,Bedok,"$405,000",1.323976,103.930216,0,Bedok,Coffee Shop,Chinese Restaurant,Food Court,Sushi Restaurant,Japanese Restaurant,Café,Dessert Shop,Sandwich Place,Supermarket,Fast Food Restaurant,405000
2,Bishan,"$558,000",1.351455,103.848263,0,Bishan,Coffee Shop,Bubble Tea Shop,Supermarket,Food Court,Ice Cream Shop,Chinese Restaurant,Japanese Restaurant,Café,Department Store,Shopping Mall,558000
3,Bukit Batok,"$378,400",1.349057,103.749591,0,Bukit Batok,Coffee Shop,Chinese Restaurant,Food Court,Department Store,Frozen Yogurt Shop,Fast Food Restaurant,Skate Park,Malay Restaurant,Soccer Field,Bowling Alley,378400
4,Bukit Merah,"$660,000",1.280628,103.830591,2,Bukit Merah,Food Court,Asian Restaurant,Café,Chinese Restaurant,Coffee Shop,Noodle House,Food Truck,Japanese Restaurant,Bookstore,Cafeteria,660000
5,Bukit Panjang,"$368,000",1.377921,103.771866,1,Bukit Panjang,Food Court,Park,Noodle House,Market,Miscellaneous Shop,Grocery Store,Department Store,Fish & Chips Shop,Fast Food Restaurant,English Restaurant,368000
6,Choa Chu Kang,"$338,500",1.38926,103.743728,0,Choa Chu Kang,Fast Food Restaurant,Asian Restaurant,Park,Food Court,Thai Restaurant,Playground,Shopping Mall,Sandwich Place,Bakery,Coffee Shop,338500
7,Clementi,"$504,000",1.314026,103.76241,0,Clementi,Food Court,Noodle House,Asian Restaurant,Electronics Store,Chinese Restaurant,Shopping Mall,Snack Place,Indian Restaurant,Fast Food Restaurant,Japanese Restaurant,504000
8,Geylang,"$472,500",1.318186,103.887056,2,Geylang,Chinese Restaurant,Noodle House,Food Court,Vegetarian / Vegan Restaurant,Grocery Store,Asian Restaurant,Dim Sum Restaurant,Department Store,Steakhouse,Bus Station,472500
9,Hougang,"$380,000",1.37336,103.886091,0,Hougang,Coffee Shop,Café,Food Court,Pool,Indian Restaurant,Pharmacy,Noodle House,Gym,Miscellaneous Shop,Sandwich Place,380000


In [67]:
singapore_merged.groupby('Cluster Labels').mean()['room_price1']

Cluster Labels
0    409525
1    368000
2    561250
Name: room_price1, dtype: int64