# IBM Coursera - Applied Data Science Capstone

### Final Project - Exploring similarities and dissimilarities between two cities, Taipei and Tainan, in Taiwan

1. Import libraries

In [1]:
import numpy as np 
import pandas as pd 

# library to handle JSON
import json 

# convert an address into latitude and longitude values
from geopy.geocoders import Nominatim 

# library to handle requests
import requests 

# library to parse HTML/XML documents
from bs4 import BeautifulSoup 

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes 
# map rendering library
import folium

print("Libraries imported.")

Libraries imported.


2. Get data from Wiki and load data into data frame

In [2]:
# send the GET request
data = requests.get('https://en.wikipedia.org/wiki/List_of_townships/cities_and_districts_in_Taiwan').text

In [3]:
# parse data from html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# gets the table from the webpage
table = soup.find('table', class_ = 'wikitable sortable')

# gets the table rows
rows = table.find_all('tr')

In [5]:
# create three lists to store data
cityList = []
districtList = []
typeList = []
areaList = []
populationList = []

In [6]:
# append the data into the respective lists
for row in rows:    
    cells = row.find_all('td')
    if(len(cells) > 0):
        cityList.append(cells[0].text.rstrip('\n')) # avoid new lines in neighborhood cell
        districtList.append(cells[1].text.rstrip('\n'))
        typeList.append(cells[3].text.rstrip('\n'))
        areaList.append(cells[4].text.rstrip('\n'))
        populationList.append(cells[5].text.rstrip('\n'))

In [7]:
# create a new DataFrame with five lists
taiwan_df = pd.DataFrame({"City": cityList,
                          "District": districtList,
                          "Type": typeList,
                          "Area": areaList,
                          "Population": populationList})

taiwan_df.head(10)

Unnamed: 0,City,District,Type,Area,Population
0,New Taipei City,Banqiao District,District,556276,23.14
1,New Taipei City,Sanchong District,District,386206,16.32
2,New Taipei City,Zhonghe District,District,412856,20.14
3,New Taipei City,Yonghe District,District,220602,5.71
4,New Taipei City,Xinzhuang District,District,419962,19.74
5,New Taipei City,Xindian District,District,303288,120.23
6,New Taipei City,Shulin District,District,183982,33.13
7,New Taipei City,Yingge District,District,86780,21.12
8,New Taipei City,Sanxia District,District,116338,191.45
9,New Taipei City,Tamsui District,District,177988,70.66


In [8]:
taiwan_df.shape

(368, 5)

3. Drop cells that are not districts in Taiwan

In [9]:
taiwan_df = taiwan_df[taiwan_df.Type == "District"].reset_index(drop=True)
taiwan_df.head(10)

Unnamed: 0,City,District,Type,Area,Population
0,New Taipei City,Banqiao District,District,556276,23.14
1,New Taipei City,Sanchong District,District,386206,16.32
2,New Taipei City,Zhonghe District,District,412856,20.14
3,New Taipei City,Yonghe District,District,220602,5.71
4,New Taipei City,Xinzhuang District,District,419962,19.74
5,New Taipei City,Xindian District,District,303288,120.23
6,New Taipei City,Shulin District,District,183982,33.13
7,New Taipei City,Yingge District,District,86780,21.12
8,New Taipei City,Sanxia District,District,116338,191.45
9,New Taipei City,Tamsui District,District,177988,70.66


In [10]:
taiwan_df.shape

(164, 5)

4. Drop the column 'Type'

In [11]:
taiwan_df = taiwan_df.drop(["Type"], 1)
taiwan_df.head(10)

Unnamed: 0,City,District,Area,Population
0,New Taipei City,Banqiao District,556276,23.14
1,New Taipei City,Sanchong District,386206,16.32
2,New Taipei City,Zhonghe District,412856,20.14
3,New Taipei City,Yonghe District,220602,5.71
4,New Taipei City,Xinzhuang District,419962,19.74
5,New Taipei City,Xindian District,303288,120.23
6,New Taipei City,Shulin District,183982,33.13
7,New Taipei City,Yingge District,86780,21.12
8,New Taipei City,Sanxia District,116338,191.45
9,New Taipei City,Tamsui District,177988,70.66


In [12]:
taiwan_df.shape

(164, 4)

5. Reduce the number of districts to explore. Focus on districts in 1) Taipei City/New Taipei City and 2) Tainan City only

In [13]:
taipei_df = taiwan_df[taiwan_df['City'].isin (["Taipei City", "New Taipei City"])].reset_index(drop=True)
taipei_df.head(10)

Unnamed: 0,City,District,Area,Population
0,New Taipei City,Banqiao District,556276,23.14
1,New Taipei City,Sanchong District,386206,16.32
2,New Taipei City,Zhonghe District,412856,20.14
3,New Taipei City,Yonghe District,220602,5.71
4,New Taipei City,Xinzhuang District,419962,19.74
5,New Taipei City,Xindian District,303288,120.23
6,New Taipei City,Shulin District,183982,33.13
7,New Taipei City,Yingge District,86780,21.12
8,New Taipei City,Sanxia District,116338,191.45
9,New Taipei City,Tamsui District,177988,70.66


In [14]:
taipei_df.shape

(40, 4)

In [15]:
tainan_df = taiwan_df[taiwan_df.City == "Tainan City"].reset_index(drop=True)
tainan_df.head(10)

Unnamed: 0,City,District,Area,Population
0,Tainan City,Xinying District,77072,38.54
1,Tainan City,Yanshuei District,25266,52.25
2,Tainan City,Baihe District,27744,126.4
3,Tainan City,Liuying District,21103,61.29
4,Tainan City,Houbi District,23055,72.22
5,Tainan City,Dongshan District,20501,124.92
6,Tainan City,Madou District,43987,53.97
7,Tainan City,Xiaying District,23700,33.53
8,Tainan City,Lioujia District,21917,67.55
9,Tainan City,Guantian District,21343,70.8


In [16]:
tainan_df.shape

(37, 4)

6. Get coordinates for the targeted districts

In [17]:
def get_coordinates(address):
    print(address)
    geolocator = Nominatim(user_agent="cn_explorer")
    location = None
    while(location is None):
        location = geolocator.geocode(address)

#    latitude = location.latitude
#    longitude = location.longitude
    return location.latitude, location.longitude

In [18]:
dist = taipei_df['District'].tolist()
coords = [get_coordinates('Taiwan, Taipei, ' + d) for d in dist]

Taiwan, Taipei, Banqiao District
Taiwan, Taipei, Sanchong District
Taiwan, Taipei, Zhonghe District
Taiwan, Taipei, Yonghe District
Taiwan, Taipei, Xinzhuang District
Taiwan, Taipei, Xindian District
Taiwan, Taipei, Shulin District
Taiwan, Taipei, Yingge District
Taiwan, Taipei, Sanxia District
Taiwan, Taipei, Tamsui District
Taiwan, Taipei, Xizhi District
Taiwan, Taipei, Ruifang District
Taiwan, Taipei, Tucheng District
Taiwan, Taipei, Luzhou District
Taiwan, Taipei, Wugu District
Taiwan, Taipei, Taishan District
Taiwan, Taipei, Linkou District
Taiwan, Taipei, Shenkeng District
Taiwan, Taipei, Shiding District
Taiwan, Taipei, Pinglin District
Taiwan, Taipei, Sanzhi District
Taiwan, Taipei, Shimen District
Taiwan, Taipei, Bali District
Taiwan, Taipei, Pingxi District
Taiwan, Taipei, Shuangxi District
Taiwan, Taipei, Gongliao District
Taiwan, Taipei, Jinshan District
Taiwan, Taipei, Wanli District
Taiwan, Taipei, Songshan District
Taiwan, Taipei, Xinyi District
Taiwan, Taipei, Daan Dist

In [19]:
coords_df = pd.DataFrame(coords, columns=['Latitude','Longitude'])
taipei_df['Latitude'] = coords_df['Latitude']
taipei_df['Longitude'] = coords_df['Longitude']
taipei_df.head(10)

Unnamed: 0,City,District,Area,Population,Latitude,Longitude
0,New Taipei City,Banqiao District,556276,23.14,25.00967,121.459099
1,New Taipei City,Sanchong District,386206,16.32,25.061486,121.488102
2,New Taipei City,Zhonghe District,412856,20.14,24.999397,121.49898
3,New Taipei City,Yonghe District,220602,5.71,25.009235,121.52007
4,New Taipei City,Xinzhuang District,419962,19.74,25.035772,121.450248
5,New Taipei City,Xindian District,303288,120.23,24.928408,121.539007
6,New Taipei City,Shulin District,183982,33.13,24.990706,121.420533
7,New Taipei City,Yingge District,86780,21.12,24.959945,121.353771
8,New Taipei City,Sanxia District,116338,191.45,24.934339,121.368905
9,New Taipei City,Tamsui District,177988,70.66,25.183559,121.459954


In [20]:
taipei_df.shape

(40, 6)

In [21]:
dist = tainan_df['District'].tolist()
coords = [get_coordinates('Taiwan, Tainan, ' + d) for d in dist]

Taiwan, Tainan, Xinying District
Taiwan, Tainan, Yanshuei District
Taiwan, Tainan, Baihe District
Taiwan, Tainan, Liuying District
Taiwan, Tainan, Houbi District
Taiwan, Tainan, Dongshan District
Taiwan, Tainan, Madou District
Taiwan, Tainan, Xiaying District
Taiwan, Tainan, Lioujia District
Taiwan, Tainan, Guantian District
Taiwan, Tainan, Danei District
Taiwan, Tainan, Jiali District
Taiwan, Tainan, Syuejia District
Taiwan, Tainan, Sigang District
Taiwan, Tainan, Cigu District
Taiwan, Tainan, Jiangjun District
Taiwan, Tainan, Beimen District
Taiwan, Tainan, Xinhua District
Taiwan, Tainan, Shanhua District
Taiwan, Tainan, Sinshih District
Taiwan, Tainan, Anding District
Taiwan, Tainan, Shanshang District
Taiwan, Tainan, Yujing District
Taiwan, Tainan, Nansi District
Taiwan, Tainan, Nanhua District
Taiwan, Tainan, Zuojhen District
Taiwan, Tainan, Rende District
Taiwan, Tainan, Gueiren District
Taiwan, Tainan, Guanmiao District
Taiwan, Tainan, Longci District
Taiwan, Tainan, Yongkang Di

In [22]:
coords_df = pd.DataFrame(coords, columns=['Latitude','Longitude'])
tainan_df['Latitude'] = coords_df['Latitude']
tainan_df['Longitude'] = coords_df['Longitude']
tainan_df.head(10)

Unnamed: 0,City,District,Area,Population,Latitude,Longitude
0,Tainan City,Xinying District,77072,38.54,23.308698,120.316957
1,Tainan City,Yanshuei District,25266,52.25,23.319828,120.266398
2,Tainan City,Baihe District,27744,126.4,23.351473,120.413745
3,Tainan City,Liuying District,21103,61.29,23.279601,120.311157
4,Tainan City,Houbi District,23055,72.22,23.366721,120.362726
5,Tainan City,Dongshan District,20501,124.92,23.326668,120.404059
6,Tainan City,Madou District,43987,53.97,23.181724,120.248206
7,Tainan City,Xiaying District,23700,33.53,23.235413,120.264484
8,Tainan City,Lioujia District,21917,67.55,23.232901,120.352221
9,Tainan City,Guantian District,21343,70.8,23.191997,120.316493


In [23]:
tainan_df.shape

(37, 6)

7. Use geopy to get the latitude and longitude of Taipei

In [24]:
tp_address = 'Taipei'

geolocator = Nominatim(user_agent="cn_explorer")
location = geolocator.geocode(tp_address)
tp_latitude = location.latitude
tp_longitude = location.longitude
print('The geograpical coordinates of Taipei are {}, {}.'.format(tp_latitude, tp_longitude))

The geograpical coordinates of Taipei are 25.0375198, 121.5636796.


8. Create a map of Taipei with districts superimposed on top

In [25]:
# create map of Taipei using latitude and longitude values
map_taipei = folium.Map(location=[tp_latitude, tp_longitude], zoom_start=10)

# add markers to map
for lat, lng, city, district in zip(taipei_df['Latitude'], taipei_df['Longitude'], taipei_df['City'], taipei_df['District']):
    label = '{}, {}'.format(district, city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_taipei)  
    
map_taipei

9. Use geopy to get the latitude and longitude of Tainan

In [26]:
tn_address = 'Tainan'

location = geolocator.geocode(tn_address)
tn_latitude = location.latitude
tn_longitude = location.longitude
print('The geograpical coordinates of Tainan are {}, {}.'.format(tn_latitude, tn_longitude))

The geograpical coordinates of Tainan are 22.9912348, 120.184982.


10. Create a map of Tainan with districts superimposed on top

In [27]:
# create map of Tainan using latitude and longitude values
map_tainan = folium.Map(location=[tn_latitude, tn_longitude], zoom_start=10)

# add markers to map
for lat, lng, city, district in zip(tainan_df['Latitude'], tainan_df['Longitude'], tainan_df['City'], tainan_df['District']):
    label = '{}, {}'.format(district, city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_tainan)  
    
map_tainan

11. Use the Foursquare API to explore the districts in Taipei and Tainan

In [28]:
# define Foursquare Credentials and Version
CLIENT_ID = 'HEA5TNP2PLKJWEB5SNATL3FGVFVMIGUOGJLI3EB4TSGBL3A2' # your Foursquare ID
CLIENT_SECRET = 'IBESIA2OCCQ1TIYUBX1PVAELGHPYTNNM3YWA3JJ4JY3AXOPP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HEA5TNP2PLKJWEB5SNATL3FGVFVMIGUOGJLI3EB4TSGBL3A2
CLIENT_SECRET:IBESIA2OCCQ1TIYUBX1PVAELGHPYTNNM3YWA3JJ4JY3AXOPP


In [29]:
LIMIT = 100
radius = 800

# use getNearbyVenues function from course lab, but modify to use city and district
def getNearbyVenues(city, district, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for c, d, lat, lng in zip(city, district, latitudes, longitudes):
        print(c + ', ' + d)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            c,
            d, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City',
                  'District',
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
taipei_venues = getNearbyVenues(city=taipei_df['City'],
                                 district=taipei_df['District'],
                                 latitudes=taipei_df['Latitude'],
                                 longitudes=taipei_df['Longitude']
                                )

New Taipei City, Banqiao District
New Taipei City, Sanchong District
New Taipei City, Zhonghe District
New Taipei City, Yonghe District
New Taipei City, Xinzhuang District
New Taipei City, Xindian District
New Taipei City, Shulin District
New Taipei City, Yingge District
New Taipei City, Sanxia District
New Taipei City, Tamsui District
New Taipei City, Xizhi District
New Taipei City, Ruifang District
New Taipei City, Tucheng District
New Taipei City, Luzhou District
New Taipei City, Wugu District
New Taipei City, Taishan District
New Taipei City, Linkou District
New Taipei City, Shenkeng District
New Taipei City, Shiding District
New Taipei City, Pinglin District
New Taipei City, Sanzhi District
New Taipei City, Shimen District
New Taipei City, Bali District
New Taipei City, Pingxi District
New Taipei City, Shuangxi District
New Taipei City, Gongliao District
New Taipei City, Jinshan District
New Taipei City, Wanli District
Taipei City, Songshan District
Taipei City, Xinyi District
Tai

In [31]:
taipei_venues.head()

Unnamed: 0,City,District,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,New Taipei City,Banqiao District,25.00967,121.459099,府中15,25.009315,121.45958,Indie Movie Theater
1,New Taipei City,Banqiao District,25.00967,121.459099,馥都飯店 Grand Forward Hotel,25.007536,121.45858,Hotel
2,New Taipei City,Banqiao District,25.00967,121.459099,生炒魷魚,25.010272,121.457347,Seafood Restaurant
3,New Taipei City,Banqiao District,25.00967,121.459099,麥當勞 McDonald's,25.007962,121.46094,Fast Food Restaurant
4,New Taipei City,Banqiao District,25.00967,121.459099,六必居潮州沙鍋粥,25.00999,121.463072,Chinese Restaurant


In [32]:
# check the size of the resulting venue dataframe
taipei_venues.shape

(506, 8)

In [33]:
tainan_venues = getNearbyVenues(city=tainan_df['City'],
                                 district=tainan_df['District'],
                                 latitudes=tainan_df['Latitude'],
                                 longitudes=tainan_df['Longitude']
                                )

Tainan City, Xinying District
Tainan City, Yanshuei District
Tainan City, Baihe District
Tainan City, Liuying District
Tainan City, Houbi District
Tainan City, Dongshan District
Tainan City, Madou District
Tainan City, Xiaying District
Tainan City, Lioujia District
Tainan City, Guantian District
Tainan City, Danei District
Tainan City, Jiali District
Tainan City, Syuejia District
Tainan City, Sigang District
Tainan City, Cigu District
Tainan City, Jiangjun District
Tainan City, Beimen District
Tainan City, Xinhua District
Tainan City, Shanhua District
Tainan City, Sinshih District
Tainan City, Anding District
Tainan City, Shanshang District
Tainan City, Yujing District
Tainan City, Nansi District
Tainan City, Nanhua District
Tainan City, Zuojhen District
Tainan City, Rende District
Tainan City, Gueiren District
Tainan City, Guanmiao District
Tainan City, Longci District
Tainan City, Yongkang District
Tainan City, East District
Tainan City, South District
Tainan City, North District
Tai

In [34]:
tainan_venues.head()

Unnamed: 0,City,District,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Tainan City,Xinying District,23.308698,120.316957,美美冰果部,23.310415,120.316402,Dessert Shop
1,Tainan City,Xinying District,23.308698,120.316957,南瀛綠都心,23.309457,120.315478,Park
2,Tainan City,Xinying District,23.308698,120.316957,華味香鴨肉羹,23.310876,120.317258,Food
3,Tainan City,Xinying District,23.308698,120.316957,阿忠豆菜麵,23.31121,120.316548,Taiwanese Restaurant
4,Tainan City,Xinying District,23.308698,120.316957,阿松臭豆腐,23.313172,120.316683,Food Truck


In [35]:
# check the size of the resulting venue dataframe
tainan_venues.shape

(228, 8)

In [36]:
# show # of venues for each city/district in Taipei
taipei_venues.groupby(["City", "District"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,District,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
New Taipei City,Bali District,2,2,2,2,2,2
New Taipei City,Banqiao District,34,34,34,34,34,34
New Taipei City,Jinshan District,3,3,3,3,3,3
New Taipei City,Linkou District,10,10,10,10,10,10
New Taipei City,Luzhou District,4,4,4,4,4,4
New Taipei City,Pinglin District,9,9,9,9,9,9
New Taipei City,Pingxi District,5,5,5,5,5,5
New Taipei City,Ruifang District,1,1,1,1,1,1
New Taipei City,Sanchong District,4,4,4,4,4,4
New Taipei City,Sanxia District,9,9,9,9,9,9


In [37]:
print('There are {} uniques categories.'.format(len(taipei_venues['Venue Category'].unique())))

There are 122 uniques categories.


In [38]:
taipei_venues['Venue Category'].unique()

array(['Indie Movie Theater', 'Hotel', 'Seafood Restaurant',
       'Fast Food Restaurant', 'Chinese Restaurant', 'Garden',
       'Karaoke Bar', 'Café', 'Ice Cream Shop', 'Gym', 'Temple',
       'Movie Theater', 'Ramen Restaurant', 'Restaurant',
       'Hotpot Restaurant', 'Coffee Shop', 'Bookstore',
       'Vegetarian / Vegan Restaurant', 'Convenience Store',
       'Noodle House', 'Stadium', 'Dessert Shop', 'Japanese Restaurant',
       'Asian Restaurant', 'Steakhouse', 'Taiwanese Restaurant',
       'Farmers Market', 'Shopping Mall', 'Park', 'Sporting Goods Shop',
       'Supermarket', 'Bus Station', 'Electronics Store', 'BBQ Joint',
       'Arcade', 'Pharmacy', 'Furniture / Home Store', 'Breakfast Spot',
       'Museum', 'Gym / Fitness Center', 'Night Market',
       'Food & Drink Shop', 'Train Station', 'Food Truck',
       'Italian Restaurant', 'Bus Stop', 'Tea Room', 'Art Gallery',
       'Food', 'Bakery', 'Snack Place', 'Market', 'Golf Course',
       'Indie Theater', 'Korean 

In [39]:
# show # of venues for each city/district in Tainan
tainan_venues.groupby(["City", "District"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
City,District,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Tainan City,Anding District,1,1,1,1,1,1
Tainan City,Annan District,4,4,4,4,4,4
Tainan City,Anping District,9,9,9,9,9,9
Tainan City,Baihe District,4,4,4,4,4,4
Tainan City,Beimen District,4,4,4,4,4,4
Tainan City,Cigu District,2,2,2,2,2,2
Tainan City,Danei District,2,2,2,2,2,2
Tainan City,Dongshan District,5,5,5,5,5,5
Tainan City,East District,19,19,19,19,19,19
Tainan City,Guanmiao District,4,4,4,4,4,4


In [40]:
print('There are {} uniques categories.'.format(len(tainan_venues['Venue Category'].unique())))

There are 77 uniques categories.


In [41]:
tainan_venues['Venue Category'].unique()

array(['Dessert Shop', 'Park', 'Food', 'Taiwanese Restaurant',
       'Food Truck', 'Historic Site', 'Noodle House', 'Lake',
       'Indie Movie Theater', 'Night Market', 'Restaurant', 'Bus Station',
       'Flower Shop', 'Convenience Store', 'Gourmet Shop',
       'History Museum', 'Bakery', 'Train Station', 'Deli / Bodega',
       'Chinese Restaurant', 'Asian Restaurant', 'Tea Room',
       'Ice Cream Shop', 'Multiplex', 'Grocery Store', 'Movie Theater',
       'Pharmacy', 'Food Stand', 'Fruit & Vegetable Store', 'Flea Market',
       'Snack Place', 'Optical Shop', 'Public Art', 'Electronics Store',
       'Steakhouse', 'Water Park', 'Seafood Restaurant', 'Art Gallery',
       'Tourist Information Center', 'Outdoor Sculpture', 'Road',
       'Thai Restaurant', 'Dim Sum Restaurant', 'Indian Restaurant',
       'Fast Food Restaurant', 'Farmers Market', 'Campground', 'Trail',
       'Breakfast Spot', 'American Restaurant', 'Pet Store',
       'Bubble Tea Shop', 'Dog Run', 'Intersection'

13. Analyze each district

Create one hot encoding for Taipei:

In [42]:
# one hot encoding
taipei_onehot = pd.get_dummies(taipei_venues[['Venue Category']], prefix="", prefix_sep="")

# add city, district column back to dataframe
taipei_onehot['City'] = taipei_venues['City'] 
taipei_onehot['District'] = taipei_venues['District'] 

# move city, district column to the first column
fixed_columns = list(taipei_onehot.columns[-2:]) + list(taipei_onehot.columns[:-2])
taipei_onehot = taipei_onehot[fixed_columns]

taipei_onehot.head()

Unnamed: 0,City,District,American Restaurant,Arcade,Art Gallery,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,...,Temple,Thai Restaurant,Theater,Tourist Information Center,Toy / Game Store,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Women's Store
0,New Taipei City,Banqiao District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,New Taipei City,Banqiao District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,New Taipei City,Banqiao District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,New Taipei City,Banqiao District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,New Taipei City,Banqiao District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
taipei_onehot.shape

(506, 124)

In [44]:
taipei_grouped = taipei_onehot.groupby(["City", "District"]).mean().reset_index()
taipei_grouped

Unnamed: 0,City,District,American Restaurant,Arcade,Art Gallery,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,...,Temple,Thai Restaurant,Theater,Tourist Information Center,Toy / Game Store,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Women's Store
0,New Taipei City,Bali District,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,New Taipei City,Banqiao District,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,...,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0
2,New Taipei City,Jinshan District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,New Taipei City,Linkou District,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,New Taipei City,Luzhou District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,New Taipei City,Pinglin District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0
6,New Taipei City,Pingxi District,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
7,New Taipei City,Ruifang District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,New Taipei City,Sanchong District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,New Taipei City,Sanxia District,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
taipei_grouped.shape

(37, 124)

Create one hot encoding for Tainan:

In [46]:
# one hot encoding
tainan_onehot = pd.get_dummies(tainan_venues[['Venue Category']], prefix="", prefix_sep="")

# add city,district column back to dataframe
tainan_onehot['City'] = tainan_venues['City'] 
tainan_onehot['District'] = tainan_venues['District'] 

# move city,district column to the first column
fixed_columns = list(tainan_onehot.columns[-2:]) + list(tainan_onehot.columns[:-2])
tainan_onehot = tainan_onehot[fixed_columns]

tainan_onehot.head()

Unnamed: 0,City,District,Accessories Store,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,BBQ Joint,Bakery,Bar,...,Soba Restaurant,Soup Place,Steakhouse,Taiwanese Restaurant,Tea Room,Thai Restaurant,Tourist Information Center,Trail,Train Station,Water Park
0,Tainan City,Xinying District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Tainan City,Xinying District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Tainan City,Xinying District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Tainan City,Xinying District,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,Tainan City,Xinying District,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [47]:
tainan_onehot.shape

(228, 79)

In [48]:
tainan_grouped = tainan_onehot.groupby(["City", "District"]).mean().reset_index()
tainan_grouped

Unnamed: 0,City,District,Accessories Store,American Restaurant,Art Gallery,Art Museum,Asian Restaurant,BBQ Joint,Bakery,Bar,...,Soba Restaurant,Soup Place,Steakhouse,Taiwanese Restaurant,Tea Room,Thai Restaurant,Tourist Information Center,Trail,Train Station,Water Park
0,Tainan City,Anding District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Tainan City,Annan District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
2,Tainan City,Anping District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.222222,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0
3,Tainan City,Baihe District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
4,Tainan City,Beimen District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
5,Tainan City,Cigu District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
6,Tainan City,Danei District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Tainan City,Dongshan District,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0
8,Tainan City,East District,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.052632,...,0.0,0.0,0.0,0.105263,0.105263,0.0,0.0,0.0,0.0,0.0
9,Tainan City,Guanmiao District,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
tainan_grouped.shape

(37, 79)

Print each district along with top 5 most common venues 

For Taipei:

In [50]:
num_top_venues = 5

for dist in taipei_grouped['District']:
    print("----"+dist+"----")
    temp = taipei_grouped[taipei_grouped['District'] == dist].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[3:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bali District----
               venue  freq
0   Asian Restaurant   0.5
1  Convenience Store   0.5
2             Arcade   0.0
3      Movie Theater   0.0
4              Plaza   0.0


----Banqiao District----
                  venue  freq
0                  Café  0.09
1     Hotpot Restaurant  0.06
2  Fast Food Restaurant  0.06
3                 Hotel  0.06
4    Chinese Restaurant  0.03


----Jinshan District----
           venue  freq
0          Hotel  0.33
1           Farm  0.33
2    Snack Place  0.33
3         Arcade  0.00
4  Movie Theater  0.00


----Linkou District----
                 venue  freq
0       Breakfast Spot   0.1
1             Pharmacy   0.1
2     Asian Restaurant   0.1
3                 Café   0.1
4  Dumpling Restaurant   0.1


----Luzhou District----
          venue  freq
0   Pizza Place  0.25
1  Concert Hall  0.25
2  Noodle House  0.25
3  Night Market  0.25
4        Arcade  0.00


----Pinglin District----
                venue  freq
0            Tea Room  0.33
1  

For Tainan:

In [51]:
for dist in tainan_grouped['District']:
    print("----"+dist+"----")
    temp = tainan_grouped[tainan_grouped['District'] == dist].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[3:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Anding District----
                 venue  freq
0    Convenience Store   1.0
1  American Restaurant   0.0
2  Monument / Landmark   0.0
3         Optical Shop   0.0
4         Noodle House   0.0


----Annan District----
                 venue  freq
0         Concert Hall  0.25
1             Tea Room  0.25
2           Soup Place  0.25
3   Seafood Restaurant  0.25
4  American Restaurant  0.00


----Anping District----
                venue  freq
0          Soup Place  0.22
1         Snack Place  0.11
2  Seafood Restaurant  0.11
3        Dessert Shop  0.11
4   Convenience Store  0.11


----Baihe District----
                  venue  freq
0           Bus Station  0.25
1            Restaurant  0.25
2  Taiwanese Restaurant  0.25
3           Flower Shop  0.25
4    Miscellaneous Shop  0.00


----Beimen District----
                        venue  freq
0              History Museum  0.25
1               Grocery Store  0.25
2  Tourist Information Center  0.25
3           Outdoor Sculpture  0.2

In [52]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Put top 5 most common venues into a new dataframe

For Taipei:

In [53]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City','District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['City'] = taipei_grouped['City']
district_venues_sorted['District'] = taipei_grouped['District']


for ind in np.arange(taipei_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 2:] = return_most_common_venues(taipei_grouped.iloc[ind, 1:], num_top_venues)

district_venues_sorted

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,New Taipei City,Bali District,Convenience Store,Asian Restaurant,Women's Store,Food Truck,Discount Store
1,New Taipei City,Banqiao District,Café,Hotel,Hotpot Restaurant,Fast Food Restaurant,Ice Cream Shop
2,New Taipei City,Jinshan District,Hotel,Farm,Snack Place,Food Court,Dim Sum Restaurant
3,New Taipei City,Linkou District,Pharmacy,Breakfast Spot,Café,Asian Restaurant,Noodle House
4,New Taipei City,Luzhou District,Pizza Place,Concert Hall,Noodle House,Night Market,Women's Store
5,New Taipei City,Pinglin District,Tea Room,Convenience Store,Chinese Restaurant,Taiwanese Restaurant,Museum
6,New Taipei City,Pingxi District,Bus Station,Platform,Train Station,Café,BBQ Joint
7,New Taipei City,Ruifang District,Taiwanese Restaurant,Women's Store,Food Stand,Dim Sum Restaurant,Diner
8,New Taipei City,Sanchong District,Convenience Store,Restaurant,Café,Taiwanese Restaurant,Women's Store
9,New Taipei City,Sanxia District,Dessert Shop,Bakery,Snack Place,Market,Café


In [54]:
district_venues_sorted.shape

(37, 7)

For Tainan:

In [55]:
# create a new dataframe
tn_district_venues_sorted = pd.DataFrame(columns=columns)
tn_district_venues_sorted['City'] = tainan_grouped['City']
tn_district_venues_sorted['District'] = tainan_grouped['District']


for ind in np.arange(tainan_grouped.shape[0]):
    tn_district_venues_sorted.iloc[ind, 2:] = return_most_common_venues(tainan_grouped.iloc[ind, 1:], num_top_venues)

tn_district_venues_sorted

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Tainan City,Anding District,Convenience Store,Fast Food Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant
1,Tainan City,Annan District,Concert Hall,Seafood Restaurant,Tea Room,Soup Place,Deli / Bodega
2,Tainan City,Anping District,Soup Place,Convenience Store,Food Truck,Dessert Shop,Taiwanese Restaurant
3,Tainan City,Baihe District,Taiwanese Restaurant,Flower Shop,Restaurant,Bus Station,Water Park
4,Tainan City,Beimen District,Grocery Store,Outdoor Sculpture,Tourist Information Center,History Museum,Farmers Market
5,Tainan City,Cigu District,Water Park,Seafood Restaurant,Convenience Store,Department Store,Dessert Shop
6,Tainan City,Danei District,Public Art,Electronics Store,Water Park,Convenience Store,Department Store
7,Tainan City,Dongshan District,Asian Restaurant,Food Truck,Tea Room,Taiwanese Restaurant,Water Park
8,Tainan City,East District,Noodle House,Tea Room,Taiwanese Restaurant,Chinese Restaurant,Café
9,Tainan City,Guanmiao District,Asian Restaurant,Taiwanese Restaurant,Chinese Restaurant,Bus Station,Water Park


In [56]:
tn_district_venues_sorted.shape

(37, 7)

14. Cluster areas using k-means into 5 clusters

Run k-means clustering for Taipei:

In [57]:
# set number of clusters
kclusters = 5

taipei_grouped_clustering = taipei_grouped.drop(["City", "District"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(taipei_grouped_clustering)

# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:10]
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 1, 0, 4, 0, 0, 0, 0, 2, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [58]:
# add clustering labels
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

taipei_merged = taipei_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
taipei_merged = taipei_merged.join(district_venues_sorted.drop(["City"], 1).set_index("District"), on="District")

taipei_merged # check the last columns!

Unnamed: 0,City,District,Area,Population,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,New Taipei City,Banqiao District,556276,23.14,25.00967,121.459099,0.0,Café,Hotel,Hotpot Restaurant,Fast Food Restaurant,Ice Cream Shop
1,New Taipei City,Sanchong District,386206,16.32,25.061486,121.488102,0.0,Convenience Store,Restaurant,Café,Taiwanese Restaurant,Women's Store
2,New Taipei City,Zhonghe District,412856,20.14,24.999397,121.49898,0.0,Hotel,Coffee Shop,Café,Sporting Goods Shop,Furniture / Home Store
3,New Taipei City,Yonghe District,220602,5.71,25.009235,121.52007,0.0,Convenience Store,Museum,Farmers Market,Steakhouse,Coffee Shop
4,New Taipei City,Xinzhuang District,419962,19.74,25.035772,121.450248,0.0,Hotpot Restaurant,Coffee Shop,Gym / Fitness Center,Night Market,Food & Drink Shop
5,New Taipei City,Xindian District,303288,120.23,24.928408,121.539007,,,,,,
6,New Taipei City,Shulin District,183982,33.13,24.990706,121.420533,0.0,Food Truck,Italian Restaurant,Coffee Shop,Bus Stop,Fast Food Restaurant
7,New Taipei City,Yingge District,86780,21.12,24.959945,121.353771,0.0,Art Gallery,Convenience Store,Food,Women's Store,Food Truck
8,New Taipei City,Sanxia District,116338,191.45,24.934339,121.368905,0.0,Dessert Shop,Bakery,Snack Place,Market,Café
9,New Taipei City,Tamsui District,177988,70.66,25.183559,121.459954,0.0,Indie Theater,Asian Restaurant,Golf Course,Women's Store,Farmers Market


In [59]:
#taipei_merged = taipei_merged[taipei_merged['Cluster Labels'] != "NaN"].reset_index(drop=True)
#taipei_merged[taipei_merged['Cluster Labels'] != np.nan]

There are 3 districts that have NO venue data returned from Foursquare, casuing 'cluster labels' = NaN after a DB join. Need to determine how to handle these districts. I decide to drop these districts to simplify the issue here. We can source additional venue data in further studies to resolve this issue. 

In [60]:
# 3 districts have NO venue info, replace NaN to 5 (thinking if we should consider these districts forming the 6th cluster)
taipei_merged['Cluster Labels'].replace(np.nan, 5, inplace=True)

In [61]:
# decide to drop these districts to simplify the issue 
taipei_merged = taipei_merged[taipei_merged['Cluster Labels'] != 5].reset_index(drop=True)

In [62]:
# convert type from float to int, in order to run the clustering
taipei_merged[['Cluster Labels']] = taipei_merged[['Cluster Labels']].astype("int")

In [63]:
taipei_merged

Unnamed: 0,City,District,Area,Population,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,New Taipei City,Banqiao District,556276,23.14,25.00967,121.459099,0,Café,Hotel,Hotpot Restaurant,Fast Food Restaurant,Ice Cream Shop
1,New Taipei City,Sanchong District,386206,16.32,25.061486,121.488102,0,Convenience Store,Restaurant,Café,Taiwanese Restaurant,Women's Store
2,New Taipei City,Zhonghe District,412856,20.14,24.999397,121.49898,0,Hotel,Coffee Shop,Café,Sporting Goods Shop,Furniture / Home Store
3,New Taipei City,Yonghe District,220602,5.71,25.009235,121.52007,0,Convenience Store,Museum,Farmers Market,Steakhouse,Coffee Shop
4,New Taipei City,Xinzhuang District,419962,19.74,25.035772,121.450248,0,Hotpot Restaurant,Coffee Shop,Gym / Fitness Center,Night Market,Food & Drink Shop
5,New Taipei City,Shulin District,183982,33.13,24.990706,121.420533,0,Food Truck,Italian Restaurant,Coffee Shop,Bus Stop,Fast Food Restaurant
6,New Taipei City,Yingge District,86780,21.12,24.959945,121.353771,0,Art Gallery,Convenience Store,Food,Women's Store,Food Truck
7,New Taipei City,Sanxia District,116338,191.45,24.934339,121.368905,0,Dessert Shop,Bakery,Snack Place,Market,Café
8,New Taipei City,Tamsui District,177988,70.66,25.183559,121.459954,0,Indie Theater,Asian Restaurant,Golf Course,Women's Store,Farmers Market
9,New Taipei City,Xizhi District,203069,71.24,25.064159,121.658748,0,Korean Restaurant,Convenience Store,Fast Food Restaurant,Japanese Restaurant,Chinese Restaurant


In [64]:
taipei_merged.shape

(37, 12)

Run k-means clustering for Tainan:

In [65]:
tainan_grouped_clustering = tainan_grouped.drop(["City", "District"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tainan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:10]
kmeans.labels_

array([2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 2, 3, 1, 1, 0, 1,
       1, 0, 2, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4], dtype=int32)

In [66]:
# add clustering labels
tn_district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tainan_merged = tainan_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tainan_merged = tainan_merged.join(tn_district_venues_sorted.drop(["City"], 1).set_index("District"), on="District")

tainan_merged # check the last columns!

Unnamed: 0,City,District,Area,Population,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Tainan City,Xinying District,77072,38.54,23.308698,120.316957,1,Park,Food Truck,Dessert Shop,Taiwanese Restaurant,Food
1,Tainan City,Yanshuei District,25266,52.25,23.319828,120.266398,1,Noodle House,Indie Movie Theater,Night Market,Dessert Shop,Lake
2,Tainan City,Baihe District,27744,126.4,23.351473,120.413745,1,Taiwanese Restaurant,Flower Shop,Restaurant,Bus Station,Water Park
3,Tainan City,Liuying District,21103,61.29,23.279601,120.311157,2,Convenience Store,History Museum,Gourmet Shop,Art Gallery,Art Museum
4,Tainan City,Houbi District,23055,72.22,23.366721,120.362726,0,Train Station,Bakery,Chinese Restaurant,Deli / Bodega,Fruit & Vegetable Store
5,Tainan City,Dongshan District,20501,124.92,23.326668,120.404059,1,Asian Restaurant,Food Truck,Tea Room,Taiwanese Restaurant,Water Park
6,Tainan City,Madou District,43987,53.97,23.181724,120.248206,1,Grocery Store,Multiplex,Ice Cream Shop,Movie Theater,Farmers Market
7,Tainan City,Xiaying District,23700,33.53,23.235413,120.264484,1,Fruit & Vegetable Store,Food Stand,Pharmacy,Restaurant,Convenience Store
8,Tainan City,Lioujia District,21917,67.55,23.232901,120.352221,0,Snack Place,Flea Market,Chinese Restaurant,Optical Shop,Food Truck
9,Tainan City,Guantian District,21343,70.8,23.191997,120.316493,1,Park,Night Market,Train Station,Dessert Shop,Farmers Market


In [67]:
tainan_merged.shape

(37, 12)

15. Visualize clusters

Create a map of Taipei and display the resulting clustering

In [68]:
# create map
map_clusters_taipei = folium.Map(location=[tp_latitude, tp_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
#for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
for lat, lon, c, poi, cluster in zip(taipei_merged['Latitude'], taipei_merged['Longitude'], taipei_merged['City'], taipei_merged['District'], taipei_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_taipei)
       
map_clusters_taipei

Create a map of Tainan and display the resulting clustering

In [69]:
# create map
map_clusters_tainan = folium.Map(location=[tn_latitude, tn_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
#for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
for lat, lon, c, poi, cluster in zip(tainan_merged['Latitude'], tainan_merged['Longitude'], tainan_merged['City'], tainan_merged['District'], tainan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_tainan)
       
map_clusters_tainan

16. Examine clusters

### For Taipei:

Cluster #1, the major cluster in Taipei: 

In [70]:
taipei_merged.loc[taipei_merged['Cluster Labels'] == 0, taipei_merged.columns[[0,1] + list(range(7, taipei_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,New Taipei City,Banqiao District,Café,Hotel,Hotpot Restaurant,Fast Food Restaurant,Ice Cream Shop
1,New Taipei City,Sanchong District,Convenience Store,Restaurant,Café,Taiwanese Restaurant,Women's Store
2,New Taipei City,Zhonghe District,Hotel,Coffee Shop,Café,Sporting Goods Shop,Furniture / Home Store
3,New Taipei City,Yonghe District,Convenience Store,Museum,Farmers Market,Steakhouse,Coffee Shop
4,New Taipei City,Xinzhuang District,Hotpot Restaurant,Coffee Shop,Gym / Fitness Center,Night Market,Food & Drink Shop
5,New Taipei City,Shulin District,Food Truck,Italian Restaurant,Coffee Shop,Bus Stop,Fast Food Restaurant
6,New Taipei City,Yingge District,Art Gallery,Convenience Store,Food,Women's Store,Food Truck
7,New Taipei City,Sanxia District,Dessert Shop,Bakery,Snack Place,Market,Café
8,New Taipei City,Tamsui District,Indie Theater,Asian Restaurant,Golf Course,Women's Store,Farmers Market
9,New Taipei City,Xizhi District,Korean Restaurant,Convenience Store,Fast Food Restaurant,Japanese Restaurant,Chinese Restaurant


Cluster #2:

In [71]:
taipei_merged.loc[taipei_merged['Cluster Labels'] == 1, taipei_merged.columns[[0,1] + list(range(7, taipei_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
10,New Taipei City,Ruifang District,Taiwanese Restaurant,Women's Store,Food Stand,Dim Sum Restaurant,Diner
16,New Taipei City,Shenkeng District,Taiwanese Restaurant,Women's Store,Food Stand,Dim Sum Restaurant,Diner


Cluster #3:

In [72]:
taipei_merged.loc[taipei_merged['Cluster Labels'] == 2, taipei_merged.columns[[0,1] + list(range(7, taipei_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
24,New Taipei City,Wanli District,Electronics Store,Women's Store,Dessert Shop,Diner,Discount Store


Cluster #4:

In [73]:
taipei_merged.loc[taipei_merged['Cluster Labels'] == 3, taipei_merged.columns[[0,1] + list(range(7, taipei_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
19,New Taipei City,Sanzhi District,Chinese Restaurant,Women's Store,Food Stand,Diner,Discount Store


Cluster #5:

In [74]:
taipei_merged.loc[taipei_merged['Cluster Labels'] == 4, taipei_merged.columns[[0,1] + list(range(7, taipei_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
22,New Taipei City,Shuangxi District,Women's Store,Dessert Shop,Diner,Discount Store,Donut Shop


### For Tainan:

Cluster #1:

In [75]:
tainan_merged.loc[tainan_merged['Cluster Labels'] == 0, tainan_merged.columns[[0,1] + list(range(7, tainan_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,Tainan City,Houbi District,Train Station,Bakery,Chinese Restaurant,Deli / Bodega,Fruit & Vegetable Store
8,Tainan City,Lioujia District,Snack Place,Flea Market,Chinese Restaurant,Optical Shop,Food Truck
13,Tainan City,Sigang District,Historic Site,Chinese Restaurant,Night Market,Convenience Store,Art Museum
18,Tainan City,Shanhua District,Chinese Restaurant,Night Market,Thai Restaurant,Farmers Market,Department Store
23,Tainan City,Nansi District,Asian Restaurant,Chinese Restaurant,Campground,Water Park,Fast Food Restaurant
28,Tainan City,Guanmiao District,Asian Restaurant,Taiwanese Restaurant,Chinese Restaurant,Bus Station,Water Park


Cluster #2, the major cluster in Tainan:

In [76]:
tainan_merged.loc[tainan_merged['Cluster Labels'] == 1, tainan_merged.columns[[0,1] + list(range(7, tainan_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Tainan City,Xinying District,Park,Food Truck,Dessert Shop,Taiwanese Restaurant,Food
1,Tainan City,Yanshuei District,Noodle House,Indie Movie Theater,Night Market,Dessert Shop,Lake
2,Tainan City,Baihe District,Taiwanese Restaurant,Flower Shop,Restaurant,Bus Station,Water Park
5,Tainan City,Dongshan District,Asian Restaurant,Food Truck,Tea Room,Taiwanese Restaurant,Water Park
6,Tainan City,Madou District,Grocery Store,Multiplex,Ice Cream Shop,Movie Theater,Farmers Market
7,Tainan City,Xiaying District,Fruit & Vegetable Store,Food Stand,Pharmacy,Restaurant,Convenience Store
9,Tainan City,Guantian District,Park,Night Market,Train Station,Dessert Shop,Farmers Market
10,Tainan City,Danei District,Public Art,Electronics Store,Water Park,Convenience Store,Department Store
11,Tainan City,Jiali District,Park,Asian Restaurant,Steakhouse,Bus Station,Water Park
12,Tainan City,Syuejia District,Taiwanese Restaurant,Dessert Shop,Ice Cream Shop,Asian Restaurant,Bakery


Cluster #3:

In [77]:
tainan_merged.loc[tainan_merged['Cluster Labels'] == 2, tainan_merged.columns[[0,1] + list(range(7, tainan_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,Tainan City,Liuying District,Convenience Store,History Museum,Gourmet Shop,Art Gallery,Art Museum
20,Tainan City,Anding District,Convenience Store,Fast Food Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant
21,Tainan City,Shanshang District,Convenience Store,Fast Food Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant
32,Tainan City,South District,Convenience Store,Noodle House,Garden,Farmers Market,Department Store


Cluster #4:

In [78]:
tainan_merged.loc[tainan_merged['Cluster Labels'] == 3, tainan_merged.columns[[0,1] + list(range(7, tainan_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
29,Tainan City,Longci District,Dog Run,Water Park,Fast Food Restaurant,Department Store,Dessert Shop


Cluster #5:

In [79]:
tainan_merged.loc[tainan_merged['Cluster Labels'] == 4, tainan_merged.columns[[0,1] + list(range(7, tainan_merged.shape[1]))]]

Unnamed: 0,City,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
25,Tainan City,Zuojhen District,History Museum,Water Park,Flea Market,Dessert Shop,Dim Sum Restaurant
