## Capstone Project - The Battle of Neighborhoods (Week 1)(Data section)  

### 0. Import packages

In [236]:
import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas import json_normalize
from sklearn.cluster import KMeans

### 1. Latitude and longitude of townships in Taiwan

- 3-digit postal codes with the latitude and longitude of each administrative district's center point  
    - Source [National Development Council] https://data.gov.tw/dataset/25489  
    - Download [Taiwan_postal.csv] https://quality.data.gov.tw/dq_download_csv.php?nid=25489&md5_url=ab48007db9f630e51fec0cb608e32d61  
- County and township names in Chinese and English, Excel file (Hanyu Pinyin romanization)
    - Source [Chunghwa Post Co., Ltd.] https://www.post.gov.tw/post/internet/Download/index.jsp?ID=220306  
    - Download [county_mapping.xls] https://www.post.gov.tw/post/download/county_h_10706.xls  

In [83]:
# Read the postal-code table, give the columns used later English names and
# discard the TGOS_URL column. Built as one chain so re-running the cell always
# starts again from the file on disk.
ll_data = (
    pd.read_csv('Taiwan_postal.csv')
    .rename(
        columns={
            "_x0033_碼郵遞區號": "Postal code",
            "中心點經度": "Longitude",
            "中心點緯度": "Latitude",
        }
    )
    .drop(columns=['TGOS_URL'])
)
ll_data

Unnamed: 0,行政區名,Postal code,Longitude,Latitude
0,臺北市中正區,100,121.519884,25.032405
1,臺北市大同區,103,121.513042,25.063424
2,臺北市中山區,104,121.538160,25.069699
3,臺北市松山區,105,121.557588,25.059991
4,臺北市大安區,106,121.543445,25.026770
...,...,...,...,...
366,花蓮縣瑞穗鄉,978,121.407347,23.515612
367,花蓮縣萬榮鄉,979,121.318953,23.727726
368,花蓮縣玉里鎮,981,121.360448,23.371436
369,花蓮縣卓溪鄉,982,121.180422,23.390629


In [89]:
# header=None makes pandas treat every row of the sheet as data, so the three
# columns are labelled explicitly afterwards.
map_data = pd.read_excel('county_mapping.xls', header=None).set_axis(
    ['Postal code', '行政區名', 'District name'],
    axis=1,
)
map_data

Unnamed: 0,Postal code,行政區名,District name
0,100,臺北市中正區,"Zhongzheng Dist., Taipei City"
1,103,臺北市大同區,"Datong Dist., Taipei City"
2,104,臺北市中山區,"Zhongshan Dist., Taipei City"
3,105,臺北市松山區,"Songshan Dist., Taipei City"
4,106,臺北市大安區,"Da’an Dist., Taipei City"
...,...,...,...
366,978,花蓮縣瑞穗鄉,"Ruisui Township, Hualien County"
367,979,花蓮縣萬榮鄉,"Wanrong Township, Hualien County"
368,981,花蓮縣玉里鎮,"Yuli Township, Hualien County"
369,982,花蓮縣卓溪鄉,"Zhuoxi Township, Hualien County"


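**Note: Three administrative districts (the Diaoyutai Islands, the Dongsha Islands and the Nansha Islands) are named differently in the two datasets, as the comparison below shows. These places are not habitable, so the mismatched rows can be ignored; they simply drop out of the merge that follows.**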

In [92]:
# Stack the district-name columns of both tables, then discard every name that
# appears more than once; what is left are the names without an exact
# counterpart.
all_district_names = pd.concat([ll_data['行政區名'], map_data['行政區名']])
diff = all_district_names.drop_duplicates(keep=False)
diff

64     宜蘭縣釣魚臺列嶼
270    南海諸島東沙群島
271    南海諸島南沙群島
64          釣魚台
270     高雄市東沙群島
271     高雄市南沙群島
Name: 行政區名, dtype: object

In [90]:
# Inner join (pd.merge's default, spelled out here) on both keys: a row is kept
# only when postal code AND district name agree in the two tables.
taiwan_data = pd.merge(
    left=map_data,
    right=ll_data,
    how='inner',
    on=['Postal code', '行政區名'],
)
taiwan_data

Unnamed: 0,Postal code,行政區名,District name,Longitude,Latitude
0,100,臺北市中正區,"Zhongzheng Dist., Taipei City",121.519884,25.032405
1,103,臺北市大同區,"Datong Dist., Taipei City",121.513042,25.063424
2,104,臺北市中山區,"Zhongshan Dist., Taipei City",121.538160,25.069699
3,105,臺北市松山區,"Songshan Dist., Taipei City",121.557588,25.059991
4,106,臺北市大安區,"Da’an Dist., Taipei City",121.543445,25.026770
...,...,...,...,...,...
363,978,花蓮縣瑞穗鄉,"Ruisui Township, Hualien County",121.407347,23.515612
364,979,花蓮縣萬榮鄉,"Wanrong Township, Hualien County",121.318953,23.727726
365,981,花蓮縣玉里鎮,"Yuli Township, Hualien County",121.360448,23.371436
366,982,花蓮縣卓溪鄉,"Zhuoxi Township, Hualien County",121.180422,23.390629


### 2. Venue categories in Taipei City and New Taipei City 

In [114]:
# Keep the districts whose English name contains "Taipei City".
# NOTE(review): this is a substring match, so "New Taipei City" districts are
# selected as well as "Taipei City" ones — confirm that both are wanted.
in_taipei_area = taiwan_data['District name'].str.contains("Taipei City")
city_data = taiwan_data[in_taipei_area]
city_data

Unnamed: 0,Postal code,行政區名,District name,Longitude,Latitude
0,100,臺北市中正區,"Zhongzheng Dist., Taipei City",121.519884,25.032405
1,103,臺北市大同區,"Datong Dist., Taipei City",121.513042,25.063424
2,104,臺北市中山區,"Zhongshan Dist., Taipei City",121.53816,25.069699
3,105,臺北市松山區,"Songshan Dist., Taipei City",121.557588,25.059991
4,106,臺北市大安區,"Da’an Dist., Taipei City",121.543445,25.02677
5,108,臺北市萬華區,"Wanhua Dist., Taipei City",121.497986,25.02859
6,110,臺北市信義區,"Xinyi Dist., Taipei City",121.57167,25.030621
7,111,臺北市士林區,"Shilin Dist., Taipei City",121.550847,25.125467
8,112,臺北市北投區,"Beitou Dist., Taipei City",121.517799,25.148068
9,114,臺北市內湖區,"Neihu Dist., Taipei City",121.592383,25.083706


In [115]:
# Foursquare credentials are read from the environment so they are never stored
# in the notebook. Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel. Any key pair that was previously saved in this
# notebook should be treated as leaked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn right away instead of letting the first API call fail obscurely.
    print('WARNING: FOURSQUARE_CLIENT_ID and/or FOURSQUARE_CLIENT_SECRET is not set; '
          'Foursquare requests will be sent without credentials.')
VERSION = '20180605'  # Foursquare API version
LIMIT = 100  # sent as the `limit` query parameter of each request
# NOTE(review): getNearbyVenues has its own `radius` parameter (default 500);
# this module-level value only takes effect if it is passed in explicitly.
radius = 5000

In [101]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare's "explore" endpoint returns around each point.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences; element i of each describes one district. They are
        zipped together, so iteration stops at the shortest one.
    radius : int, optional
        Value sent as the `radius` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'District',
        'District Latitude', 'District Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. When nothing is returned the
        frame is empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a non-success HTTP status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['District',
               'District Latitude',
               'District Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string; the timeout keeps a
        # stalled connection from hanging the kernel indefinitely.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Fail with the HTTP error itself rather than a KeyError on the payload.
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            venue_categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list is kept, with no category.
                venue_categories[0]['name'] if venue_categories else None,
            ))

    # Passing the columns to the constructor also works for zero rows, where
    # assigning `.columns` afterwards raises a length-mismatch error.
    return pd.DataFrame(rows, columns=columns)

In [118]:
# Fetch the venues around the centre point of every district in city_data.
# NOTE(review): no radius is passed, so the function's default (500) applies;
# the module-level `radius` variable is not used by this call.
new_taipei_categories = getNearbyVenues(
    city_data['District name'],
    city_data['Latitude'],
    city_data['Longitude'],
)

Zhongzheng Dist., Taipei City
Datong Dist., Taipei City
Zhongshan Dist., Taipei City
Songshan Dist., Taipei City
Da’an Dist., Taipei City
Wanhua Dist., Taipei City
Xinyi Dist., Taipei City
Shilin Dist., Taipei City
Beitou Dist., Taipei City
Neihu Dist., Taipei City
Nangang Dist., Taipei City
Wenshan Dist., Taipei City
Wanli Dist., New Taipei City
Jinshan Dist., New Taipei City
Banqiao Dist., New Taipei City
Xizhi Dist., New Taipei City
Shenkeng Dist., New Taipei City
Shiding Dist., New Taipei City
Ruifang Dist., New Taipei City
Pingxi Dist., New Taipei City
Shuangxi Dist., New Taipei City
Gongliao Dist., New Taipei City
Xindian Dist., New Taipei City
Pinglin Dist., New Taipei City
Wulai Dist., New Taipei City
Yonghe Dist., New Taipei City
Zhonghe Dist., New Taipei City
Tucheng Dist., New Taipei City
Sanxia Dist., New Taipei City
Shulin Dist., New Taipei City
Yingge Dist., New Taipei City
Sanchong Dist., New Taipei City
Xinzhuang Dist., New Taipei City
Taishan Dist., New Taipei City
Lin

In [273]:
# Raise pandas' row-display limit so frames of up to 10000 rows are shown in
# full. This is a session-wide option and stays in effect for later cells.
pd.set_option('display.max_rows', 10000)
new_taipei_categories

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Zhongzheng Dist., Taipei City",25.032405,121.519884,Kinfen Braised Pork Rice (金峰魯肉飯),25.032194,121.518534,Taiwanese Restaurant
1,"Zhongzheng Dist., Taipei City",25.032405,121.519884,臻味赤肉胡椒餅 烤地瓜,25.033022,121.518246,Bakery
2,"Zhongzheng Dist., Taipei City",25.032405,121.519884,Chiang Kai-Shek Memorial Hall (中正紀念堂),25.034555,121.521835,Monument / Landmark
3,"Zhongzheng Dist., Taipei City",25.032405,121.519884,虎記商行,25.031744,121.519284,Café
4,"Zhongzheng Dist., Taipei City",25.032405,121.519884,National Theater (國家戲劇院),25.035197,121.518188,Theater
5,"Zhongzheng Dist., Taipei City",25.032405,121.519884,樂田麵包屋 Gakuden Boulangerie,25.032757,121.517534,Bakery
6,"Zhongzheng Dist., Taipei City",25.032405,121.519884,鼎元豆漿,25.031294,121.521194,Chinese Breakfast Place
7,"Zhongzheng Dist., Taipei City",25.032405,121.519884,生活在他方 Elsewhere Cafe,25.030536,121.52063,Café
8,"Zhongzheng Dist., Taipei City",25.032405,121.519884,豆味行甜不辣、豆花、芋圓,25.031303,121.517232,Snack Place
9,"Zhongzheng Dist., Taipei City",25.032405,121.519884,三槐堂,25.030555,121.519381,Café


In [313]:
# Download Foursquare's category listing; the nested structure is flattened in
# the next cell.
url = 'https://api.foursquare.com/v2/venues/categories?&client_id={}&client_secret={}&v={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION)
# make the GET request; the timeout stops a stalled connection from hanging the
# kernel, and raise_for_status reports an HTTP failure here instead of as a
# KeyError when the payload is read later
categories_response = requests.get(url, timeout=30)
categories_response.raise_for_status()
json_categories = categories_response.json()

In [314]:
def _flatten_categories(category_tree):
    """Flatten the nested category list into one row per category.

    Parameters
    ----------
    category_tree : list of dict
        Top-level categories. Each dict has a 'name' and, optionally, a
        'categories' list holding its sub-categories in the same format.

    Returns
    -------
    pandas.DataFrame
        Columns 'name' and '1st_categories'. '1st_categories' holds the name of
        the top-level ancestor; top-level categories themselves get an empty
        string there.

    Notes
    -----
    Rows are gathered in a plain list and turned into a frame once, which
    avoids `DataFrame.append` (removed in pandas 2.0) and repeated copying.
    The recursion follows the tree to any depth instead of stopping at a fixed
    number of levels.
    """
    records = [{'name': top['name'], '1st_categories': ""} for top in category_tree]

    def _collect(children, top_name):
        # Descendants are emitted before the level they hang off, matching the
        # row order of the nested-loop implementation this replaces.
        for child in children:
            _collect(child.get('categories', []), top_name)
        records.extend(
            {'name': child['name'], '1st_categories': top_name} for child in children
        )

    for top in category_tree:
        _collect(top.get('categories', []), top['name'])

    return pd.DataFrame(records, columns=['name', '1st_categories'])


categories = _flatten_categories(json_categories['response']['categories'])

In [322]:
pd.merge(new_taipei_categories, categories, left_on = ''