# Singapore Public Housing (HDB) Resale Price Prediction Model (Part 3)
### Data Collection - Travel Time to Employment Centers

## 1. Initialization

In [1]:
# Import Vanilla Libraries
import requests, json, time, random, math
import pandas as pd
import numpy as np

In [2]:
# Load the HDB resale records and the MRT station table used as destinations
hdb = pd.read_csv('./Dataset/Transitional/complete_data.csv')
mrt = pd.read_csv('./Dataset/Engineered/MRT.csv')

## 2. Unique Coordinates

In [3]:
# Five employment centers of Singapore, each selected as the MRT row(s)
# whose 'Name' matches the station exactly
r_place = mrt.loc[mrt['Name'].eq('Raffles Place')]
o_north = mrt.loc[mrt['Name'].eq('one-north')]
j_east = mrt.loc[mrt['Name'].eq('Jurong East')]
orchard = mrt.loc[mrt['Name'].eq('Orchard')]
changi = mrt.loc[mrt['Name'].eq('Changi Airport')]

In [4]:
# Keep each (latitude, longitude) pair of the HDB dataset only once so that
# every location is queried a single time, which shortens the mining time
coors = list(zip(hdb['latitude'], hdb['longitude']))
coors_unique = pd.Series(pd.Series(coors).unique())

In [5]:
# Wrap the Series of unique coordinate tuples in a one-column DataFrame
coors_df = pd.DataFrame({'coordinates': coors_unique})

In [6]:
# Preview the first five unique coordinate pairs
coors_df.head(n=5)

Unnamed: 0,coordinates
0,"(1.362004539, 103.85387990000001)"
1,"(1.370966352, 103.83820190000002)"
2,"(1.38070883, 103.8353682)"
3,"(1.3662010409999998, 103.857201)"
4,"(1.381041355, 103.8351317)"


In [7]:
# Export to csv for function get_pt_travel_time
# get_pt_travel_time resumes from this file, so when the file already exists
# its mined columns are carried over instead of being wiped by a re-run of
# this cell. Coordinates that are new simply get NaN and are mined later.
import os

travel_time_path = './Dataset/Engineered/Travel_time.csv'
travel_time_out = coors_df

if os.path.exists(travel_time_path):
    mined_df = pd.read_csv(travel_time_path)
    # Only bring over columns coors_df does not already hold, so re-running
    # after coors_df was reloaded from the file cannot create duplicates.
    carried_cols = [col for col in mined_df.columns if col not in coors_df.columns]
    if 'coordinates' in mined_df.columns and carried_cols:
        # The file stores every coordinate tuple as its string form, so the
        # in-memory key is converted to the same representation before joining.
        travel_time_out = coors_df.astype({'coordinates': str}).merge(
            mined_df[['coordinates'] + carried_cols].drop_duplicates(subset='coordinates'),
            how='left',
            on='coordinates',
        )

travel_time_out.to_csv(travel_time_path, index=False)

## 3. Public Transport

In [8]:
### TOKEN - read from the environment so the credential is never stored in the notebook ###
# Set ONEMAP_TOKEN before running the routing cells; with an empty token the
# routing requests cannot succeed and the travel-time columns stay empty.
import os

token = os.environ.get('ONEMAP_TOKEN', '')

In [9]:
# Function to extract travel time (PT or Driving) by OneMap API
def get_pt_travel_time(desti_df, column_name, route_type='pt'):
    """Fill one travel-time column of Travel_time.csv via the OneMap routing API.

    The CSV is re-read on every call and rewritten after every processed row,
    so an interrupted run can be resumed: rows that already hold a value in
    ``column_name`` are skipped.

    Parameters
    ----------
    desti_df : pandas.DataFrame
        Destination; the first values of its 'Latitude' and 'Longitude'
        columns are used as the route end point.
    column_name : str
        Column of Travel_time.csv to fill. It is created (all NaN) when absent.
    route_type : str, default 'pt'
        Sent as ``routeType``. 'pt' adds the fixed transit parameters
        (date, time, mode, walking distance, one itinerary) and reads the
        first itinerary's duration; any other value reads the route
        summary's total time.

    Returns
    -------
    None
        Results are written to './Dataset/Engineered/Travel_time.csv'.

    Notes
    -----
    Reads the module-level ``token`` for authentication. The API value is
    divided by 60 and rounded to 2 decimals (presumably seconds to minutes -
    confirm against the OneMap documentation). A failed request or an
    unexpected response leaves NaN in the cell so a later call retries it.
    """
    csv_path = './Dataset/Engineered/Travel_time.csv'
    base_url = 'https://developers.onemap.sg/privateapi/routingsvc/route'
    # Parameters that are only sent for public-transport queries
    transit_suffix = '&date=2019-10-04&time=07:30:00&mode=TRANSIT&maxWalkDistance=500&numItineraries=1'

    coors_df = pd.read_csv(csv_path)

    # "(lat, lon)" -> ["lat", "lon"]; regex=False keeps the parentheses literal
    # regardless of the pandas version's default.
    coors_list = (
        coors_df['coordinates']
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
        .str.split(', ')
    )

    if column_name not in coors_df.columns:
        coors_df[column_name] = np.nan

    # Only rows without a value still need a request; filling one row never
    # changes another, so the list can be fixed before the loop starts.
    pending = coors_df[coors_df[column_name].isna()].index

    if len(pending) > 0:
        # The destination is the same for every row
        end = desti_df['Latitude'].values[0], desti_df['Longitude'].values[0]

        for idx in pending:
            start = coors_list.loc[idx]

            print('\rWaiting... {} completed... {} entries remaining... '.format(idx, len(coors_df)-idx-1), end='.')

            query = f"{base_url}?start={start[0]},{start[1]}&end={end[0]},{end[1]}&routeType={route_type}&token={token}"
            if route_type == 'pt':
                query += transit_suffix

            try:
                # The timeout keeps one stalled connection from hanging the run
                response = requests.get(query, timeout=30)
                jsons = json.loads(response.content)

                if route_type == 'pt':
                    duration = round(jsons['plan']['itineraries'][0]['duration']/60, 2)
                else:
                    duration = round(jsons['route_summary']['total_time']/60, 2)

                coors_df.loc[idx, column_name] = duration

            except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError):
                # Network failure, non-JSON body or a response without a route:
                # leave the cell empty so the next call retries this row.
                # KeyboardInterrupt is deliberately not caught, so the run can
                # be stopped by the user.
                coors_df.loc[idx, column_name] = np.nan

            # Checkpoint after every row so progress survives an interruption
            coors_df.to_csv(csv_path, index=False)

            # Short random pause (0.25 s or 0.5 s) between requests
            time.sleep(random.randint(1, 2)/4)

    if not coors_df[column_name].isnull().any():
        print("--- Data is complete ---")

In [11]:
# Public-transport travel time to Raffles Place, stored in 'raffles_place_dist'
get_pt_travel_time(desti_df=r_place, column_name='raffles_place_dist')

--- Data is complete ---


In [12]:
# Public-transport travel time to one-north, stored in 'one_north_dist'
get_pt_travel_time(desti_df=o_north, column_name='one_north_dist')

--- Data is complete ---


In [13]:
# Public-transport travel time to Jurong East, stored in 'jurong_east_dist'
get_pt_travel_time(desti_df=j_east, column_name='jurong_east_dist')

--- Data is complete ---


In [14]:
# Public-transport travel time to Orchard, stored in 'orchard_dist'
get_pt_travel_time(desti_df=orchard, column_name='orchard_dist')

--- Data is complete ---


In [15]:
# Public-transport travel time to Changi Airport, stored in 'changi_dist'
get_pt_travel_time(desti_df=changi, column_name='changi_dist')

--- Data is complete ---


In [16]:
# Reload the mined travel times and count the missing values in every column
coors_df = pd.read_csv('./Dataset/Engineered/Travel_time.csv')
coors_df.isna().sum()

coordinates            0
raffles_place_dist     0
one_north_dist         0
jurong_east_dist       0
orchard_dist           0
changi_dist            0
raffles_place_drive    0
one_north_drive        0
jurong_east_drive      0
orchard_drive          0
changi_drive           0
dtype: int64

## 4. Driving

In [17]:
# Driving travel time to Raffles Place, stored in 'raffles_place_drive'
get_pt_travel_time(desti_df=r_place, column_name='raffles_place_drive', route_type='drive')

--- Data is complete ---


In [18]:
# Driving travel time to one-north, stored in 'one_north_drive'
get_pt_travel_time(desti_df=o_north, column_name='one_north_drive', route_type='drive')

--- Data is complete ---


In [19]:
# Driving travel time to Jurong East, stored in 'jurong_east_drive'
get_pt_travel_time(desti_df=j_east, column_name='jurong_east_drive', route_type='drive')

--- Data is complete ---


In [20]:
# Driving travel time to Orchard, stored in 'orchard_drive'
get_pt_travel_time(desti_df=orchard, column_name='orchard_drive', route_type='drive')

--- Data is complete ---


In [21]:
# Driving travel time to Changi Airport, stored in 'changi_drive'
get_pt_travel_time(desti_df=changi, column_name='changi_drive', route_type='drive')

--- Data is complete ---


In [22]:
# Reload the travel-time table so it holds every column written by get_pt_travel_time
coors_df = pd.read_csv(filepath_or_buffer='./Dataset/Engineered/Travel_time.csv')

## 5. Data Merging

In [23]:
# Build the "(latitude, longitude)" text key that matches the 'coordinates'
# column of the travel_time dataframe, so the two tables can be merged
lat_txt = hdb['latitude'].astype(str)
lon_txt = hdb['longitude'].astype(str)
hdb['coordinates'] = '(' + lat_txt.str.cat(lon_txt, sep=', ') + ')'

In [24]:
# Attach the travel-time columns to every HDB row via the shared
# 'coordinates' key, then discard the key because it was only needed here
hdb = hdb.merge(coors_df, how='left', on='coordinates')
hdb = hdb.drop(columns='coordinates')

In [26]:
# Write the HDB data enriched with the employment-center travel times to disk
hdb.to_csv(path_or_buf='./Dataset/Transitional/complete_data_with_ec.csv', index=False)