<a href="https://colab.research.google.com/github/z-gard/analysis/blob/main/notebooks/train_station_route.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
# Mount Google Drive (Colab only) so files under /content/drive become readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [31]:
# Drive folder that holds the input train_station.csv and receives the output CSV.
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

In [283]:
# File name (joined with DIR_NAME) of the CSV written by the last cell.
output_file = 'train_station_route.csv'

In [85]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import datetime
import pprint
import time

# Raise the pandas display limits used when frames are rendered in cell output.
for _option, _limit in (('display.max_columns', 50), ('display.max_rows', 100)):
    pd.set_option(_option, _limit)

In [254]:
# Load the station master and keep one row per distinct (name, lat, lng, place_id).
station_csv = os.path.join(DIR_NAME, 'train_station.csv')
station_cols = ['name', 'lat', 'lng', 'place_id']
df_station = (
    pd.read_csv(station_csv)[station_cols]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(df_station.shape)

(339, 4)


In [255]:
# Replace the one romanised entry with its Japanese name.
df_station['name'] = df_station['name'].replace('Shin-urayasu station', '新浦安駅')

# Derive a bare station name (operator prefix, "(スカイツリー前)" and the trailing
# "駅" removed); it is the key used later to join against the API result.
# Literal replacements pass regex=False explicitly because the default of
# `str.replace` differs between pandas versions; the two real patterns are raw
# strings so that `\(` / `\)` are not treated as (invalid) Python escapes.
df_station['station'] = (
    df_station['name']
    .str.replace('・ステーション', '', regex=False)
    .str.replace('JR ', '', regex=False)
    .str.replace('JR', '', regex=False)
    .str.replace(r'\(スカイツリー前\)', '', regex=True)
    .str.replace(r'駅$', '', regex=True)
)

# From here on the original station name lives in the `駅名` column.
df_station = df_station.rename(columns={'name': '駅名'})

### 主要駅

In [187]:
# Start stations for the reachability query, and their "lat,lng" strings.
station_list = ['東京駅', '上野駅', '池袋駅', '新宿駅', '渋谷駅', '品川駅']
lat_lng = {}

# The cleaning cell above renames `name` to `駅名`, so looking the station up
# by `name` fails on a fresh top-to-bottom run. Use `駅名`, and fall back to
# `name` only when the frame has not been renamed yet.
name_col = '駅名' if '駅名' in df_station.columns else 'name'

for station in station_list:
    matched = df_station[df_station[name_col] == station]
    if matched.empty:
        # Fail with the station name instead of an opaque IndexError from .iloc[0].
        raise ValueError(f"{station} not found in df_station['{name_col}']")
    row = matched.iloc[0]
    # Six decimal places, comma separated, no space.
    lat_lng[station] = f"{row['lat']:.6f},{row['lng']:.6f}"

In [188]:
# Show the "lat,lng" strings that are passed to the API as start points.
print(lat_lng)

{'東京駅': '35.681236,139.767125', '上野駅': '35.714167,139.777409', '池袋駅': '35.729503,139.710900', '新宿駅': '35.689607,139.700571', '渋谷駅': '35.658034,139.701636', '品川駅': '35.628471,139.738760'}


### NAVITIME Reachable API
- [Rapid API](https://rapidapi.com/navitimejapan-navitimejapan/api/navitime-reachable)
- [NAVITIME API](https://api-sdk.navitime.co.jp/api/rakutenrapid/#wrap-regd)

In [77]:
# RapidAPI key sent in the X-RapidAPI-Key header.
# Read it from the RAPIDAPI_KEY environment variable so the real key never has
# to be saved in the notebook; 'xxxxx' is only a placeholder fallback.
YOUR_API_KEY = os.environ.get('RAPIDAPI_KEY', 'xxxxx')

In [86]:
def get_reachable_transit(station, lat_lng, minute, timeout=30):
    """Call the NAVITIME reachable_transit endpoint (via RapidAPI) for one start point.

    Parameters
    ----------
    station : str
        Label written to the ``start`` column of the returned frame.
    lat_lng : str
        Value sent as the ``start`` query parameter (the notebook passes
        "lat,lng" in degrees).
    minute : int
        Value sent as the ``term`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so that a
        stalled connection cannot hang the notebook.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``items`` list plus a ``start``
        column. An empty frame is returned (after printing the reason) when
        the request fails or the status code is not OK.
    """
    url = "https://navitime-reachable.p.rapidapi.com/reachable_transit"
    headers = {
        "X-RapidAPI-Key": f"{YOUR_API_KEY}",
        "X-RapidAPI-Host": "navitime-reachable.p.rapidapi.com"
    }
    querystring = {
        "term": f"{minute}",
        "start": lat_lng,
        "offset": "0",
        "coord_unit": "degree",
        "term_from": "0",
        "transit_limit": "10",
        "datum": "wgs84",
        "walk_speed": "5",
        "limit": "2000"
    }
    try:
        response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Same best-effort contract as a non-OK status: report and return nothing.
        print(f"{station}: request failed ({exc})")
        return pd.DataFrame()

    if response.status_code != requests.codes.ok:
        print(response)
        return pd.DataFrame()

    # A body without "items" yields an empty frame instead of a KeyError.
    df_res = pd.DataFrame(response.json().get('items', []))
    df_res['start'] = station
    return df_res

In [87]:
# Query the API once per start station and stack the results.
minute = 90
frames = []
for station in station_list:
    frames.append(get_reachable_transit(station, lat_lng[station], minute))
    # Pause between calls to space out the API requests.
    time.sleep(5)

# Concatenate once instead of growing a frame inside the loop; the guard keeps
# the original "empty frame" result when there is nothing to concatenate.
df_result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(df_result.shape)

(14229, 7)


In [270]:
# Preview the combined API result.
# NOTE(review): the saved output shows a `minute` column that no visible cell
# creates — presumably stale output from an earlier version; re-run to refresh.
df_result.head()

Unnamed: 0,time,coord,name,node_id,transit_count,start,minute
0,4,"{'lat': 35.68081, 'lon': 139.76779}",東京,6668,0,東京駅,30
1,8,"{'lat': 35.68675, 'lon': 139.764863}",大手町（東京都）,5630,0,東京駅,30
2,8,"{'lat': 35.691982, 'lon': 139.771014}",神田（東京都）,4464,0,東京駅,30
3,8,"{'lat': 35.674854, 'lon': 139.762797}",有楽町,8837,0,東京駅,30
4,9,"{'lat': 35.676823, 'lon': 139.770099}",京橋（東京都）,1725,0,東京駅,30


In [227]:
# Order by `time` so that, for each (start, node_id) pair, the first row kept
# by drop_duplicates is the one that sorts earliest.
df_time_sort = df_result.sort_values(by='time', ignore_index=True)
df_extract = df_time_sort.drop_duplicates(subset=['start', 'node_id'], ignore_index=True)
print(df_extract.shape[0])

7894


In [228]:
# Strip a trailing qualifier from the API name to get the bare station name.
# Priority is unchanged: text before the first "（", else before the first "〔",
# else before the first "[", else the name as-is. The patterns are raw strings
# so that `\[` is not treated as an (invalid) Python escape sequence.
names = df_extract['name']
df_extract['station'] = (
    names.str.extract(r'(.*?)（', expand=False)
    .fillna(names.str.extract(r'(.*?)〔', expand=False))
    .fillna(names.str.extract(r'(.*?)\[', expand=False))
    .fillna(names)
)

# Rows are already ordered by `time`, so the first row per (start, station) is kept.
df_extract = df_extract.drop_duplicates(subset=['start', 'station']).reset_index(drop=True)
print(len(df_extract))

7777


In [233]:
# Align spelling variants in the API names with the station master
# (small "ヶ" -> "ケ" for four stations, full-width "３" -> "3").
station_fixes = {
    '霞ヶ関': '霞ケ関',
    '市ヶ谷': '市ケ谷',
    '千駄ヶ谷': '千駄ケ谷',
    '鐘ヶ淵': '鐘ケ淵',
    '３': '3',
}
normalised = df_extract['station']
for old_text, new_text in station_fixes.items():
    normalised = normalised.str.replace(old_text, new_text)
df_extract['station'] = normalised

### 結合

In [264]:
# Left-join the station master to the API result on the bare station name, so
# every master row survives even when the API returned no match for it.
station_keys = df_station[['駅名', 'place_id', 'station']]
reach_times = df_extract[['station', 'start', 'time']]
df_merge = station_keys.merge(reach_times, how='left', on='station')
print(df_merge.shape)

(2024, 6)


In [265]:
# Stations with no match in the API result (no `start` after the left join).
unmatched = df_merge['start'].isna()
df_merge.loc[unmatched]

Unnamed: 0,駅名,place_id,station,start,time,minute
1086,笹塚駅,ChIJcYI7YhfzGGARVFWzNC9eONU,笹塚,,,
1171,小田急クローゼット成城,ChIJiz-74WDxGGARLl4H02KGT-A,小田急クローゼット成城,,,


In [275]:
# Reshape to one row per (駅名, place_id) with one column of `time` values per
# start station (pivot_table's default aggregation, the mean, is kept).
df_output = (
    df_merge
    .drop(columns=['station'])
    .pivot_table(index=['駅名', 'place_id'], columns='start', values='time')
    .reset_index()
)
print(df_output.shape)

(337, 8)


In [279]:
# Preview the wide table: one row per station, one column of `time` values per start station.
df_output.head()

start,駅名,place_id,上野駅,品川駅,新宿駅,東京駅,池袋駅,渋谷駅
0,JR 秋葉原駅,ChIJl_zMVh2MGGAR3XixTB2CgJ4,9.0,19.0,22.0,10.0,23.0,29.0
1,JR大崎駅,ChIJ-TG-NhWLGGAR6OcN44dimrg,29.0,8.0,21.0,21.0,28.0,12.0
2,お台場海浜公園駅,ChIJDQUtOvGJGGARuul34HWvMQ8,39.0,34.0,48.0,31.0,52.0,42.0
3,お花茶屋駅,ChIJ6a9LTHGPGGARl4SEYCeMiVE,28.0,44.0,42.0,36.0,33.0,47.0
4,とうきょうスカイツリー駅,ChIJ6ZlMKdeOGGARD4-LlV0avwY,21.0,41.0,42.0,32.0,42.0,46.0


In [272]:
# Re-read the station master with all columns and rows for the final join.
station_master_path = os.path.join(DIR_NAME, 'train_station.csv')
df_station_org = pd.read_csv(station_master_path)
print(df_station_org.shape)

(3141, 8)


In [280]:
# Attach the per-start `time` columns to every master row via place_id.
station_attrs = df_station_org.drop(columns=['rating', 'types', 'vicinity'])
times_by_place = df_output.drop(columns=['駅名'])
df_station_min = station_attrs.merge(times_by_place, how='left', on='place_id')
print(df_station_min.shape)

(3141, 11)


In [282]:
# Preview the final table before it is written to disk.
df_station_min.head()

Unnamed: 0,name,lat,lng,place_id,no,上野駅,品川駅,新宿駅,東京駅,池袋駅,渋谷駅
0,東京駅,35.681236,139.767125,ChIJC3Cf2PuLGGAROO00ukl8JwA,101003,13.0,15.0,20.0,4.0,21.0,25.0
1,東京駅,35.681236,139.767125,ChIJC3Cf2PuLGGAROO00ukl8JwA,101004,13.0,15.0,20.0,4.0,21.0,25.0
2,有楽町駅,35.675013,139.76302,ChIJ1UxPReWLGGARagE2MSfXh7g,101005,16.0,15.0,27.0,8.0,26.0,25.0
3,大手町駅,35.685349,139.763278,ChIJnUcMGviLGGAROCgXeJNX4xg,101007,21.0,23.0,25.0,8.0,20.0,23.0
4,大手町駅,35.685349,139.763278,ChIJnUcMGviLGGAROCgXeJNX4xg,101008,21.0,23.0,25.0,8.0,20.0,23.0


In [284]:
# Write the final table to DIR_NAME/output_file without the index column.
output_path = os.path.join(DIR_NAME, output_file)
df_station_min.to_csv(output_path, index=False)