# GTFSのデータからYoshida’s MapboxGLに読み込ませるためのデータに変換する処理

## ライブラリのインポートと入出力パスの設定

In [17]:
import pandas as pd
import geopandas as gpd
from shapely import LineString
from shapely.ops import linemerge
import os
import json
import sys
# Raise Python's recursion limit to 10000.
# NOTE(review): nothing in the visible cells is recursive — confirm this is still needed.
sys.setrecursionlimit(10000)

# Folder that holds the GTFS feed. Despite the original note calling it an absolute
# path, this value is a folder name resolved relative to the working directory.
folder_path = "ToyamaChitetsu"
# Converted files are written to the "output" subfolder of the feed folder,
# which is created here if it does not exist yet.
output_path = f"./{folder_path}/output/"
os.makedirs(output_path, exist_ok=True)

## データの読み込み

In [5]:
# Load the four GTFS tables used by this notebook from the feed folder.
df_shapes, df_stops, df_stop_times, df_trips = (
    pd.read_csv(f"./{folder_path}/{table}.txt")
    for table in ("shapes", "stops", "stop_times", "trips")
)

## shapes.txtからルート情報を抽出する処理

In [8]:
# Store every shape point as a [longitude, latitude] pair.
df_shapes["coord"] = df_shapes[["shape_pt_lon", "shape_pt_lat"]].values.tolist()

# Collect the points of each shape_id into one coordinate list.
# GTFS defines the order of a shape's points by shape_pt_sequence, not by the
# row order of shapes.txt, so sort before grouping; groupby keeps the sorted
# row order inside each group.
df_routes = (
    df_shapes.sort_values(["shape_id", "shape_pt_sequence"])
    .groupby(by="shape_id", as_index=False)["coord"]
    .apply(list)
)
df_routes["geometry"] = df_routes["coord"].apply(LineString)

# Write the per-shape coordinate lists as JSON.
# This file is used to move the vehicles along the route coordinates.
df_routes[["shape_id", "coord"]].to_json(f"{output_path}/routes.json", orient="records")

# Write the per-shape line geometries as GeoJSON.
gpd.GeoDataFrame(df_routes[["shape_id", "geometry"]]).to_file(f"{output_path}/routes.geojson")

df_routes


Unnamed: 0,shape_id,coord,geometry
0,3001-1-1,"[[137.220566712479, 36.6705204722661], [137.22...","LINESTRING (137.220566712479 36.6705204722661,..."
1,3001-1-2,"[[137.212899949192, 36.701609951035], [137.212...","LINESTRING (137.212899949192 36.701609951035, ..."
2,3001-2-1,"[[137.220566712479, 36.6705204722661], [137.22...","LINESTRING (137.220566712479 36.6705204722661,..."
3,3001-2-2,"[[137.191196828679, 36.6985813786566], [137.19...","LINESTRING (137.191196828679 36.6985813786566,..."
4,3002-1,"[[137.212899949192, 36.701609951035], [137.212...","LINESTRING (137.212899949192 36.701609951035, ..."
5,3002-4-2-1,"[[137.220566712479, 36.6705204722661], [137.22...","LINESTRING (137.220566712479 36.6705204722661,..."
6,3003-4-1,"[[137.234059466553, 36.7613627664226], [137.23...","LINESTRING (137.234059466553 36.7613627664226,..."
7,3003-4-1-1,"[[137.226022712526, 36.731082893702], [137.224...","LINESTRING (137.226022712526 36.731082893702, ..."
8,3003-4-2,"[[137.220566712479, 36.6705204722661], [137.22...","LINESTRING (137.220566712479 36.6705204722661,..."
9,3003-5-1,"[[137.234059466553, 36.7613627664226], [137.23...","LINESTRING (137.234059466553 36.7613627664226,..."


### 重複するルートを削除することでデータの軽量化を図る

In [9]:
# Remove duplicated route geometry to reduce the size of the output data.
merged_routes = linemerge(df_routes["geometry"].to_list())
# Intersecting the merged geometry with itself emits
# "RuntimeWarning: invalid value encountered in intersection", which is currently ignored.
# NOTE(review): the original author was unsure whether ignoring this warning is safe —
# confirm the written geometry is valid, and whether a union-style operation was intended.
merged_routes = merged_routes.intersection(merged_routes)

gpd.GeoDataFrame({"geometry": [merged_routes]}).to_file(f"{output_path}/routes_simplify.geojson")

  return lib.intersection(a, b, **kwargs)


## stops.txtから停留所データを抽出する

In [12]:
# Preview the first rows of stops.txt.
df_stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,platform_code,parent_station
0,3001-C01_01,,南富山駅前,,36.67052,137.220567,3001-C01_01,,0,,
1,3001-C02_01,,大町,,36.672764,137.220813,3001-C02_01,,0,,
2,3001-C03_01,,堀川小泉,,36.675638,137.219379,3001-C03_01,,0,,
3,3001-C04_01,,小泉町,,36.678258,137.217986,3001-C04_01,,0,,
4,3001-C05_01,,西中野,,36.680453,137.216897,3001-C05_01,,0,,


In [15]:
# Keep only stops/platforms (location_type 0, i.e. the boarding poles).
# GTFS treats a blank location_type as 0 and the column itself is optional,
# so blank values and a missing column both count as "stop/platform".
if "location_type" in df_stops.columns:
    is_platform = df_stops["location_type"].fillna(0) == 0
else:
    is_platform = pd.Series(True, index=df_stops.index)

# .copy() makes the filtered frame independent of df_stops, so adding a column
# below does not write into a slice (avoids SettingWithCopyWarning).
df_stops_filtered = df_stops[is_platform].copy()

# Store each stop position as a [longitude, latitude] pair.
df_stops_filtered["coord"] = df_stops_filtered[["stop_lon", "stop_lat"]].values.tolist()
df_stops_filtered = df_stops_filtered[["stop_id", "stop_name", "coord"]]

# force_ascii=False keeps the Japanese stop names readable in the JSON file.
df_stops_filtered.to_json(f"{output_path}/stops.json", orient="records", force_ascii=False)

df_stops_filtered.head()

Unnamed: 0,stop_id,stop_name,coord
0,3001-C01_01,南富山駅前,"[137.220566712479, 36.6705204722661]"
1,3001-C02_01,大町,"[137.220813475709, 36.672764436263]"
2,3001-C03_01,堀川小泉,"[137.219378570938, 36.6756381935983]"
3,3001-C04_01,小泉町,"[137.217986348707, 36.6782584628215]"
4,3001-C05_01,西中野,"[137.21689706034, 36.6804529414684]"


## stop_times.txtとtrips.txtから時刻表データを作成する

In [39]:
# Preview the first rows of stop_times.txt.
df_stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,timepoint
0,平日_05時30分_系統3003-4-2,05:30:00,05:30:00,3001-C01_01,1,岩瀬浜（富山駅 経由）,0,1,1
1,平日_05時30分_系統3003-4-2,05:31:00,05:31:00,3001-C02_01,2,岩瀬浜（富山駅 経由）,0,0,1
2,平日_05時30分_系統3003-4-2,05:32:00,05:32:00,3001-C03_01,3,岩瀬浜（富山駅 経由）,0,0,1
3,平日_05時30分_系統3003-4-2,05:33:00,05:33:00,3001-C04_01,4,岩瀬浜（富山駅 経由）,0,0,1
4,平日_05時30分_系統3003-4-2,05:34:00,05:34:00,3001-C05_01,5,岩瀬浜（富山駅 経由）,0,0,1


In [33]:
# Index trips.txt by trip_id so that a trip's attributes can be fetched with .loc[trip_id].
df_trips_lookup = df_trips.set_index("trip_id")
df_trips_lookup.head()

Unnamed: 0_level_0,route_id,service_id,trip_headsign,block_id,trip_short_name,direction_id,shape_id
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
平日_05時30分_系統3003-4-2,富山港線（南富山）(3003-4-2),平日,岩瀬浜（富山駅 経由）,,,0.0,3003-4-2
平日_05時31分_系統3003-5-1,富山港線（富山大学前）(3003-5-1),平日,富山大学前（富山駅 経由）,,,,3003-5-1
平日_05時40分_系統3001-2-1,市内軌道線(3001-2-1),平日,富山大学前（富山駅 経由）,,,,3001-2-1
平日_05時50分_系統3001-1-1,市内軌道線(3001-1-1),平日,富山駅（西町 経由）,,,,3001-1-1
平日_06時00分_系統3001-2-1,市内軌道線(3001-2-1),平日,富山大学前（富山駅 経由）,,,,3001-2-1


In [37]:
# Build one timetable record per trip:
#   {"trip_id", "service_id", "shape_id", "trip_headsign", "tt": [stop entries]}
# Each stop entry holds "a" (arrival), "d" (departure) and "s" (stop_id);
# the first stop of a trip has no "a" because nothing arrives before it.
res = []
for trip_id, trip_stop_times in df_stop_times.groupby(by="trip_id"):
    lookup = df_trips_lookup.loc[trip_id]

    # GTFS orders the stops of a trip by stop_sequence, not by row order in
    # stop_times.txt, so sort explicitly before walking through them.
    ordered = trip_stop_times.sort_values("stop_sequence", kind="stable")

    tt = []
    prev_departure = ""
    for arrival, departure, stop_id in zip(
        ordered["arrival_time"], ordered["departure_time"], ordered["stop_id"]
    ):
        # Skip a stop whose arrival time equals the departure time of the
        # previously kept stop.
        # TODO (from the original author): handle this case properly later.
        if prev_departure == arrival:
            continue

        if not tt:
            # First stop of the trip. stop_sequence values only have to
            # increase and need not start at 0 (this feed starts at 1), so the
            # first stop is detected by position rather than by
            # `stop_sequence == 0`.
            tt.append({
                "d": departure,
                "s": stop_id
            })
        else:
            tt.append({
                "a": arrival,
                "d": departure,
                "s": stop_id
            })
        prev_departure = departure
    res.append({
        "trip_id": trip_id,
        "service_id": lookup["service_id"],
        "shape_id": lookup["shape_id"],
        "trip_headsign": lookup["trip_headsign"],
        "tt": tt
    })

In [38]:
# Write the timetable as JSON. ensure_ascii=False emits the Japanese text as-is,
# so the file encoding is pinned to UTF-8 instead of the platform default
# (for example cp932 on Windows), which could produce non-UTF-8 JSON.
with open(f"{output_path}/timetable.json", "w", encoding="utf-8") as f:
    json.dump(res, f, ensure_ascii=False, indent=2)

## デバッグ用

### direction_idとshape_idが一意に対応しているかを確認するコード

In [10]:
# Distinct (shape_id, direction_id) pairs that occur in trips.txt.
df_ = df_trips[["shape_id", "direction_id"]].drop_duplicates()
pair_count = len(df_)
# Number of distinct shape_id values in shapes.txt.
shape_count = len(df_shapes["shape_id"].unique())

# Equal counts are used here as the sign that each shape_id maps to one direction_id.
print("shape_idとdirection_idの組み合わせの個数", pair_count)
print("shape_idの個数", shape_count)
print(pair_count == shape_count)

shape_idとdirection_idの組み合わせの個数 15
shape_idの個数 15
True
