# GTFSのデータからYoshida’s MapboxGLに読み込ませるためのデータに変換する処理

## PandasとGeoPandasのインポート

In [9]:
import pandas as pd
import geopandas as gpd
from shapely import LineString
from shapely.ops import linemerge

# Path to the folder that holds the GTFS data.
# NOTE: despite being described as absolute originally, this value is prefixed
# with "./" when the files are read, so it is resolved relative to the
# current working directory.
folder_path = "kanto_GTFS"

## データの読み込み

In [2]:
# Read the four GTFS tables used by this notebook from the GTFS folder.
gtfs_dir = f"./{folder_path}"
df_shapes, df_stops, df_stop_times, df_trips = (
    pd.read_csv(f"{gtfs_dir}/{table}.txt")
    for table in ("shapes", "stops", "stop_times", "trips")
)

## shapes.txtからルート情報を抽出する処理

In [11]:
# Attach a [longitude, latitude] pair to every shape point.
df_shapes["coord"] = df_shapes[["shape_pt_lon", "shape_pt_lat"]].values.tolist()

# GTFS defines the order of a shape's points by shape_pt_sequence, not by the
# row order of shapes.txt. Order the points explicitly before collecting them;
# otherwise a feed whose rows are not pre-sorted yields scrambled lines.
df_shapes_ordered = df_shapes.sort_values(["shape_id", "shape_pt_sequence"])

# Collect the ordered coordinates of each shape_id into one list per shape,
# then build a LineString from each list.
df_routes = df_shapes_ordered.groupby(by="shape_id", as_index=False)["coord"].apply(list)
df_routes["geometry"] = df_routes["coord"].apply(LineString)

# Write the per-shape coordinate lists to a JSON file.
# This file is used to move the buses along the coordinates.
df_routes[["shape_id", "coord"]].to_json("routes.json", orient="records")

# Write the per-shape LineStrings to a GeoJSON file.
gpd.GeoDataFrame(df_routes[["shape_id", "geometry"]]).to_file("routes.geojson")

df_routes

Unnamed: 0,shape_id,coord,geometry
0,SP000001,"[[139.8288811, 36.4663989], [139.828826, 36.46...","LINESTRING (139.8288811 36.4663989, 139.828826..."
1,SP000002,"[[139.8253048, 36.4714155], [139.825302, 36.47...","LINESTRING (139.8253048 36.4714155, 139.825302..."
2,SP000003,"[[139.8741129, 36.641033], [139.874053, 36.640...","LINESTRING (139.8741129 36.641033, 139.874053 ..."
3,SP000004,"[[139.8741129, 36.641033], [139.874053, 36.640...","LINESTRING (139.8741129 36.641033, 139.874053 ..."
4,SP000005,"[[139.9623675, 36.6818642], [139.962448, 36.68...","LINESTRING (139.9623675 36.6818642, 139.962448..."
...,...,...,...
262,SP000263,"[[139.9484122, 36.6381795], [139.948455, 36.63...","LINESTRING (139.9484122 36.6381795, 139.948455..."
263,SP000264,"[[139.845055, 36.5913938], [139.845077, 36.591...","LINESTRING (139.845055 36.5913938, 139.845077 ..."
264,SP000265,"[[139.9484122, 36.6381795], [139.948455, 36.63...","LINESTRING (139.9484122 36.6381795, 139.948455..."
265,SP000266,"[[139.9166509, 36.477216], [139.916656, 36.477...","LINESTRING (139.9166509 36.477216, 139.916656 ..."


In [20]:
# Remove duplicated routes to make the exported data lighter.
route_lines = df_routes["geometry"].to_list()

# Merge the individual route lines into one geometry, then intersect the
# result with itself (presumably to collapse overlapping segments — confirm).
merged_routes = linemerge(route_lines)
merged_routes = merged_routes.intersection(merged_routes)

# Export the single merged geometry as a one-row GeoJSON file.
gdf_routes_simplified = gpd.GeoDataFrame({"geometry": [merged_routes]})
gdf_routes_simplified.to_file("routes_simplify.geojson")

In [None]:
# NOTE(review): `k` is not defined in any cell visible in this notebook —
# presumably a Series of per-route coordinate lists (e.g. df_routes["coord"]);
# confirm and define it before running this cell.
# Maps every element of `k` to a list holding its items in reverse order.
k.apply(lambda x: list(reversed(x)))

In [None]:
# Keep an entry of `k` only when its reversed form has not already been kept,
# printing the key of every entry that is kept.
# NOTE(review): `k` is not defined in the visible cells — presumably a Series
# of per-route coordinate lists; confirm.
res = []
for key, route in k.items():
    flipped = list(reversed(route))
    already_kept = any(flipped == kept for kept in res)
    if already_kept:
        continue
    res.append(route)
    print(key)


res

## デバッグ用

### direction_idとshape_idが一意に対応しているかを確認するコード

In [None]:
# Distinct (shape_id, direction_id) pairs that occur in trips.txt.
df_ = df_trips[["shape_id", "direction_id"]].drop_duplicates()
print("shape_idとdirection_idの組み合わせの個数", len(df_))
print("shape_idの個数", len(df_shapes["shape_id"].unique()))

# Direct check: each shape_id must appear with exactly one direction_id.
# Because df_ holds distinct pairs, a repeated shape_id means that shape is
# used with more than one direction_id. Comparing the two counts above is only
# a proxy: shapes unused by any trip, or trips without a shape_id, can make
# the counts differ (or match) regardless of the mapping, so trips without a
# shape_id are excluded and the uniqueness is tested on trips.txt alone.
print(df_.dropna(subset=["shape_id"])["shape_id"].is_unique)