In [1]:
import requests
from bs4 import BeautifulSoup
import strip
import os
import pandas as pd
import time
import re

In [2]:
# Get the current working directory; it is used as the base folder for the CSV input and output files
current_dir = os.getcwd()

In [3]:
def search_route(departure_station, destination_station):
    """Look up a route on Yahoo! Transit and return its summary figures.

    Fetches the printable search-result page for the given station pair and
    reads the three entries of the ``routeSummary`` block.

    Args:
        departure_station: Name of the departure station, inserted into the
            ``from=`` query parameter as-is.
        destination_station: Name of the destination station, inserted into
            the ``to=`` query parameter as-is.

    Returns:
        A tuple ``(required_time, transfer_count, fare)`` holding the raw text
        of the ``li.time``, ``li.transfer`` and ``li.fare`` elements of the
        route summary.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page does not contain the expected summary
            elements (for example when no route was found or the page layout
            changed).
    """
    # Source URL for the route. The search date/time and options are
    # hard-coded in the query string and may need to be adjusted.
    route_url = (
        "https://transit.yahoo.co.jp/search/print?from=" + departure_station
        + "&flatlon=&to=" + destination_station
        + "&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode="
        + "&y=2024&m=08&d=04&hh=13&m1=0&m2=0"
        + "&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1"
    )
    print(route_url)

    # Fetch the page; the timeout keeps a stalled connection from hanging
    # the whole batch, and HTTP errors are surfaced instead of being parsed.
    route_response = requests.get(route_url, timeout=30)
    route_response.raise_for_status()

    # Parse the page with BeautifulSoup.
    route_soup = BeautifulSoup(route_response.text, 'html.parser')

    # Locate the route summary block.
    route_summary = route_soup.find("div", class_="routeSummary")
    if route_summary is None:
        raise ValueError(
            f"No route summary found for {departure_station} -> {destination_station}"
        )

    # Read required time, transfer count and fare from the summary. These
    # values are returned directly and must not be rebound afterwards.
    summary_texts = []
    for css_class in ("time", "transfer", "fare"):
        item = route_summary.find("li", class_=css_class)
        if item is None:
            raise ValueError(
                f"Route summary for {departure_station} -> {destination_station} "
                f"has no '{css_class}' entry"
            )
        summary_texts.append(item.get_text())

    required_time, transfer_count, fare = summary_texts
    return required_time, transfer_count, fare


def extract_amount(text):
    """Extract the first yen amount (e.g. ``"6,790円"``) from ``text``.

    Amounts with thousands separators (``1,234円``) and without them
    (``12345円``) are both recognised in full.

    Args:
        text: String to search.

    Returns:
        The matched amount including the trailing ``円``, or ``None`` when
        ``text`` contains no yen amount.
    """
    # First alternative: comma-grouped digits; second: a plain digit run.
    match = re.search(r'(?:\d{1,3}(?:,\d{3})+|\d+)円', text)
    if match is None:
        return None
    return match.group()


def repeat_the_route_search(destination_station):
    """Search routes from every listed station to one destination and save a CSV.

    Reads ``都道府県ごとの代表駅一覧.csv`` (Shift_JIS) from ``current_dir``, drops
    the row whose ``代表駅`` equals ``destination_station``, calls
    ``search_route`` once per remaining station with a one-second pause
    between requests, and writes the input rows plus the columns ``移動時間``,
    ``乗換回数`` and ``料金`` to
    ``都道府県ごとの代表駅一覧から{destination_station}までの経路.csv`` (Shift_JIS)
    in the same folder.

    Args:
        destination_station: Station name used as the destination of every
            search and in the output file name.
    """
    # Load the station list.
    source_csv = os.path.join(current_dir, '都道府県ごとの代表駅一覧.csv')
    stations_df = pd.read_csv(source_csv, encoding='shift_jis')

    # Make sure the station column is compared as strings.
    stations_df['代表駅'] = stations_df['代表駅'].astype(str)

    # Drop the destination itself. The explicit copy makes the result an
    # independent frame, so adding columns below does not write to a slice.
    routes_df = stations_df[stations_df['代表駅'] != destination_station].copy()

    required_time_list = []
    transfer_count_list = []
    fare_list = []

    # Search the route from each station to the destination.
    for departure_station in routes_df['代表駅'].tolist():
        required_time, transfer_count, fare = search_route(departure_station, destination_station)
        required_time_list.append(required_time)
        transfer_count_list.append(transfer_count)
        fare_list.append(fare)
        # Wait one second between requests.
        time.sleep(1)

    routes_df['移動時間'] = required_time_list
    routes_df['乗換回数'] = transfer_count_list
    # str() keeps this working whether the fare arrives as text or as a parsed tag.
    routes_df['料金'] = [extract_amount(str(fare)) for fare in fare_list]

    # Write the result as CSV.
    output_csv = os.path.join(
        current_dir,
        f'都道府県ごとの代表駅一覧から{destination_station}までの経路.csv',
    )
    routes_df.to_csv(output_csv, index=False, encoding='shift_jis')

In [4]:
# Run the batch search with 盛岡駅 as the destination; each requested URL is printed below
repeat_the_route_search('盛岡駅')

https://transit.yahoo.co.jp/search/print?from=札幌駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode=&y=2024&m=08&d=04&hh=13&m1=0&m2=0&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1


https://transit.yahoo.co.jp/search/print?from=青森駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode=&y=2024&m=08&d=04&hh=13&m1=0&m2=0&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1
https://transit.yahoo.co.jp/search/print?from=仙台駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode=&y=2024&m=08&d=04&hh=13&m1=0&m2=0&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1
https://transit.yahoo.co.jp/search/print?from=秋田駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode=&y=2024&m=08&d=04&hh=13&m1=0&m2=0&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1
https://transit.yahoo.co.jp/search/print?from=山形駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatlon=&via=&viacode=&y=2024&m=08&d=04&hh=13&m1=0&m2=0&type=4&ticket=ic&expkind=1&userpass=0&ws=3&s=0&al=1&shin=1&ex=1&hb=1&lb=1&sr=1
https://transit.yahoo.co.jp/search/print?from=福島駅&flatlon=&to=盛岡駅&fromgid=&togid=&flatlon=&tlatl