In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from dataclasses import dataclass
from modules import LocalPaths, Collection
# Cap how many columns and rows pandas renders when a frame is displayed.
for _option_key, _option_limit in (
    ('display.max_columns', 100),
    ('display.max_rows', 30),
):
    pd.set_option(_option_key, _option_limit)

## スクレイピング

In [None]:
# Execution parameters
@dataclass(frozen=True)
class _RaceParams:
    """Settings for the race-level scraping step.

    Every attribute is an annotated dataclass field, so it appears in
    ``repr``/``==`` and can be overridden per instance, e.g.
    ``_RaceParams(FROM='2021-01', TO='2021-12')``. The defaults are still
    readable directly on the class (``_RaceParams.FROM``).
    """

    # Whether the race-level scraping cell runs at all.
    EXECUTION: bool = True
    # Value passed as ``from_`` to ``collection.get_event_date``.
    # NOTE(review): looks like a 'YYYY-MM' month string — confirm against Collection.
    FROM: str = '2022-01'
    # Value passed as ``to_`` to ``collection.get_event_date``.
    # NOTE(review): whether this bound is inclusive is decided by Collection — confirm.
    TO: str = '2023-01'


@dataclass(frozen=True)
class ExecParams:
    """Switches selecting which scraping steps of the notebook run.

    The notebook cells read these values straight from the class
    (``ExecParams.HORSE_EXECUTION``), which keeps working because dataclass
    defaults stay available as class attributes.
    """

    # Race-date related processing (enable flag plus date range).
    # A plain default (not ``default_factory``) is used on purpose: only a plain
    # default remains accessible as ``ExecParams.RACE_INFO`` on the class itself.
    RACE_INFO: _RaceParams = _RaceParams()
    # Horse-information processing.
    HORSE_EXECUTION: bool = True
    # Pedigree-information processing.
    PED_EXECUTION: bool = True

In [None]:
# Create the Collection instance whose methods the scraping cells below call.
collection = Collection()

In [None]:
# Race-level scraping; the whole cell is a no-op unless RACE_INFO.EXECUTION is set.
if ExecParams.RACE_INFO.EXECUTION:
    # Race-day dates for the configured FROM/TO range, then the race IDs for those dates.
    event_dates = collection.get_event_date(
        from_=ExecParams.RACE_INFO.FROM,
        to_=ExecParams.RACE_INFO.TO,
    )
    race_ids = collection.get_race_ids(event_dates)
    # Scrape the race HTML for those IDs.
    html_filepaths = collection.scrape_html_race(race_ids)

    # Parse and store only when the scrape step returned something truthy.
    if html_filepaths:
        # Race-results table, race-info table and payback table from the same HTML files.
        race_results = collection.get_rawdata_results(html_filepaths)
        all_race_info = collection.get_rawdata_raceinfo(html_filepaths)
        paybacks = collection.get_rawdata_payback(html_filepaths)

        # Update each raw table at its own path, in results -> race info -> payback order.
        for _raw_path, _raw_table in (
            (LocalPaths.RAW_RESULTS_PATH, race_results),
            (LocalPaths.RAW_RACEINFO_PATH, all_race_info),
            (LocalPaths.RAW_PAYBACK_PATH, paybacks),
        ):
            collection.update_rawdata(_raw_path, _raw_table)

In [None]:
# Horse-level scraping; skipped entirely unless HORSE_EXECUTION is enabled.
if ExecParams.HORSE_EXECUTION:
    # Load the raw race results from RAW_RESULTS_PATH and take the distinct horse IDs in them.
    race_results = collection.load_rawdata(LocalPaths.RAW_RESULTS_PATH)
    horse_ids = race_results['horse_id'].unique()
    # NOTE(review): presumably scrapes the horse pages and returns the saved HTML paths — confirm in Collection.
    html_file_horses = collection.scrape_html_horse_with_master(horse_ids)
    
    # Nothing to parse or store when the scrape step returned a falsy (e.g. empty) result.
    if html_file_horses:
        # Get the horses' past-results table and pass it to update_rawdata for RAW_HORSERESULTS_PATH.
        horse_results = collection.get_rawdata_horse(html_file_horses)
        collection.update_rawdata(LocalPaths.RAW_HORSERESULTS_PATH, horse_results)

In [None]:
# Pedigree scraping; skipped entirely unless PED_EXECUTION is enabled.
if ExecParams.PED_EXECUTION:
    # Load the raw race results again so this cell does not depend on the horse cell having run.
    race_results = collection.load_rawdata(LocalPaths.RAW_RESULTS_PATH)
    horse_ids = race_results['horse_id'].unique()
    # NOTE(review): presumably scrapes the pedigree pages and returns the saved HTML paths — confirm in Collection.
    html_file_peds = collection.scrape_html_ped(horse_ids)
    
    # Nothing to parse or store when the scrape step returned a falsy (e.g. empty) result.
    if html_file_peds:
        # Get the pedigree information and pass it to update_rawdata for RAW_PEDS_PATH.
        ped_results = collection.get_rawdata_ped(html_file_peds)
        collection.update_rawdata(LocalPaths.RAW_PEDS_PATH, ped_results)