In [None]:
import json
import asyncio
import time

import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

import stock

In [None]:
# Monex endpoints used by this notebook, and the path of the local JSON file
# holding the login credentials.
MANEX_BASE_URL = "https://mst.monex.co.jp"
MANEX_LOGIN_PAGE_URL = MANEX_BASE_URL + "/pc/ITS/login/LoginIDPassword.jsp"
MANEX_SCOUTER_URL = "https://monex.ifis.co.jp/index.php"
MANEX_LOGIN_ID_JSON = stock.constants.PROJECT_ROOT / "cert" / "manex_login_id.json"


# Load the credentials; later cells read the "loginid" and "passwd" keys.
with open(MANEX_LOGIN_ID_JSON, mode="r") as credentials_file:
    login_info = json.loads(credentials_file.read())

In [None]:
# Attempt the login with a plain HTTP session (no browser).
session = HTMLSession()
headers = {"User-Agent": UserAgent().chrome}

# Fetch the login page, actually sending the browser-like User-Agent built above.
r = session.get(MANEX_LOGIN_PAGE_URL, headers=headers, timeout=5)
soup = BeautifulSoup(r.text, "html.parser")

# Collect what the login form needs: its target URL and its hidden fields.
form = soup.find("form", id="contents")
if form is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError("login form (id='contents') was not found on the login page")
login_post_url = f"{MANEX_BASE_URL}{form.attrs['action']}"
login_request_body = {
    hidden_field.attrs["name"]: hidden_field.attrs.get("value", "")
    for hidden_field in form.find_all("input", type="hidden")
    if "name" in hidden_field.attrs
}
# NOTE(review): only the hidden fields are posted; the login id / password are
# not added to the body here.

# Human-readable preview of the fields, for debugging only (not URL-encoded).
login_request_body_str = "&".join([f"{key}={val}" for key, val in login_request_body.items()])
print(login_request_body_str)

# Pass the dict itself so requests form-encodes the values and sets the
# application/x-www-form-urlencoded Content-Type; a pre-joined string would be
# sent verbatim without either.
r = session.post(login_post_url, data=login_request_body, headers=headers, timeout=5)

`requests`ベースではログイン処理がうまく動かなかったため、以降はSeleniumを使用する。

In [None]:
# Browser options for the remote Chrome session.
options = webdriver.ChromeOptions()
# Uncomment to run without a visible browser window:
# options.add_argument("--headless")

# Connect to the Selenium server. Only `options` is passed: it already carries
# the capabilities, and the `desired_capabilities` keyword is deprecated in
# Selenium 4 and rejected by newer releases.
driver = webdriver.Remote(
    command_executor="http://localhost:4444/wd/hub",
    options=options,
)

In [None]:
# Enter the ID and password on the login page and log in.
driver.get(MANEX_LOGIN_PAGE_URL)

# send_keys() returns None, so its result is not bound to a name.
driver.find_element(By.ID, "loginid").send_keys(login_info["loginid"])
driver.find_element(By.ID, "passwd").send_keys(login_info["passwd"])

# Click the first submit input whose value is "ログイン"; if none matches,
# nothing is clicked.
login_button = next(
    (
        candidate
        for candidate in driver.find_elements(By.TAG_NAME, "input")
        if candidate.get_attribute("type") == "submit"
        and candidate.get_attribute("value") == "ログイン"
    ),
    None,
)
if login_button is not None:
    login_button.click()
print(driver.current_url)

In [None]:
# Navigate to the Scouter screen.
# Step 1: follow the first link whose text is exactly "ツール".
tools_link = next(
    (anchor for anchor in driver.find_elements(By.TAG_NAME, "a") if anchor.text == "ツール"),
    None,
)
if tools_link is not None:
    tools_link.click()
print(driver.current_url)

# Step 2: find the first table cell that contains a "マネックス銘柄スカウター" link
# and click the "ログイン" link inside that same cell.
for cell in driver.find_elements(By.TAG_NAME, "td"):
    is_scouter_cell = any(
        link.text == "マネックス銘柄スカウター"
        for link in cell.find_elements(By.TAG_NAME, "a")
    )
    if not is_scouter_cell:
        continue
    for link in cell.find_elements(By.TAG_NAME, "a"):
        if link.text == "ログイン":
            link.click()
            break
    # Only the first matching cell is considered.
    break

# Step 3: switch to the first window handle that is not the current one
# (the newly opened tab).
new_tab = next(
    (handle for handle in driver.window_handles if handle != driver.current_window_handle),
    None,
)
if new_tab is not None:
    driver.switch_to.window(new_tab)
print(driver.current_url)

In [None]:
# Locate the stock-code input (name="wd") inside the element with id "se_word".
form = driver.find_element(By.ID, "se_word")
code_input = next(
    (
        field
        for field in form.find_elements(By.TAG_NAME, "input")
        if field.get_attribute("name") == "wd"
    ),
    None,
)
if code_input is None:
    # Without this check the code would type into whatever element the loop
    # variable last referred to (possibly one left over from an earlier cell).
    raise RuntimeError('input named "wd" was not found inside the "se_word" element')
print(code_input)

# Type the stock code into the input.
code = 6099
code_input.send_keys(str(code))

# Click the search button: the first image input whose class attribute is
# exactly "search_button". Nothing is clicked if none matches.
for candidate in form.find_elements(By.TAG_NAME, "input"):
    if (
        candidate.get_attribute("type") == "image"
        and candidate.get_attribute("class") == "search_button"
    ):
        print("click")
        candidate.click()
        break

print(driver.current_url)

In [None]:
# Parse the page currently shown in the browser. The parser is named explicitly
# (the same one used for the login page) so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(driver.page_source, "html.parser")

In [None]:
# Print the id of every <table> that declares one, to see which tables exist.
for candidate_table in soup.find_all("table"):
    if not candidate_table.has_attr("id"):
        continue
    print(candidate_table["id"])

In [None]:
# Scratch check: slicing an empty list from index 1 evaluates to [] instead of
# raising. NOTE(review): presumably a sanity check for the `rows[1:]` slice in
# get_series_table_data; safe to delete once confirmed.
[][1:]

In [None]:
def get_series_table_data(table):
    """Extract the merged header row and the body cell texts from a table.

    The first ``<tr>`` of ``<thead>`` provides the initial header. For every
    further header row, each ``<th>`` text is written into the next header
    slot that is still an empty string, scanning left to right from the start
    of the header for each row; once no empty slot is left, the text is
    appended to the end.

    Args:
        table: Object exposing ``find`` / ``find_all`` the way a BeautifulSoup
            ``<table>`` tag does, with ``<thead>`` and ``<tbody>`` children.

    Returns:
        Tuple ``(header, values)``: ``header`` is a list of strings and
        ``values`` is a list with one list of ``<td>`` texts per ``<tbody>`` row.
    """
    head_rows = table.find("thead").find_all("tr")
    header = [cell.text for cell in head_rows[0].find_all("th")]

    for extra_row in head_rows[1:]:
        labels = [cell.text for cell in extra_row.find_all("th")]
        slot = 0
        for label in labels:
            # Skip slots that already carry a label.
            while slot < len(header) and header[slot] != "":
                slot += 1
            if slot >= len(header):
                header.append(label)
            else:
                header[slot] = label
            slot += 1

    values = []
    for body_row in table.find("tbody").find_all("tr"):
        values.append([cell.text for cell in body_row.find_all("td")])
    return header, values


# ids of the time-series tables to read from the parsed page.
target_table_ids = [
    "table_fy",  # full-year results trend
    "table_4q",  # quarterly results trend
    "table_cf",  # cash-flow trend
    "table_bs_byyear_assets_10yr",  # assets
    "table_bs_byyear_liab_10yr",  # liabilities and net assets
    "table_ive_perf_10yr",  # capital investment, depreciation, R&D expenses
    "table_itd_perf_10yr",  # interest-bearing debt
    "table_turnover_rate_10yr",  # turnover ratios
    "table_employee_perf_table_10yr",  # headcount and per-employee results
]


# Extract each table and print its merged header row.
for table_id in target_table_ids:
    header, values = get_series_table_data(soup.find("table", id=table_id))
    print(header)

In [None]:
def get_index_table_data(table):
    """Return the texts of all ``<th>`` and all ``<td>`` cells of a table.

    Args:
        table: Object exposing ``find_all`` the way a BeautifulSoup tag does.

    Returns:
        Tuple ``(header, value)``: the ``<th>`` texts and the ``<td>`` texts,
        each as a flat list in the order ``find_all`` yields them.
    """
    def texts_of(tag_name):
        return [cell.text for cell in table.find_all(tag_name)]

    return texts_of("th"), texts_of("td")

# Container <div> that holds the per-category index tables.
index_tables = soup.find("div", id="index_list_mass")
# CSS classes of the category <div>s to read.
# NOTE(review): presumably romanized Japanese for valuation / efficiency /
# soundness / related — confirm against the page.
target_table_classes = ["wariyasu", "kouritsu", "kenzen", "kanren"]

# For each category, take the first <table> inside its <div> and print the headers.
for css_class in target_table_classes:
    category_div = index_tables.find("div", class_=css_class)
    header, value = get_index_table_data(category_div.find("table"))
    print(header)
