In [None]:
import json
from pathlib import Path
import time
import datetime

import requests
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

import stock

In [None]:
# Relative-strength ranking CSV published in the skyte/rs-log GitHub repository.
RS_STOCKS_CSV_URL = "https://raw.githubusercontent.com/skyte/rs-log/main/output/rs_stocks.csv"

# A timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() stops an HTTP error page from being parsed as CSV below.
res = requests.get(RS_STOCKS_CSV_URL, timeout=30)
res.raise_for_status()

In [None]:
# Break the downloaded CSV text into one string per line.
rows = res.text.split(sep="\n")

In [None]:
# Number of newline-separated lines in the downloaded CSV text.
len(rows)

In [None]:
# Endpoint of the Selenium server that hosts the remote browser session.
SELENIUM_HUB_URL = "http://localhost:4444/wd/hub"

options = webdriver.ChromeOptions()
# NOTE(review): "detach" is presumably set so the browser outlives this
# session script — confirm it has an effect with a remote hub.
options.add_experimental_option("detach", True)

driver = webdriver.Remote(command_executor=SELENIUM_HUB_URL, options=options)

In [None]:
def convert_to_number(str):
    """Parse a comma-grouped numeric string into an ``int`` or ``float``.

    Thousands separators (``,``) are removed first. Text that contains a
    ``.`` is parsed as a float; everything else is parsed as an int.

    Args:
        str: Text to parse, e.g. ``"1,234"`` or ``"12.5"``. The parameter name
            shadows the builtin and is kept only for caller compatibility.

    Returns:
        The parsed ``int`` or ``float``. When the text is not a valid number
        a message is printed and ``numpy.nan`` is returned instead.
    """
    text = str.replace(",", "")
    # Only the presence of a decimal point decides between float and int.
    parse = float if "." in text else int
    try:
        return parse(text)
    except ValueError:
        # Both parsers are guarded so malformed decimals (e.g. "1.2.3") fall
        # back to NaN exactly like malformed integers do.
        print("Failed to convert {} to number".format(text))
        return np.nan

In [None]:
def extract_table_data(source: BeautifulSoup) -> dict:
    """Convert the div-based table in ``source`` into a nested dict.

    The header is read from the ``D(tbhg)`` div and the body from the
    ``D(tbrg)`` div. Each direct ``rw-expnded`` child of the body is treated
    as one breakdown: its first ``D(tbr)`` row is the breakdown itself and the
    remaining rows are stored as sub-breakdowns inside the same dict.

    Args:
        source: Parsed page containing the table.

    Returns:
        ``{breakdown: {column: number, ..., sub_breakdown: {column: number}}}``.
        The first header column (the row label) and any column named ``"ttm"``
        are left out of the per-column values.
    """

    def parse_cells(row) -> dict:
        # Pair each centred value cell with its header, skipping "ttm".
        cells = row.find_all("div", {"class": "Ta(c)"})
        return {
            column: convert_to_number(cell.text)
            for column, cell in zip(value_columns, cells)
            if column != "ttm"
        }

    def label_of(row) -> str:
        # The first table cell of a row carries its label.
        return row.find("div", {"class": "D(tbc)"}).text

    header = source.find("div", {"class": "D(tbhg)"})
    # Drop the first header entry: it titles the label column, not a value.
    value_columns = [span.text for span in header.find_all("span")][1:]
    body = source.find("div", {"class": "D(tbrg)"})

    result = {}
    for block in body.find_all("div", {"class": "rw-expnded"}, recursive=False):
        block_rows = block.find_all("div", {"class": "D(tbr)"})
        top_row = block_rows[0]
        breakdown = label_of(top_row)
        result[breakdown] = parse_cells(top_row)
        for child_row in block_rows[1:]:
            sub_breakdown = label_of(child_row)
            result[breakdown][sub_breakdown] = parse_cells(child_row)
    return result

In [None]:
# Statement pages scraped for each ticker; {code} is replaced by the symbol.
target_urls = [
    "https://finance.yahoo.com/quote/{code}/financials?p={code}",
    "https://finance.yahoo.com/quote/{code}/balance-sheet?p={code}",
    "https://finance.yahoo.com/quote/{code}/cash-flow?p={code}"
]
code = "NVDA"

# Seconds to pause after a click so the page can re-render before it is read.
CLICK_WAIT_SECONDS = 1


def click_button(label: str) -> bool:
    """Click the first ``<button>`` whose visible text equals ``label``.

    Args:
        label: Exact button text to look for on the current page.

    Returns:
        True if a matching button was found and clicked, False otherwise.
    """
    for button in driver.find_elements(By.TAG_NAME, "button"):
        if button.text == label:
            button.click()
            time.sleep(CLICK_WAIT_SECONDS)
            return True
    return False


# Scraped statements keyed by reporting period, then by breakdown name.
table = {
    "Quarterly":  {},
    "Annual": {}
}
for url in target_urls:
    driver.get(url.format(code=code))
    click_button("Expand All")

    # Quarterly first, then annual — the same order the toggles were clicked before.
    for period in ("Quarterly", "Annual"):
        click_button(period)
        # The parser is selected with `features`; `parser=` is not the
        # BeautifulSoup argument for choosing a parser.
        source = BeautifulSoup(driver.page_source, features="lxml")
        table[period].update(extract_table_data(source))


In [None]:
def strptime(date_str: str) -> datetime.datetime:
    """Parse a slash-separated ``month/day/year`` string into a datetime.

    Args:
        date_str: Date text such as ``"1/5/2023"``; month and day may be
            given without a leading zero.

    Returns:
        The corresponding ``datetime.datetime`` (time part is midnight).
    """
    parts = date_str.split("/")
    month, day, year = parts[0], parts[1], parts[2]
    # Zero-pad month and day to two characters; the year is used verbatim.
    return datetime.datetime.strptime(f"{month:0>2}/{day:0>2}/{year}", "%m/%d/%Y")

def calc_grow_rate(before: float, after: float) -> float:
    """Return the percentage change from ``before`` to ``after``.

    The change is measured against the magnitude of ``before`` so that the
    sign of the result always follows the direction of the move: going from
    -1 to 1 is +200%, not -200%. For a positive ``before`` this is the usual
    ``(after - before) / before * 100``.

    Args:
        before: Baseline value.
        after: Later value.

    Returns:
        Growth in percent; positive when ``after`` is greater than ``before``.

    Raises:
        ZeroDivisionError: If ``before`` is zero.
    """
    return (after - before) / abs(before) * 100

stats_table = table

# TODO: wrap this screen in `is_good_stats(stats_table: dict) -> bool`.
quarter = stats_table["Quarterly"]
annual = stats_table["Annual"]

# Three data points are needed to compare two consecutive quarter-over-quarter moves.
REQUIRED_QUARTERS = 3


def _sorted_by_date(series: dict) -> list:
    """Return ``[date, value]`` pairs of ``series`` ordered oldest to newest."""
    return sorted(([strptime(key), val] for key, val in series.items()), key=lambda pair: pair[0])


# Each criterion is written as "passes only if ..." so that missing values
# (NaN compares false) and too-short histories fail the screen instead of
# slipping through.

# Criterion 1: EPS grew by a double-digit percentage in each of the last two quarters.
basic_epss = _sorted_by_date(quarter["Basic EPS"])
eps_ok = len(basic_epss) >= REQUIRED_QUARTERS and all(
    calc_grow_rate(basic_epss[i - 1][1], basic_epss[i][1]) >= 10.0 for i in (-1, -2)
)

# Criterion 2: revenue did not decline in either of the last two quarters.
operating_revenues = _sorted_by_date(quarter["Total Revenue"]["Operating Revenue"])
revenue_ok = (
    len(operating_revenues) >= REQUIRED_QUARTERS
    and operating_revenues[-3][1] <= operating_revenues[-2][1] <= operating_revenues[-1][1]
)

is_good = eps_ok and revenue_ok

# Criterion 3 (disabled): gross margin rose for two consecutive quarters.
# TODO: enable once verified; the ratio must use the values (index 1), not the dates (index 0).
# gross_profits = _sorted_by_date(quarter["Gross Profit"])
# assert operating_revenues[-1][0] == gross_profits[-1][0]
# assert operating_revenues[-2][0] == gross_profits[-2][0]
# assert operating_revenues[-3][0] == gross_profits[-3][0]
# gross_profit_rates = [gross_profits[i][1] / operating_revenues[i][1] for i in [-1, -2, -3]]
# if gross_profit_rates[0] < gross_profit_rates[1] or gross_profit_rates[1] < gross_profit_rates[2]:
#     is_good = False

In [None]:
def _fill_missing(target: dict, fallback: dict) -> None:
    """Copy entries of ``fallback`` into ``target`` without overwriting.

    Keys absent from ``target`` are added; when both sides hold a dict under
    the same key the merge recurses. Values already in ``target`` win.
    """
    for key, old_value in fallback.items():
        if key not in target:
            target[key] = old_value
        elif isinstance(target[key], dict) and isinstance(old_value, dict):
            _fill_missing(target[key], old_value)


json_path = stock.DATA_DIR / "financials" / f"{code}.json"

if json_path.exists():
    with open(json_path, "r") as f:
        cached = json.load(f)
    # Freshly scraped values take precedence; the cache only contributes
    # entries (e.g. older periods) that the new scrape does not contain.
    _fill_missing(table, cached)

# Make sure the target directory exists on the first run.
json_path.parent.mkdir(parents=True, exist_ok=True)
with open(json_path, "w") as f:
    json.dump(table, f, indent=4)

In [None]:
# Scratch calculation: ratio of two hard-coded figures (about 1.236).
# NOTE(review): the origin of 8288 and 6704 is not recorded here — confirm or remove.
8288 / 6704