In [None]:
# us株、仮想通貨の分足データを取得する
from pathlib import Path
import datetime

import polars as pl
import numpy as np
import yfinance as yf
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter

import stock


class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
    """`requests.Session` subclass that mixes in `CacheMixin` and `LimiterMixin`.

    The class adds no behavior of its own; caching and rate limiting are
    configured through the constructor arguments supplied where it is instantiated.
    """

    pass


# Shared HTTP session: responses are cached in the local SQLite file
# "yfinance.cache" and outgoing requests are throttled by the limiter below.
session = CachedLimiterSession(
    limiter=Limiter(RequestRate(10, Duration.SECOND)),  # max 10 requests per 1 second
    bucket_class=MemoryQueueBucket,
    backend=SQLiteCache("yfinance.cache"),
)

In [None]:
all_codes = stock.get_code_list(include_etf=True)
output_dir = stock.PROJECT_ROOT / "data" / "minutes"

# Download 1-minute bars one calendar day per request, covering the last
# 30 days up to and including today.
one_day = datetime.timedelta(days=1)
today = datetime.date.today()
first_day = today - datetime.timedelta(days=30)

# The original cell used `code` without ever binding it (NameError on a fresh
# kernel) and never consumed `all_codes`; iterating over the list is the
# evident intent.
# NOTE(review): assumes each element of `all_codes` is a bare code that becomes
# a Yahoo ticker by appending ".T" — confirm against stock.get_code_list.
for code in all_codes:
    ticker = yf.Ticker(f"{code}.T", session=session)
    date = first_day
    while date <= today:
        day_tag = date.strftime("%Y%m%d")
        df = stock.util.pd_to_pl(
            ticker.history(
                interval="1m",
                start=date.strftime("%Y-%m-%d"),
                end=(date + one_day).strftime("%Y-%m-%d"),
            )
        )
        # Days for which nothing came back produce no file.
        if len(df) > 0:
            output_path = output_dir / day_tag / f"{code}_{day_tag}.arrow"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            df.write_ipc(output_path)
        date += one_day

In [None]:
# Fetch yesterday's 1-minute BTC-JPY bars through the shared session.
# The request window is [date, date + 1 day), passed as YYYY-MM-DD strings.
date = datetime.date.today() - datetime.timedelta(days=1)
ticker = yf.Ticker("BTC-JPY", session=session)
df = ticker.history(
    start=f"{date:%Y-%m-%d}",
    end=f"{date + datetime.timedelta(days=1):%Y-%m-%d}",
    interval="1m",
)


In [None]:
# Display the frame produced by the preceding cell.
df

In [None]:
import requests
import json

# Base URL of the public API plus the query to run against it.
endPoint = "https://api.coin.z.com/public"
# Alternative query kept for reference (recent trades instead of klines):
# path = "/v1/trades?symbol=BTC&page=1&count=10"
path = "/v1/klines?symbol=ASTR&interval=1min&date=20210415"

# Issue the GET through the shared cached / rate-limited session instead of a
# bare `requests.get(endPoint + path)`.
response = session.get(f"{endPoint}{path}")

In [None]:
from fake_useragent import UserAgent

# Build request headers carrying a Chrome user-agent string from fake_useragent.
# NOTE(review): `headers` is not passed to any request in the cells visible
# here — confirm whether it is still needed.
ua = UserAgent()
headers = {'User-Agent': str(ua.chrome)}

In [None]:
# Count the records under data.rows in the most recent response.
# NOTE(review): the kline-parsing cell later in this notebook reads res["data"]
# as a list of records, so this cell presumably targeted a different response
# whose "data" is a mapping with "rows"/"headers" — confirm which request fed it.
len(response.json()["data"]["rows"])

In [None]:
# Decode the JSON body of the most recent response for the cells below.
res = response.json()

In [None]:
# Build a polars DataFrame from the records under data.rows.
df = pl.from_dicts(res["data"]["rows"])

In [None]:
# Create the scratch directory first so the write does not fail when ./tmp
# is missing (e.g. on a fresh checkout).
Path("./tmp").mkdir(parents=True, exist_ok=True)
df.write_csv("./tmp/codes.csv")

In [None]:
# Inspect the "headers" entry that accompanies data.rows in the decoded response.
res["data"]["headers"]

In [None]:
# Load the US ticker table and pull out its "symbol" column as a plain list.
ticker_csv_path = stock.PROJECT_ROOT / "data" / "us_tickers.csv"
df = pl.read_csv(ticker_csv_path)
symbol_list = df.get_column("symbol").to_list()

In [None]:
from tqdm import tqdm

In [None]:
# Bind a sample symbol explicitly so this cell runs on a fresh kernel; the
# original relied on `symbol` leaking from the download loop further down.
symbol = symbol_list[0]
# Pass the session by keyword, matching the other session-passing
# yf.Ticker calls in this notebook.
ticker = yf.Ticker(symbol, session=session)

In [None]:
# Fetch 1-minute bars for `ticker` from `date` onward (no end bound is given).
# `date` is whatever an earlier cell last assigned.
ticker.history(interval="1m", start=date.strftime("%Y-%m-%d"))

In [None]:
# Download 1-minute bars for every US symbol, one CSV per symbol per day,
# for the 30 calendar days ending today (today - 29 days .. today).
start_day = datetime.date.today() - datetime.timedelta(days=29)
end_day = datetime.date.today()
one_day = datetime.timedelta(days=1)
# Weekday dates to skip; the original hard-coded 2024-09-02 inline as a holiday.
market_holidays = {datetime.date(2024, 9, 2)}

for symbol in tqdm(symbol_list):
    date = start_day
    while date <= end_day:
        day_tag = date.strftime("%Y%m%d")
        output_path = stock.DATA_DIR / "minutes_yf" / day_tag / f"{symbol}_{day_tag}.csv"
        # Skip days that are already collected, weekends, and listed holidays.
        already_saved = output_path.exists()
        if not (already_saved or date.weekday() >= 5 or date in market_holidays):
            try:
                df = yf.Ticker(symbol, session=session).history(
                    interval="1m",
                    start=date.strftime("%Y-%m-%d"),
                    end=(date + one_day).strftime("%Y-%m-%d"),
                )
            except Exception as exc:
                # Best effort: report and move on to the next day. Catching
                # Exception (not a bare except) lets Ctrl-C / kernel interrupt
                # still stop this long-running loop.
                tqdm.write(f"{symbol} {day_tag}: download failed ({exc!r})")
            else:
                if len(df) > 0:
                    output_path.parent.mkdir(parents=True, exist_ok=True)
                    df = stock.util.pd_to_pl(df)
                    df.write_csv(output_path)
        date += one_day

In [None]:
# Inspect the current value of `code`; it must already be bound in the kernel
# namespace before this cell runs.
code

In [None]:
# Spot-check how many 1-minute bars come back for `code` on one fixed day.
# NOTE(review): unlike the download cells, this call neither passes the shared
# session nor appends ".T" to `code`, and the dates are hard-coded — confirm
# this is intentional.
len(yf.Ticker(code).history(interval="1m", start="2024-09-04", end="2024-09-05"))

In [None]:
# Inspect the "data" payload of the decoded response.
res["data"]

In [None]:
# Decode the kline response and turn its records into a typed polars frame.
res = response.json()
df = (
    pl.from_dicts(res["data"])
    .with_columns(
        pl.col("openTime").cast(pl.Float64),
        # NOTE(review): the Int64 cast presumably requires integer-valued price
        # fields; confirm it holds for every symbol queried.
        pl.col(["open", "high", "low", "close"]).cast(pl.Int64),
        pl.col("volume").cast(pl.Float64),
    )
    .with_columns(
        # openTime is read as epoch milliseconds; shifting by +9h gives a
        # naive timestamp in JST.
        (pl.from_epoch(pl.col("openTime"), time_unit="ms") + pl.duration(hours=9)).alias("datetime"),
    )
)

In [None]:
# Create the scratch directory first so the write does not fail when ./tmp
# is missing (e.g. on a fresh checkout).
Path("./tmp").mkdir(parents=True, exist_ok=True)
df.write_csv("./tmp/test.csv")

In [None]:
# Row count of the current `df`.
len(df)

In [None]:
# Inspect the full decoded response.
res

In [None]:
# Display the current `df`.
df