In [6]:
import requests
import typing as t
from bs4 import BeautifulSoup
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import pandas as pd

In [3]:
URL = 'https://finance.naver.com/item/sise_day.nhn'
HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4501.0 Safari/537.36 Edg/92.0.891.1'
}


In [5]:
def session():
    s = requests.session()
    r = Retry(total=5,
              backoff_factor=0.2,
              status_forcelist=[413, 429, 500, 502, 503, 504])
    a = HTTPAdapter(max_retries=r)
    s.mount('http://', a)
    s.mount('https://', a)
    return s
    
s = session()


In [17]:
def partition(line: str, n: int) -> t.Generator[list[str], None, None]:
    for i in range(0, len(line), n):
        yield line[i:i + n]

def parse(text: str) -> list[dict]:
    bs = BeautifulSoup(text, 'html.parser')

    values = [span.text for span in bs.findAll('span', class_='tah')]
    values = list(map(lambda v: v.strip().replace(',', ''), values))
    values = [int(v) if v.isnumeric() else v for v in values]

    # ['date', 'close', 'delta', 'open', 'high', 'low', 'volume']
    for row in partition(values, 7):
        return {
            'date': datetime.strptime(row[0], '%Y.%m.%d'),
            'open': row[3],
            'high': row[4],
            'low': row[5],
            'close': row[1],
            'volume': row[6],
        }

In [34]:
def partition(line: str, n: int) -> t.Generator[list[str], None, None]:
    for i in range(0, len(line), n):
        yield line[i:i + n]

def parse(text: str) -> t.Generator[list[dict], None, None]:
    bs = BeautifulSoup(text, 'html.parser')

    values = [span.text for span in bs.findAll('span', class_='tah')]
    values = list(map(lambda v: v.strip().replace(',', ''), values))
    values = [int(v) if v.isnumeric() else v for v in values]

    # ['date', 'close', 'delta', 'open', 'high', 'low', 'volume']
    for row in partition(values, 7):
        d = datetime.strptime(row[0], '%Y.%m.%d')
        yield {
            'date': row[0],
            'delta': (datetime.today() - d).days,
        }

In [25]:
symbol = '372330'
date_from = '2020-12-17'
page = 1

r = s.get(URL, 
          params={'code': symbol, 'page': page}, 
          headers=HEADERS)

In [33]:
parse(r.text, page)

{'date': '2022.02.15', 'delta': 0, 'page': 1}

In [61]:
symbol = '015760'

result = []
for page in range(1, 643):
    r = s.get(URL, 
            params={'code': symbol, 'page': page}, 
            headers=HEADERS)

    for item in parse(r.text):
        item['page'] = page         
        result.append(item)

In [None]:
for item in result:
    date, delta, page = item.values()
    print(f'{date}\t{delta}\t{page}')

In [96]:
import typing

def calc_page(target_date: typing.Union[datetime, str]) -> int:
    if isinstance(str, target_date):
        target_date = datetime.fromisoformat(target_date)

    Y1 = -0.329523068666276
    M = 0.0677815661631966
    delta = (datetime.today() - target_date).days
    
    return max(1, round(delta * M + Y1))


In [97]:
assert 1 == calc_page('2022-02-15')
assert 100 == calc_page('2018-02-02')
assert 200 == calc_page('2014-01-10')
assert 300 == calc_page('2010-01-04')
assert 400 == calc_page('2005-12-22')
assert 500 == calc_page('2001-12-05')
assert 600 == calc_page('1997-11-22')


TypeError: isinstance() arg 2 must be a type, a tuple of types, or a union