In [1]:
import sys
from datetime import datetime
from typing import Iterator

import pandas as pd
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

In [2]:
# Endpoint queried for the daily quote table; the symbol and page number are
# supplied as query parameters by the request cell.
URL = 'https://finance.naver.com/item/sise_day.nhn'

# Headers sent with every request. The User-Agent mimics a desktop browser
# (presumably because the site rejects the default client UA — TODO confirm).
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/92.0.4501.0 Safari/537.36 Edg/92.0.891.1'
    ),
}


In [24]:
def parse(bs: 'BeautifulSoup') -> Iterator[dict]:
    """Yield one quote record per table row found in a parsed page.

    The text of every ``<span class="tah">`` element is collected in document
    order and consumed in groups of seven, read as
    ``[date, close, delta, open, high, low, volume]``.

    Args:
        bs: Parsed page; only its ``find_all`` method is used.

    Yields:
        dict: String values under the keys 'Date', 'Open', 'High', 'Low',
        'Close', 'Adj Close' and 'Volume'. Values are stripped and have
        thousands separators (',') removed. 'Date' additionally has '.'
        replaced by '-'. 'Adj Close' repeats the close value.
    """
    fields_per_row = 7

    def clean(text):
        return text.strip().replace(',', '')

    values = [clean(span.text) for span in bs.find_all('span', class_='tah')]

    for start in range(0, len(values), fields_per_row):
        row = values[start:start + fields_per_row]
        if len(row) < fields_per_row:
            # A trailing partial group cannot be mapped to a full record;
            # indexing it would raise IndexError, so stop here instead.
            break
        yield {
            # Only the date uses '.' as a separator; rewriting dots in the
            # numeric fields would corrupt fractional values.
            'Date': row[0].replace('.', '-'),
            'Open': row[3],
            'High': row[4],
            'Low': row[5],
            'Close': row[1],
            'Adj Close': row[1],
            'Volume': row[6],
        }

In [4]:
# Run parameters.
# date_from: earliest date to keep, as a 'YYYY-MM-DD' string (it is compared
#            against the parsed 'Date' strings).
# symbol:    item code sent as the 'code' query parameter.
date_from = '2020-12-17'
symbol = '372330'

class OutOfPeriod(Exception):
    """Signals that a quote row is dated earlier than the requested start date."""

In [5]:
def session():
    """Build a ``requests`` session that retries failed requests.

    Returns:
        requests.Session: A session with a single ``HTTPAdapter`` mounted for
        both ``http://`` and ``https://``. The adapter's retry policy allows
        up to 5 retries with a backoff factor of 0.2 and forces a retry on
        HTTP status 413, 429, 500, 502, 503 and 504.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=0.2,
        status_forcelist=[413, 429, 500, 502, 503, 504],
    )
    adapter = HTTPAdapter(max_retries=retry_policy)

    # requests.session() is only kept as a backwards-compatibility alias;
    # instantiate Session directly.
    http = requests.Session()
    for prefix in ('http://', 'https://'):
        http.mount(prefix, adapter)
    return http
    
# Fetch the first page of the daily quote table for `symbol`.
s = session()
r = s.get(URL,
          params={'code': symbol, 'page': 1},
          headers=HEADERS,
          # requests applies no timeout by default, so without this a stalled
          # connection would block the cell indefinitely (value in seconds).
          timeout=10)
# Fail here on an HTTP error status instead of parsing an error page below.
r.raise_for_status()


In [None]:
# Preview only the beginning of the response body rather than dumping the
# whole HTML document into the cell output.
r.text[:1000]

In [7]:
# Parse the fetched page with the built-in HTML parser.
bs = BeautifulSoup(markup=r.text, features='html.parser')

# A page without a <td class="pgRR"> cell is treated as "no data for symbol".
# NOTE(review): presumably pgRR is the pager's "last page" cell; a symbol whose
# history fits on a single page might lack it too — confirm.
if bs.find(name='td', class_='pgRR') is None:
    print(
        '[ERROR] Invalid symbol: {}'.format(symbol),
        file=sys.stderr,
    )
    raise RuntimeError('No data found with symbol')

In [8]:
bs = BeautifulSoup(r.text, 'html.parser')

# find() returns None when the page has no matching span at all, so guard
# before reading .text; both "no span" and "empty first span" mean no rows.
first_cell = bs.find('span', class_='tah')
if first_cell is None or first_cell.text == '':
    raise FileNotFoundError(
        'No quote rows found for symbol: {}'.format(symbol))

In [None]:
# Inspect the raw cells that parse() consumes. find_all is the current name
# of the legacy findAll alias.
bs.find_all('span', class_='tah')

In [None]:
# Collect rows until one is dated before `date_from`. Both sides are
# 'YYYY-MM-DD' strings, so plain string comparison orders them by date.
l = []
try:
    # The loop variable is named `row` so it does not overwrite the global
    # response object `r` that earlier cells read.
    for row in parse(bs):
        if date_from > row['Date']:
            raise OutOfPeriod()

        l.append(row)
except OutOfPeriod:
    # OutOfPeriod is the stop signal; catching it lets the cell finish and
    # build the frame from the rows collected so far.
    pass

# Passing the columns explicitly keeps 'Date' present even when no rows were
# collected, so set_index below does not raise KeyError on an empty frame.
df = pd.DataFrame(
    l,
    columns=['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
df.set_index('Date')
