In [13]:
import re
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_date_str(s):
    """Extract the first ``YYYY/MM`` token from ``s`` and return it as ``YYYY-MM``.

    Args:
        s: Text to search, e.g. ``'2018/12'``. Values that are not ``str``
            (for example tuple column labels) are converted with ``str()``
            before searching instead of raising ``TypeError``.

    Returns:
        str: The first four-digit/two-digit match with ``/`` replaced by
        ``-``, or ``''`` when nothing matches.
    """
    # Raw string: a backslash-d inside a normal literal is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    match = re.search(r"\d{4}/\d{2}", str(s))
    if match is None:
        return ''
    return match.group().replace('/', '-')

def get_finstate_naver(code, fin_type='0', freq_type='Y'):
    """Download the key-financials table for one stock from Naver company info.

    Args:
        code: Stock code, e.g. ``'005930'``.
        fin_type: Financial-statement type sent as ``fin_typ``
            (0: primary statements, 1: GAAP standalone, 2: GAAP consolidated,
            3: IFRS separate, 4: IFRS consolidated).
        freq_type: Period sent as ``freq_typ`` ('Y': annual, 'Q': quarterly).

    Returns:
        pandas.DataFrame or None: The transposed table, i.e. one row per
        reporting period (index parsed from the ``YYYY/MM`` column headers,
        rows whose header held no date are dropped) and one column per entry
        of the source table's '주요재무정보' column. ``None`` when the parsed
        table is empty or its first cell carries the "no data" message.

    Item labels listed in the original notes: 매출액, 영업이익, 세전계속사업이익,
    당기순이익, 당기순이익(지배), 당기순이익(비지배), 자산총계, 부채총계, 자본총계,
    자본총계(지배), 자본총계(비지배), 자본금, 영업활동현금흐름, 투자활동현금흐름,
    재무활동현금흐름, CAPEX, FCF, 이자발생부채, 영업이익률, 순이익률, ROE(%), ROA(%),
    부채비율, 자본유보율, EPS(원), PER(배), BPS(원), PBR(배), 현금DPS(원),
    현금배당수익률, 현금배당성향(%), 발행주식수(보통주).
    """
    url_tmpl = 'http://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?' \
               'cmp_cd=%s&fin_typ=%s&freq_typ=%s'
    url = url_tmpl % (code, fin_type, freq_type)

    raw = pd.read_html(url, encoding="utf-8", flavor='html5lib')[0]
    if raw.empty:
        return None
    # `.ix` was removed from pandas; `.iloc` is the positional equivalent.
    # str(): the first cell is not guaranteed to be a string (e.g. NaN).
    if '해당 데이터가 존재하지 않습니다' in str(raw.iloc[0, 0]):
        return None

    table = raw.rename(columns={'주요재무정보': 'date'}).set_index('date')

    # Drop the period-group header label, then reduce every remaining header
    # to its "YYYY-MM" part ('' when a header contains no date).
    labels = list(table.columns)
    for group_header in ('연간', '분기'):
        if group_header in labels:
            labels.remove(group_header)
    labels = [get_date_str(label) for label in labels]

    # The last column is always discarded; presumably this compensates for the
    # group header removed above so that the label count matches again --
    # TODO confirm against the live table layout.
    table = table.iloc[:, :-1]
    table.columns = labels

    periods = table.T
    periods.index = pd.to_datetime(periods.index)

    # Rows whose header had no date were parsed to NaT; drop them.
    return periods[pd.notnull(periods.index)]

In [None]:
import pymysql
import configparser

# Read the database settings from the [db] section of config.cfg so that no
# credentials are hard-coded in the notebook.
cf = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse; fail loudly instead of hitting a confusing
# NoSectionError on the first cf.get() below.
if not cf.read('config.cfg'):
    raise FileNotFoundError("config.cfg not found or unreadable")

DB_IP = cf.get('db', 'DB_IP')
DB_USER = cf.get('db', 'DB_USER')
DB_PWD = cf.get('db', 'DB_PWD')
DB_SCH = cf.get('db', 'DB_SCH')

# `database=` is the documented keyword; `db=` is a deprecated alias of it.
conn = pymysql.connect(host=DB_IP, user=DB_USER, password=DB_PWD,
                       database=DB_SCH, charset='utf8mb4')
def get_codes(connection=None):
    """Return the distinct stock codes stored in ``data.daily_stock``.

    Args:
        connection: DB-API connection whose cursors can be used in a ``with``
            statement. Defaults to the module-level ``conn``.

    Returns:
        list: The rows produced by ``fetchall()``, one per distinct code;
        the code itself is element 0 of each row.
    """
    if connection is None:
        connection = conn
    query = "SELECT DISTINCT code FROM data.daily_stock"
    # The context manager closes the cursor even if the query fails.
    with connection.cursor() as cursor:
        cursor.execute(query)
        # The original never returned the result, so callers iterated over None.
        return list(cursor.fetchall())



# Column labels selected from each company's statement frame.
FINSTATE_COLUMNS = [
    '매출액', '영업이익', '세전계속사업이익', '당기순이익', '당기순이익(지배)', '당기순이익(비지배)', '자산총계',
    '부채총계', '자본총계', '자본총계(지배)', '자본총계(비지배)', '자본금', '영업활동현금흐름', '투자활동현금흐름', '재무활동현금흐름',
    'CAPEX', 'FCF', '이자발생부채', '영업이익률', '순이익률', 'ROE(%)', 'ROA(%)', '부채비율', '자본유보율', 'EPS(원)', 'PER(배)',
    'BPS(원)', 'PBR(배)', '현금DPS(원)', '현금배당수익률', '현금배당성향(%)', '발행주식수(보통주)',
]

for code in get_codes():
    # Each item is indexed with [0] to obtain the stock code.
    df = get_finstate_naver(code[0])
    if df is None:
        # get_finstate_naver returns None when the site reports no data;
        # subscripting None would raise TypeError.
        continue
    # Keep the selection in a name so the follow-up step has something to use
    # (the original evaluated the selection and discarded it).
    finstate = df[FINSTATE_COLUMNS]
    # TODO: process / store `finstate` here.

In [14]:
# Samsung Electronics (005930). Called with the function defaults, i.e.
# freq_type='Y' (annual) and fin_type='0'; the original note describes the
# result as annual / IFRS consolidated.
df = get_finstate_naver('005930')
df[['매출액','영업이익', '당기순이익', '영업활동현금흐름', '순이익률']]

date,매출액,영업이익,당기순이익,영업활동현금흐름,순이익률
2011-12-01,1650018.0,156443.0,137590.0,229179.0,8.34
2012-12-01,2011036.0,290493.0,238453.0,379728.0,11.86
2013-12-01,2286927.0,367850.0,304748.0,467074.0,13.33
2014-12-01,2062060.0,250251.0,233944.0,369754.0,11.34
2015-12-01,2006535.0,264134.0,190601.0,400618.0,9.5
2016-12-01,2004675.0,282505.0,220057.0,455796.0,10.98
2017-12-01,2208352.0,411526.0,314203.0,516450.0,14.23
2018-12-01,2331110.0,438342.0,335387.0,557257.0,14.39


In [15]:
# Alternative tickers (uncomment exactly one line to switch company). The
# codes are the ones written in the original trailing notes; the original
# commented-out calls all passed '035720' by mistake.
#df = get_finstate_naver('068270') # Celltrion 068270
df = get_finstate_naver('035720') # Kakao 035720
#df = get_finstate_naver('130960') # CJ E&M 130960
#df = get_finstate_naver('086900') # Medytox 086900

# Display the ROE/ROA, debt-ratio, reserve-ratio, EPS/PER and BPS/PBR columns.
df[['ROE(%)', 'ROA(%)', '부채비율', '자본유보율', 'EPS(원)', 'PER(배)', 'BPS(원)', 'PBR(배)']]

date,ROE(%),ROA(%),부채비율,자본유보율,EPS(원),PER(배),BPS(원),PBR(배)
2011-12-01,26.86,21.45,22.46,6533.61,8079.0,14.85,34031.0,3.53
2012-12-01,15.7,13.09,17.68,7376.32,5656.0,16.11,38158.0,2.39
2013-12-01,17.73,14.81,22.53,728.3,4532.0,18.54,13398.0,6.27
2014-12-01,11.41,10.04,12.37,8411.95,6116.0,20.21,42337.0,2.92
2015-12-01,3.02,2.65,23.32,8407.52,1269.0,91.24,42476.0,2.73
2016-12-01,1.87,1.49,48.06,,828.0,94.18,48994.0,1.59
2017-12-01,3.43,2.53,45.85,,1719.0,51.25,51292.0,1.72
2018-12-01,4.53,3.36,44.74,,2375.0,37.09,53621.0,1.64
