In [3]:
import re
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_date_str(s):
    """Extract the first "YYYY/MM" token from ``s`` and return it as "YYYY-MM".

    Args:
        s: Text to search, e.g. "2012/12 (annual)". Values that are not
            ``str`` are converted with ``str()`` before searching, so
            ``None`` or a tuple label does not raise.

    Returns:
        The first "YYYY/MM" match with "/" replaced by "-", or an empty
        string when no such pattern is present.
    """
    text = s if isinstance(s, str) else str(s)
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    match = re.search(r"\d{4}/\d{2}", text)
    if match is None:
        return ''
    return match.group().replace('/', '-')

def get_finstate_naver(code, fin_type='0', freq_type='Y'):
    """Download the key financial summary table for one stock from Naver.

    Args:
        code: Stock code, e.g. '005930'.
        fin_type: Financial statement type
            ('0': main statements, '1': GAAP separate, '2': GAAP consolidated,
            '3': IFRS separate, '4': IFRS consolidated).
        freq_type: Period ('Y': annual, 'Q': quarterly).

    Returns:
        A DataFrame with one row per period (the index is parsed from the
        "YYYY/MM" part of the source column labels) and one column per row
        label of the source table, or ``None`` when the page reports that no
        data exists.

        Column labels listed by the original author:
        ['매출액', '영업이익', '세전계속사업이익', '당기순이익', '당기순이익(지배)',
         '당기순이익(비지배)', '자산총계', '부채총계', '자본총계', '자본총계(지배)',
         '자본총계(비지배)', '자본금', '영업활동현금흐름', '투자활동현금흐름',
         '재무활동현금흐름', 'CAPEX', 'FCF', '이자발생부채', '영업이익률', '순이익률',
         'ROE(%)', 'ROA(%)', '부채비율', '자본유보율', 'EPS(원)', 'PER(배)',
         'BPS(원)', 'PBR(배)', '현금DPS(원)', '현금배당수익률', '현금배당성향(%)',
         '발행주식수(보통주)']
    """
    url_tmpl = 'http://companyinfo.stock.naver.com/v1/company/ajax/cF1001.aspx?' \
               'cmp_cd=%s&fin_typ=%s&freq_typ=%s'
    url = url_tmpl % (code, fin_type, freq_type)

    tables = pd.read_html(url, encoding="utf-8", flavor='html5lib')
    df = tables[0]

    # DataFrame.ix was removed in pandas 1.0; .iloc is the positional accessor.
    # str() keeps the check from failing when the first cell is not a string.
    if '해당 데이터가 존재하지 않습니다' in str(df.iloc[0, 0]):
        return None

    # Work on new frames instead of mutating in place.
    df = df.rename(columns={'주요재무정보': 'date'}).set_index('date')

    # Build the new column labels: drop the period-group header label if
    # present, then reduce every remaining label to "YYYY-MM" ('' if no date).
    cols = list(df.columns)
    for group_label in ('연간', '분기'):
        if group_label in cols:
            cols.remove(group_label)
    cols = [get_date_str(x) for x in cols]

    # Drop the last column so the frame width matches the shortened label list.
    # NOTE(review): presumably this mirrors the page layout -- confirm against
    # a live response.
    df = df.iloc[:, :-1]
    df.columns = cols

    # Transpose so each period becomes a row, then parse the labels as dates.
    dft = df.T
    dft.index = pd.to_datetime(dft.index)

    # Remove rows whose index could not be parsed (NaT).
    dft = dft[pd.notnull(dft.index)]
    return dft

In [None]:
import pymysql
import configparser

# Read the database settings from the [db] section of config.cfg
# (the path is relative to the current working directory).
cf = configparser.ConfigParser()
cf.read('config.cfg')

DB_IP, DB_USER, DB_PWD, DB_SCH = (
    cf.get('db', option) for option in ('DB_IP', 'DB_USER', 'DB_PWD', 'DB_SCH')
)

# Module-level connection used by get_codes().
conn = pymysql.connect(
    host=DB_IP,
    user=DB_USER,
    password=DB_PWD,
    db=DB_SCH,
    charset='utf8mb4',
)
def get_codes():
    """Fetch every distinct stock code stored in ``data.daily_stock``.

    Uses the module-level ``conn`` connection.

    Returns:
        The rows returned by ``cursor.fetchall()``; ``code`` is the only
        selected column, so it is the first field of each row.
    """
    query = "SELECT DISTINCT code FROM data.daily_stock"
    # Use the cursor as a context manager so it is closed even when
    # execute() or fetchall() raises; previously it was never closed.
    with conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchall()



#for code in get_codes():
#    df = get_finstate_naver(code[0])
    
    ## something to do.

In [6]:
# Samsung Electronics (005930). The original note labels this as
# "annual, IFRS consolidated"; the call relies on the function defaults.
df = get_finstate_naver('005930')

# Column labels to inspect, in print order.
headers = [
    '매출액', '영업이익', '세전계속사업이익', '당기순이익',
    '당기순이익(지배)', '당기순이익(비지배)',
    '자산총계', '부채총계', '자본총계', '자본총계(지배)', '자본총계(비지배)', '자본금',
    '영업활동현금흐름', '투자활동현금흐름', '재무활동현금흐름',
    'CAPEX', 'FCF', '이자발생부채',
    '영업이익률', '순이익률', 'ROE(%)', 'ROA(%)', '부채비율', '자본유보율',
    'EPS(원)', 'PER(배)', 'BPS(원)', 'PBR(배)',
    '현금DPS(원)', '현금배당수익률', '현금배당성향(%)', '발행주식수(보통주)',
]

# Select all listed columns at once. The result is discarded: it is not the
# last expression of the cell, so nothing is displayed for it.
df[headers]

# Print each column as its own series, followed by a blank line.
for header in headers:
    print(header, df[header], '\n')

매출액 2012-12-01    2011036.0
2013-12-01    2286927.0
2014-12-01    2062060.0
2015-12-01    2006535.0
2016-12-01    2018667.0
2017-12-01    2205512.0
2018-12-01    2330379.0
2019-12-01    2360945.0
Name: 매출액, dtype: float64
영업이익 2012-12-01    290493.0
2013-12-01    367850.0
2014-12-01    250251.0
2015-12-01    264134.0
2016-12-01    292407.0
2017-12-01    411300.0
2018-12-01    437803.0
2019-12-01    479730.0
Name: 영업이익, dtype: float64
세전계속사업이익 2012-12-01    299150.0
2013-12-01    383643.0
2014-12-01    278750.0
2015-12-01    259610.0
2016-12-01    307137.0
2017-12-01    424113.0
2018-12-01    452598.0
2019-12-01         NaN
Name: 세전계속사업이익, dtype: float64
당기순이익 2012-12-01    238453.0
2013-12-01    304748.0
2014-12-01    233944.0
2015-12-01    190601.0
2016-12-01    227261.0
2017-12-01    314572.0
2018-12-01    335585.0
2019-12-01    373760.0
Name: 당기순이익, dtype: float64
당기순이익(지배) 2012-12-01    231854.0
2013-12-01    298212.0
2014-12-01    230825.0
2015-12-01    186946.0
2016-12-01    2241

In [None]:
# Fetch the financial summary for one company. Alternatives are kept as
# commented-out calls; each now passes the stock code named in its label
# (they previously all passed '035720').
#df = get_finstate_naver('068270') # Celltrion 068270
df = get_finstate_naver('035720') # Kakao 035720
#df = get_finstate_naver('130960') # CJ E&M 130960
#df = get_finstate_naver('086900') # Medytox 086900

# Last expression of the cell: display only the listed columns.
df[['ROE(%)', 'ROA(%)', '부채비율', '자본유보율', 'EPS(원)', 'PER(배)', 'BPS(원)', 'PBR(배)']]