<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/crawler/KOTC_crawler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

class KOTC:
  def __init__(self, code):
    self.code = code
    self.headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

  def get_summary(self):
    url = 'http://kotc.kisline.com/highlight/mainHighlight.nice?paper_stock=' + str(self.code) + '&nav=1'
    result = requests.get(url)
    bs_obj = BeautifulSoup(result.content, "html.parser")

    summary = bs_obj.find_all('table', {'class':'list_b1', 'summary':'기업소개'})[0].find_all('li')
    summary = ' '.join([x.text for x in summary])
    return summary

  def get_share_distribution(self):
    url = 'http://kotc.kisline.com/compinfo/mainCompinfo.nice?paper_stock=' + str(self.code) + '&nav=2'
    result = requests.get(url)
    bs_obj = BeautifulSoup(result.content, "html.parser")
    table = bs_obj.find_all('table', {'class':'list_a0', 'summary':'부유지분, 관계, 기업명, 그룹명, 대표자, 대표전화, 홈페이지, 주거래은행, 주소'})[0]

    columns = [x.get_text() for x in table.find_all('th')]
    elements = [x.text for x in table.find_all('td')]
    n = len(columns)
    elements_li = [elements[i:i+n] for i in range(0, len(elements), n)]

    df = pd.DataFrame(elements_li, columns=columns)
    df['주식수'] = [int(x[:-1].replace(',','')) for x in df['주식수']]
    df['지분율'] = [float(x[:-1])/100 for x in df['지분율']]
    return df

  def get_stocks_change(self):
    url = 'http://kotc.kisline.com/compinfo/mainCompinfo.nice?paper_stock=' + str(self.code)+ '&nav=2'
    result = requests.get(url)
    bs_obj = BeautifulSoup(result.content, "html.parser")
    table = bs_obj.find_all('table', {'class':'list_b1', 'summary':'기업명, 설립일자, 기준일, 매출액, 순이익, 자본금, 지주비율'})[0]

    columns = [x.get_text() for x in table.find_all('th')][:6]
    index = [x.get_text() for x in table.find_all('th')][6:]
    elements = [x.text for x in table.find_all('td')]
    n = len(columns)-1
    elements_li = [elements[i:i+n] for i in range(0, len(elements), n)]
    df = pd.DataFrame(elements_li, columns=columns[1:], index=index)
    df['변동주식수'] = [int(x.replace(',','')) for x in df['변동주식수']]
    df['변동후주식수'] = [int(x.replace(',','')) for x in df['변동후주식수']]
    df['변동후자본금'] = [int(x.replace(',','')) for x in df['변동후자본금']]
    df['액면가'] = [int(x.replace(',','')) for x in df['액면가']]
    return df
    
  def get_annual(self):
    url = 'http://kotc.kisline.com/highlight/mainHighlight.nice?paper_stock=' + str(self.code) + '&nav=1'
    result = requests.get(url)
    bs_obj = BeautifulSoup(result.content, "html.parser")
    annual = bs_obj.find_all('table', {'class':'list_b1', 'summary':'매출액'})[0]

    columns = [x.text for x in annual.find_all('th')[3:7]]
    index = [x.text for x in annual.find_all('th')[7:]]
    elements = [x.text for x in annual.find_all('td')]
    n = len(columns)
    elements_li = [elements[i:i+n] for i in range(0, len(elements), n)]
    df = pd.DataFrame(elements_li, columns=columns, index=index)
    for c in df.columns:
      df[c] = [float(x.replace(',','')) for x in df[c]]
    return df

  def get_quarter(self):
    url = 'http://kotc.kisline.com/highlight/mainHighlight.nice?paper_stock=' + str(self.code) + '&nav=1'
    result = requests.get(url)
    bs_obj = BeautifulSoup(result.content, "html.parser")
    annual = bs_obj.find_all('table', {'class':'list_b1', 'summary':'매출액'})[0]
    quarter = bs_obj.find_all('table', {'class':'list_b1', 'summary':'매출액'})[1]

    columns = [x.text for x in quarter.find_all('th')[1:7]]
    index = [x.text for x in annual.find_all('th')[7:]]
    elements = [x.text for x in quarter.find_all('td')]
    n = len(columns)
    elements_li = [elements[i:i+n] for i in range(0, len(elements), n)]
    df = pd.DataFrame(elements_li, columns=columns, index=index)
    for c in df.columns:
      try: 
        df[c] = [float(x.replace(',','')) for x in df[c]]
      except:
        df[c] = df[c]
    return df

In [3]:
kotc = KOTC('066400')
kotc.get_share_distribution()

Unnamed: 0,주주명,관계,주식수,지분율
0,(주)하나투어,모회사,906981,0.3023


In [4]:
kotc.get_stocks_change()

Unnamed: 0,변동사유,변동주식수,변동후주식수,변동후자본금,액면가
2014.08.25,등록/지정,3000000,3000000,1500000,500


In [5]:
kotc.get_annual()

Unnamed: 0,2018.12,2019.12,2020.12,2021.03
매출액(억원),150.0,149.0,33.0,3.0
영업이익(억원),11.0,11.0,-19.0,-5.0
영업이익율(%),7.06,7.13,-57.27,-145.5
당기순이익(억원),9.0,9.0,-16.0,-5.0
순이익율(%),5.82,5.83,-48.52,-146.37
자산총계(억원),66.0,71.0,48.0,44.0
부채총계(억원),18.0,19.0,14.0,15.0
자본총계(억원),48.0,52.0,33.0,29.0
유보율(%),217.22,247.0,121.44,90.76
ROE,19.35,17.4,-37.14,-59.54


In [6]:
kotc.get_quarter()

Unnamed: 0,2020.03,2020.06,2020.09,2020.12,2021.03
매출액(억원),24.0,4.0,2.0,3.0,3.0
영업이익(억원),-2.0,-7.0,-4.0,-6.0,-5.0
영업이익율(%),-9.17,-180.17,-195.17,-195.35,-145.5
당기순이익(억원),-2.0,-6.0,-4.0,-4.0,-5.0
순이익율(%),-8.86,-148.25,-212.89,-126.99,-146.37
자산총계(억원),69.0,60.0,55.0,48.0,44.0
부채총계(억원),23.0,19.0,18.0,14.0,15.0
자본총계(억원),47.0,41.0,37.0,33.0,29.0
유보율(%),212.89,175.41,145.53,121.44,90.76
ROE,-17.1,-50.98,-45.88,-41.27,-59.54


In [None]:
from scipy.stats.mstats import gmean

def get_growth_score(li):
  return gmean(li)

In [20]:
stock_str = '066400134610104770104230003340123900068750201150237010079180298420074750227680015150026970252980140320008340012760005900105700013900035430043500366130387530040830328270050480316640078060102600250900080640276620280140274460168570002580070080018360066190279000015200042380179690268030227580066290070480012580002470073960032040037340041710192230293290323530049410011400102950043720129050114290224850036150037890001890141110068420190410011870008120046290206300184560038520036220114750110030101970063180010090381530013090140310090120104690065050037640166220087800040100049500031690020920094930099620038710047730363230061610072610025100045020060910084820130480049130030010054190241140012350075120012670036840'

In [21]:
codes = [stock_str[i:i+6] for i in range(0, len(stock_str),6)]

In [25]:
annual_dic = {}

for code in codes:
  kotc = KOTC(code)
  try:
    annual_dic[code] = kotc.get_annual().loc['영업이익율(%)']/100 + 1
  except:
    pass

In [28]:
new_d = {}

for key in annual_dic:
  try:
    new_d[key] = get_growth_score(annual_dic[key])
  except:
    pass

  log_a = np.log(np.array(a, dtype=dtype))


In [36]:
sorted(new_d.items(), key=lambda x: x[1], reverse=True)

[('066400', nan),
 ('134610', nan),
 ('237010', nan),
 ('298420', 1.441330023366001),
 ('050480', 1.2431943595114823),
 ('079180', 1.1951530089135038),
 ('102600', 1.154119200660003),
 ('123900', 1.1492013867468494),
 ('005900', 1.1193181232100706),
 ('026970', 1.0907446526801867),
 ('104770', 1.0803572569087148),
 ('201150', 1.0606162093572555),
 ('008340', 1.036509094516454),
 ('387530', 1.0360407263494176),
 ('035430', 1.0289838530954718),
 ('250900', nan),
 ('276620', nan),
 ('280140', 1.1430302250962923),
 ('074750', 1.1174099869062062),
 ('227680', 1.097737791274216),
 ('274460', nan),
 ('168570', nan),
 ('042380', 1.0892239042800278),
 ('268030', nan),
 ('037340', 1.0880485608655095),
 ('015200', 1.0624807047677487),
 ('080640', 1.050954372182419),
 ('066290', 1.047641656746419),
 ('140320', 1.0386470849639113),
 ('104230', 1.0364680476217814),
 ('018360', 1.028333726831703),
 ('003340', 1.0278129922869947),
 ('279000', 1.0254237184694381),
 ('066190', 1.022463296413268),
 ('013

In [16]:
kotc.get_annual().loc['영업이익율(%)']

2018.12      7.06
2019.12      7.13
2020.12    -57.27
2021.03   -145.50
Name: 영업이익율(%), dtype: float64

In [18]:
kotc.get_annual().loc['영업이익율(%)']/100

2018.12    0.0706
2019.12    0.0713
2020.12   -0.5727
2021.03   -1.4550
Name: 영업이익율(%), dtype: float64

In [19]:
kotc.get_annual().loc['영업이익율(%)']/100 + 1

2018.12    1.0706
2019.12    1.0713
2020.12    0.4273
2021.03   -0.4550
Name: 영업이익율(%), dtype: float64