<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/crawler/KOTC_crawler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

class KOTC:
  """Scraper for the company pages served by kotc.kisline.com.

  Each public method downloads one page for the stock code given at
  construction time, locates a specific ``<table>`` by its ``class`` and
  ``summary`` attributes, and returns its contents as a string or a
  ``pandas.DataFrame``.

  Every public method performs a network request. A missing table raises
  ``IndexError`` (the lookup indexes into the list of matches).
  """

  _BASE_URL = 'http://kotc.kisline.com'
  # Seconds to wait for the server before giving up; without a timeout
  # ``requests`` would block indefinitely on a stalled connection.
  _TIMEOUT = 10

  def __init__(self, code):
    """Store the stock code and the HTTP headers used for every request.

    Args:
      code: Stock code of the company; it is converted to text when the
        request URL is built.
    """
    self.code = code
    self.headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

  def _fetch_soup(self, path, nav):
    """Download ``path`` for this stock code and return the parsed HTML."""
    url = '{}/{}?paper_stock={}&nav={}'.format(self._BASE_URL, path, self.code, nav)
    # Send the browser-like headers prepared in __init__ with the request.
    result = requests.get(url, headers=self.headers, timeout=self._TIMEOUT)
    return BeautifulSoup(result.content, "html.parser")

  @staticmethod
  def _find_tables(soup, css_class, summary):
    """Return every table whose ``class`` and ``summary`` attributes match."""
    return soup.find_all('table', {'class': css_class, 'summary': summary})

  @staticmethod
  def _chunk(cells, width):
    """Split the flat list ``cells`` into consecutive rows of ``width`` items."""
    return [cells[i:i + width] for i in range(0, len(cells), width)]

  def get_summary(self):
    """Return the company introduction as one space-joined string.

    Returns:
      The text of every ``<li>`` inside the first matching table, joined
      with single spaces.
    """
    soup = self._fetch_soup('highlight/mainHighlight.nice', 1)
    table = self._find_tables(soup, 'list_b1', '기업소개')[0]
    return ' '.join([li.text for li in table.find_all('li')])

  def get_share_distribution(self):
    """Return the shareholder table as a DataFrame.

    Returns:
      A DataFrame whose columns are the table's header cells. The '주식수'
      column is converted to ``int`` and the '지분율' column to a ``float``
      fraction (the parsed number divided by 100).
    """
    soup = self._fetch_soup('compinfo/mainCompinfo.nice', 2)
    table = self._find_tables(soup, 'list_a0', '부유지분, 관계, 기업명, 그룹명, 대표자, 대표전화, 홈페이지, 주거래은행, 주소')[0]

    columns = [th.get_text() for th in table.find_all('th')]
    cells = [td.text for td in table.find_all('td')]
    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns)

    # The last character of each cell is dropped before parsing
    # (presumably a unit suffix such as a share counter / '%' - confirm
    # against the live page).
    df['주식수'] = [int(x[:-1].replace(',', '')) for x in df['주식수']]
    df['지분율'] = [float(x[:-1]) / 100 for x in df['지분율']]
    return df

  def get_stocks_change(self):
    """Return the history of share-count changes as a DataFrame.

    Returns:
      A DataFrame indexed by the header cells from position 6 onward, with
      header cells 1-5 as columns. The columns '변동주식수', '변동후주식수',
      '변동후자본금' and '액면가' are converted to ``int``.
    """
    soup = self._fetch_soup('compinfo/mainCompinfo.nice', 2)
    table = self._find_tables(soup, 'list_b1', '기업명, 설립일자, 기준일, 매출액, 순이익, 자본금, 지주비율')[0]

    headers = [th.get_text() for th in table.find_all('th')]
    # Header 0 is skipped: the row labels (headers[6:]) become the index,
    # so only headers 1-5 name data columns.
    columns = headers[1:6]
    index = headers[6:]
    cells = [td.text for td in table.find_all('td')]

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for name in ('변동주식수', '변동후주식수', '변동후자본금', '액면가'):
      df[name] = [int(x.replace(',', '')) for x in df[name]]
    return df

  def get_annual(self):
    """Return the first '매출액' table (annual figures) as a float DataFrame.

    Returns:
      A DataFrame whose columns are header cells 3-6 and whose index is the
      header cells from position 7 onward. Every column is converted to
      ``float`` after removing thousands separators.

    Raises:
      ValueError: If a cell cannot be parsed as a number.
    """
    soup = self._fetch_soup('highlight/mainHighlight.nice', 1)
    annual = self._find_tables(soup, 'list_b1', '매출액')[0]

    headers = annual.find_all('th')
    columns = [th.text for th in headers[3:7]]
    index = [th.text for th in headers[7:]]
    cells = [td.text for td in annual.find_all('td')]

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for c in df.columns:
      df[c] = [float(x.replace(',', '')) for x in df[c]]
    return df

  def get_quarter(self):
    """Return the second '매출액' table (quarterly figures) as a DataFrame.

    The row labels are read from the first (annual) table's header cells
    from position 7 onward; the column labels are header cells 1-6 of the
    second table.

    Returns:
      A DataFrame in which every column that parses cleanly is converted
      to ``float``; a column containing a non-numeric cell is left as the
      original strings.
    """
    soup = self._fetch_soup('highlight/mainHighlight.nice', 1)
    tables = self._find_tables(soup, 'list_b1', '매출액')
    annual = tables[0]
    quarter = tables[1]

    columns = [th.text for th in quarter.find_all('th')[1:7]]
    index = [th.text for th in annual.find_all('th')[7:]]
    cells = [td.text for td in quarter.find_all('td')]

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for c in df.columns:
      try:
        df[c] = [float(x.replace(',', '')) for x in df[c]]
      except (ValueError, AttributeError):
        # Best effort: keep the raw text when a cell is not numeric
        # (ValueError) or is missing, i.e. not a string (AttributeError).
        pass
    return df

In [2]:
# Create a scraper for stock code 298420 and display its shareholder table.
kotc = KOTC('298420')
kotc.get_share_distribution()

Unnamed: 0,주주명,관계,주식수,지분율
0,넷마블(주),최대주주,10020000,0.7965
1,권영식,등기임원,395000,0.0314
2,도기욱,계열회사 임원,20000,0.0016
3,하상룡,계열회사 임원,1173,0.0001
4,김준성,계열회사 임원,907,0.0001


In [3]:
# Display the share-count change history for the `kotc` instance.
kotc.get_stocks_change()

Unnamed: 0,변동사유,변동주식수,변동후주식수,변동후자본금,액면가
2021.07.15,스톡옵션행사,3290,63770795,6377079,100
2021.07.14,주식분할,51014004,63767505,6376750,100
2021.06.15,스톡옵션행사,2534,12753501,6376750,500
2021.05.31,스톡옵션행사,46683,12750967,6375483,500
2021.04.16,스톡옵션행사,21428,12704284,6352142,500
2021.03.16,스톡옵션행사,16185,12682856,6341428,500
2021.02.16,스톡옵션행사,76513,12666671,6333335,500
2021.01.19,스톡옵션행사,9908,12590158,6295079,500
2020.12.16,스톡옵션행사,9890,12580250,6290125,500
2020.11.16,주식매수선택권행사,89863,12570360,6285180,500


In [4]:
# Display the annual figures table for the `kotc` instance.
kotc.get_annual()

Unnamed: 0,2018.12,2019.12,2020.12,2021.03
매출액(억원),1449.0,1058.0,881.0,140.0
영업이익(억원),957.0,551.0,432.0,21.0
영업이익율(%),66.05,52.1,49.03,14.66
당기순이익(억원),728.0,473.0,386.0,33.0
순이익율(%),50.24,44.7,43.85,23.91
자산총계(억원),3408.0,3780.0,4108.0,4272.0
부채총계(억원),371.0,216.0,138.0,3724.0
자본총계(억원),3037.0,3564.0,3971.0,547.0
유보율(%),4783.9,5623.28,6212.51,762.75
ROE,27.51,14.33,10.26,5.93


In [5]:
# Display the quarterly figures table for the `kotc` instance.
kotc.get_quarter()

Unnamed: 0,2020.03,2020.06,2020.09,2020.12,2021.03
매출액(억원),305.0,202.0,205.0,169.0,140.0
영업이익(억원),197.0,105.0,100.0,30.0,21.0
영업이익율(%),64.53,52.09,48.83,17.72,14.66
당기순이익(억원),153.0,97.0,88.0,48.0,34.0
순이익율(%),50.35,48.03,42.99,28.19,23.91
자산총계(억원),3941.0,3999.0,4090.0,4108.0,4272.0
부채총계(억원),214.0,171.0,172.0,138.0,3724.0
자본총계(억원),3727.0,3828.0,3918.0,3971.0,547.0
유보율(%),5883.72,6038.83,6180.97,6212.51,762.75
ROE,16.83,10.3,9.1,4.84,5.93
