<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/crawler/KOTC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

class KOTC:
  """Scraper for the company pages served by kotc.kisline.com.

  Every public method downloads one page for the configured stock code,
  locates a table through its ``class`` / ``summary`` attributes and returns
  the parsed content.

  Args:
    code: Stock code placed in the ``paper_stock`` query parameter. It is
      converted with ``str()`` when the URL is built.
    timeout: Seconds passed to ``requests.get`` as ``timeout`` so a stalled
      server cannot block the notebook forever.

  Attributes:
    code: The stock code given to the constructor.
    timeout: The request timeout in seconds.
    headers: HTTP headers (a browser-like User-Agent) sent with every request.
  """

  # URL templates of the two pages the methods read from.
  _HIGHLIGHT_URL = 'http://kotc.kisline.com/highlight/mainHighlight.nice?paper_stock={code}&nav=1'
  _COMPINFO_URL = 'http://kotc.kisline.com/compinfo/mainCompinfo.nice?paper_stock={code}&nav=2'

  def __init__(self, code, timeout=10):
    self.code = code
    self.timeout = timeout
    self.headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

  # ---------------------------------------------------------------- helpers

  def _get_soup(self, url):
    """Download ``url`` and return it parsed with ``html.parser``.

    Raises:
      requests.HTTPError: If the server answers with an error status.
    """
    # The headers were previously built but never sent; pass them explicitly.
    response = requests.get(url, headers=self.headers, timeout=self.timeout)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

  def _page(self, template):
    """Fill ``template`` with this instance's stock code and fetch the page."""
    return self._get_soup(template.format(code=str(self.code)))

  @staticmethod
  def _find_table(soup, css_class, summary, position=0):
    """Return the ``position``-th table matching ``class`` and ``summary``.

    Raises:
      IndexError: If fewer than ``position + 1`` matching tables exist.
    """
    tables = soup.find_all('table', {'class': css_class, 'summary': summary})
    if position >= len(tables):
      raise IndexError(
          'table #%d with class=%r and summary=%r not found in the page'
          % (position, css_class, summary))
    return tables[position]

  @staticmethod
  def _cell_texts(cells):
    """Return the text content of each element in ``cells``."""
    return [cell.get_text() for cell in cells]

  @staticmethod
  def _chunk(values, width):
    """Split the flat list ``values`` into consecutive rows of ``width`` items."""
    return [values[start:start + width] for start in range(0, len(values), width)]

  @staticmethod
  def _to_int(text):
    """Parse an integer written with thousands separators, e.g. ``'1,234'``."""
    return int(text.replace(',', ''))

  @staticmethod
  def _to_float(text):
    """Parse a float written with thousands separators, e.g. ``'1,234.5'``."""
    return float(text.replace(',', ''))

  # ------------------------------------------------------------- public API

  def get_summary(self):
    """Return the text of the '기업소개' table's ``<li>`` items joined by spaces.

    Returns:
      str: One string with the list items separated by a single space.

    Raises:
      IndexError: If the table is not present in the page.
    """
    soup = self._page(self._HIGHLIGHT_URL)
    table = self._find_table(soup, 'list_b1', '기업소개')
    return ' '.join(self._cell_texts(table.find_all('li')))

  def get_share_distribution(self):
    """Return the share-distribution table as a DataFrame.

    Column names come from the table's ``<th>`` cells and the ``<td>`` cells
    are split into rows of that width. '주식수' is converted to ``int`` and
    '지분율' to a fraction (``float / 100``); in both cases the last character
    of the scraped text is dropped first (presumably a unit suffix such as
    '%' -- confirm against the live page).

    Raises:
      IndexError: If the table is not present in the page.
    """
    soup = self._page(self._COMPINFO_URL)
    table = self._find_table(
        soup, 'list_a0',
        '부유지분, 관계, 기업명, 그룹명, 대표자, 대표전화, 홈페이지, 주거래은행, 주소')

    columns = self._cell_texts(table.find_all('th'))
    cells = self._cell_texts(table.find_all('td'))

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns)
    df['주식수'] = [self._to_int(x[:-1]) for x in df['주식수']]
    df['지분율'] = [float(x[:-1]) / 100 for x in df['지분율']]
    return df

  def get_stocks_change(self):
    """Return the stock-change table as a DataFrame.

    The first six ``<th>`` cells are the header row; its first entry labels
    the index column and is dropped. The remaining ``<th>`` cells become the
    row index. The four numeric columns are converted to ``int``.

    Raises:
      IndexError: If the table is not present in the page.
    """
    soup = self._page(self._COMPINFO_URL)
    table = self._find_table(
        soup, 'list_b1', '기업명, 설립일자, 기준일, 매출액, 순이익, 자본금, 지주비율')

    headers = self._cell_texts(table.find_all('th'))
    columns = headers[1:6]
    index = headers[6:]
    cells = self._cell_texts(table.find_all('td'))

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for name in ('변동주식수', '변동후주식수', '변동후자본금', '액면가'):
      df[name] = [self._to_int(x) for x in df[name]]
    return df

  def get_annual(self):
    """Return the first '매출액' table (annual figures) as a float DataFrame.

    ``<th>`` cells 3..6 are used as column labels and the cells from position
    7 onwards as row labels. Every value is converted to ``float``.

    Raises:
      IndexError: If the table is not present in the page.
      ValueError: If any cell is not a number.
    """
    soup = self._page(self._HIGHLIGHT_URL)
    annual = self._find_table(soup, 'list_b1', '매출액', 0)

    headers = self._cell_texts(annual.find_all('th'))
    columns = headers[3:7]
    index = headers[7:]
    cells = self._cell_texts(annual.find_all('td'))

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for column in df.columns:
      df[column] = [self._to_float(x) for x in df[column]]
    return df

  def get_quarter(self):
    """Return the second '매출액' table (quarterly figures) as a DataFrame.

    Column labels are ``<th>`` cells 1..6 of the quarterly table. The row
    labels are read from the annual table (``<th>`` cells from position 7 on),
    exactly as the original implementation did. Conversion to ``float`` is
    best effort per column: a column containing a non-numeric or missing cell
    is left as scraped text.

    Raises:
      IndexError: If either table is not present in the page.
    """
    soup = self._page(self._HIGHLIGHT_URL)
    annual = self._find_table(soup, 'list_b1', '매출액', 0)
    quarter = self._find_table(soup, 'list_b1', '매출액', 1)

    columns = self._cell_texts(quarter.find_all('th'))[1:7]
    index = self._cell_texts(annual.find_all('th'))[7:]
    cells = self._cell_texts(quarter.find_all('td'))

    df = pd.DataFrame(self._chunk(cells, len(columns)), columns=columns, index=index)
    for column in df.columns:
      try:
        df[column] = [self._to_float(x) for x in df[column]]
      except (ValueError, AttributeError):
        # ValueError: non-numeric text; AttributeError: missing (None) cell.
        # Keep the column as scraped instead of failing the whole table.
        continue
    return df

In [2]:
# Create a scraper for stock code '066400'; the later cells reuse this `kotc` instance.
kotc = KOTC('066400')
# Last expression of the cell, so the returned DataFrame is rendered as the cell output.
kotc.get_share_distribution()

Unnamed: 0,주주명,관계,주식수,지분율
0,(주)하나투어,모회사,906981,0.3023


In [3]:
# Show the stock-change table; needs the `kotc` instance created in an earlier cell.
kotc.get_stocks_change()

Unnamed: 0,변동사유,변동주식수,변동후주식수,변동후자본금,액면가
2014.08.25,등록/지정,3000000,3000000,1500000,500


In [4]:
# Show the annual figures table; needs the `kotc` instance created in an earlier cell.
kotc.get_annual()

Unnamed: 0,2018.12,2019.12,2020.12,2021.06
매출액(억원),150.0,149.0,33.0,6.0
영업이익(억원),11.0,11.0,-19.0,-9.0
영업이익율(%),7.06,7.13,-57.27,-152.72
당기순이익(억원),9.0,9.0,-16.0,-14.0
순이익율(%),5.82,5.83,-48.52,-236.74
자산총계(억원),66.0,71.0,48.0,32.0
부채총계(억원),18.0,19.0,14.0,13.0
자본총계(억원),48.0,52.0,33.0,19.0
유보율(%),217.22,247.0,121.44,27.27
ROE,19.35,17.4,-37.14,-108.02


In [5]:
# Show the quarterly figures table; needs the `kotc` instance created in an earlier cell.
kotc.get_quarter()

Unnamed: 0,2020.06,2020.09,2020.12,2021.03,2021.06
매출액(억원),4.0,2.0,3.0,3.0,3.0
영업이익(억원),-7.0,-4.0,-6.0,-5.0,-5.0
영업이익율(%),-180.17,-195.17,-195.35,-145.5,-160.76
당기순이익(억원),-6.0,-4.0,-4.0,-5.0,-10.0
순이익율(%),-148.25,-212.89,-126.99,-146.37,-337.38
자산총계(억원),60.0,55.0,48.0,44.0,32.0
부채총계(억원),19.0,18.0,14.0,15.0,13.0
자본총계(억원),41.0,37.0,33.0,29.0,19.0
유보율(%),175.41,145.53,121.44,90.76,27.27
ROE,-50.98,-45.88,-41.27,-59.54,-159.71
