<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/crawler/Support.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime

In [2]:
# Related stocks in the same industry
# Browser-like User-Agent passed as `headers=` to the requests.get calls in
# get_related, get_date, get_sector and get_searched.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.121 Safari/537.36'
    ),
}

def get_related(code:str='005930'):
  """Scrape a stock's Naver Finance main page and return related stock codes.

  Args:
    code: Stock code appended to the page URL as the ``code`` query value.

  Returns:
    A list with the last six characters of each selected ``<th scope="col">``
    header text (at most five entries).
  """
  page = requests.get('https://finance.naver.com/item/main.nhn?code=' + code, headers=headers)
  soup = BeautifulSoup(page.content, "html.parser")
  header_texts = [cell.get_text() for cell in soup.find_all("th", {'scope': "col"})]
  # Positional slice into the page's column headers.
  # NOTE(review): presumably these five cells are the same-industry comparison
  # table and each text ends with a 6-character code -- confirm against the
  # live page, since any layout change silently shifts this window.
  selected = header_texts[-12:-7]
  return [text[-6:] for text in selected]

In [3]:
# Example: look up the codes scraped for stock code '005930'.
get_related('005930')

['005930', '000660', '058470', '000990', '357780']

In [4]:
# Sectors
# Maps a sector name to the code that get_sector sends as the `sec_cd`
# query value of the wiseindex.com request.
# NOTE(review): the key '상가,자본재' may be a typo for '상사,자본재' -- confirm
# against the WiseIndex sector list before renaming (callers use it as a key).
dic = {
    '에너지': 'WI100',
    '화학': 'WI110',
    '비철금속': 'WI200',
    '철강': 'WI210',
    '건설': 'WI220',
    '기계': 'WI230',
    '조선': 'WI240',
    '상가,자본재': 'WI250',
    '운송': 'WI260',
    '자동차': 'WI300',
    '화장품,의류': 'WI310',
    '호텔,레저': 'WI320',
    '미디어,교육': 'WI330',
    '소매(유통)': 'WI340',
    '필수소비재': 'WI400',
    '건강관리': 'WI410',
    '은행': 'WI500',
    '증권': 'WI510',
    '보험': 'WI520',
    '소프트웨어': 'WI600',
    'IT하드웨어': 'WI610',
    '반도체': 'WI620',
    'IT가전': 'WI630',
    '디스플레이': 'WI640',
    '전기통신서비스': 'WI700',
    '유틸리티': 'WI800',
}

def get_date():
  """Read a date string from Naver Finance's deposit-trend page.

  Returns:
    The last ten characters of the text of the first ``<span class="tah">``
    element on the page.
    NOTE(review): presumably formatted 'YYYY.MM.DD' (the caller strips the
    dots) -- confirm against the live page.

  Raises:
    IndexError: if the page contains no ``<span class="tah">`` element.
  """
  page = requests.get('https://finance.naver.com/sise/sise_deposit.naver', headers=headers)
  soup = BeautifulSoup(page.content, "html.parser")
  first_span = soup.find_all('span', {'class': 'tah'})[0]
  return first_span.get_text()[-10:]

# Date string with every '.' removed; get_sector sends it as the `dt` query value.
# Evaluated once when this cell runs (performs a network request).
date = get_date().replace('.', '')

def get_sector(code):
  """Fetch the constituents of one WiseIndex sector as a DataFrame.

  Args:
    code: Sector name used as a key into the module-level ``dic`` mapping
      (for example '에너지'); the mapped value is sent as ``sec_cd``.

  Returns:
    A DataFrame with columns ``code``, ``name``, ``sector_name`` and
    ``idx_name``, one row per entry of the response's ``'list'`` array and a
    fresh 0..n-1 index. An empty ``'list'`` yields an empty frame that still
    has those four columns.

  Raises:
    KeyError: if ``code`` is not a key of ``dic``, or the JSON response lacks
      ``'list'`` or one of the expected per-entry fields.
  """
  url = ('https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt=' +
         date + '&sec_cd=' + dic[code])
  payload = requests.get(url, headers=headers).json()

  columns = ['code', 'name', 'sector_name', 'idx_name']
  # Build all rows first and construct the frame once: DataFrame.append was
  # removed in pandas 2.0, and appending row by row copies the frame each time.
  # Distinct local names also avoid rebinding the `code` parameter.
  records = [
      {
          'code': item['CMP_CD'],
          'name': item['CMP_KOR'],
          'sector_name': item['SEC_NM_KOR'],
          'idx_name': item['IDX_NM_KOR'],
      }
      for item in payload['list']
  ]
  return pd.DataFrame(records, columns=columns)

In [5]:
# Example: list the constituents returned for the '에너지' sector key.
get_sector('에너지')

Unnamed: 0,code,name,sector_name,idx_name
0,96770,SK이노베이션,에너지,WI26 에너지
1,10950,S-Oil,에너지,WI26 에너지
2,267250,현대중공업지주,에너지,WI26 에너지
3,78930,GS,에너지,WI26 에너지
4,6120,SK디스커버리,에너지,WI26 에너지
5,5090,SGC에너지,에너지,WI26 에너지
6,1390,KG케미칼,에너지,WI26 에너지
7,93230,이아이디,에너지,WI26 에너지
8,2960,한국쉘석유,에너지,WI26 에너지
9,137950,제이씨케미칼,에너지,WI26 에너지


In [6]:
# Most-searched stocks

def get_searched():
  """Scrape Naver Finance's most-searched page and return stock codes.

  Returns:
    A list with the last six characters of the ``href`` of every
    ``<a class="tltle">`` link on the page, in document order.
  """
  url = 'https://finance.naver.com/sise/lastsearch2.nhn'
  result = requests.get(url, headers = headers)
  bs_obj = BeautifulSoup(result.content, "html.parser")
  # Per the original author's note the class is spelled 'tltle', not 'title'.
  # This must be a dict: the previous set literal {'class', 'tltle'} was taken
  # by BeautifulSoup as a list of class names, so it also matched class="class".
  links = bs_obj.find_all('a', {'class': 'tltle'})
  return [link['href'][-6:] for link in links]

In [7]:
# Example: show the first five scraped codes.
get_searched()[:5]

['005930', '035720', '000660', '086980', '053290']

In [8]:
# Rate of increase (top risers)
# market : 'KOSDAQ', 'KOSPI'
# interval : 'TODAY', 'YESTERDAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250'

def get_highest(market='KOSPI', interval='DAYS_250'):
  """Query Daum Finance's price-performance API for rising stocks.

  Args:
    market: Value substituted into the ``market`` query parameter.
    interval: Value substituted into the ``intervalType`` query parameter.

  Returns:
    The ``'data'`` member of the JSON response (page 1, up to 100 entries per
    the query string, ``changeType=RISE``, ``order=desc``).
  """
  endpoint = (
      'https://finance.daum.net/api/trend/price_performance'
      '?page=1&perPage=100'
      f'&intervalType={interval}&market={market}'
      '&changeType=RISE&pagination=true&order=desc'
  )
  # Request-specific headers, kept separate from the module-level `headers`.
  # NOTE(review): the referer always names the KOSDAQ page, even when
  # market='KOSPI' -- confirm whether the API cares.
  request_headers = {
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
      'x-requested-with': 'XMLHttpRequest',
      'referer': 'https://finance.daum.net/domestic/rise_stocks?market=KOSDAQ',
  }
  response = requests.get(endpoint, headers=request_headers)
  return response.json()['data']

In [9]:
import pandas as pd

# Names of the top-rising KOSDAQ stocks for each look-back interval.
# Kept under its own name: rebinding `dic` here would replace the sector-name
# mapping that get_sector() reads and make later get_sector() calls fail.
intervals = ['TODAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250']
risers_by_interval = {
    interval: [entry['name'] for entry in get_highest('KOSDAQ', interval)]
    for interval in intervals
}
# One column per interval; show the first ten rows.
pd.DataFrame(risers_by_interval)[:10]

Unnamed: 0,TODAY,DAYS_5,DAYS_10,DAYS_15,DAYS_20,DAYS_30,DAYS_60,DAYS_90,DAYS_120,DAYS_250
0,지더블유바이텍,위메이드맥스,에코캡,위메이드맥스,데브시스터즈,위메이드맥스,나노씨엠에스,나노씨엠에스,쎄미시스코,데브시스터즈
1,NE능률,바른손,위메이드맥스,쇼박스,버킷스튜디오,데브시스터즈,위메이드,쎄미시스코,나노씨엠에스,대원미디어
2,디에이테크놀로지,위메이드,게임빌,위메이드,위메이드맥스,위메이드,세종메디칼,세종메디칼,위메이드,한국비엔씨
3,엔피케이,크루셜텍,지에스이,바른손,쇼박스,에코캡,위메이드맥스,위메이드,한국비엔씨,경남스틸
4,바른손,게임빌,바른손,게임빌,엘앤에프,버킷스튜디오,국전약품,바이오니아,세종메디칼,NE능률
5,에프엔씨엔터,덱스터,동국알앤에스,데브시스터즈,티에스아이,나노씨엠에스,에코프로에이치엔,위메이드맥스,위메이드맥스,위메이드
6,지에스이,CBI,스튜디오산타클로스,에코캡,에코캡,쇼박스,켐트로스,한국비엔씨,바이오니아,박셀바이오
7,지트리비앤티,엔피,크루셜텍,버킷스튜디오,지에스이,티비씨,경남스틸,서전기전,엔피,엘앤에프
8,승일,우리산업홀딩스,덱스터,티에스아이,바른손,지에스이,데브시스터즈,에코프로에이치엔,한국선재,이즈미디어
9,현대무벡스,네오위즈홀딩스,우리산업홀딩스,지에스이,켐트로스,켐트로스,덱스터,덱스터,광진윈텍,에이스토리


In [10]:
# All listed stocks

from io import StringIO

def get_all():
  """Download the KRX listing CSV for each market and return them as one frame.

  For each market id ('KSQ', then 'STK') the function posts the query form to
  KRX's GenerateOTP endpoint, posts the returned text as ``code`` to the CSV
  download endpoint, decodes the response as EUC-KR and parses it with
  ``pd.read_csv``.

  Returns:
    A single DataFrame with the rows of both markets in that order and a
    fresh 0..n-1 index.
  """
  # Computed once so both markets are queried for the same trading date.
  today = datetime.date.today().strftime('%Y%m%d')
  gen_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
  down_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'

  frames = []
  for market in ['KSQ', 'STK']:
    data = {
      'mktId': market,
      'trdDd': today,
      'money': '1',
      'csvxls_isNo': 'false',
      'name': 'fileDown',
      'url': 'dbms/MDC/STAT/standard/MDCSTAT03901',
    }
    # Step 1: obtain the one-time code for this query.
    gen_key = requests.post(gen_url, data=data)

    # Step 2: exchange the code for the CSV payload.
    r = requests.post(down_url, data={'code':gen_key.text})
    r.encoding = 'EUC-KR'

    frames.append(pd.read_csv(StringIO(r.text)))
  # Concatenate once instead of growing an initially empty frame in the loop.
  return pd.concat(frames, ignore_index=True)

In [11]:
# Example: fetch the combined listing for both markets and display it.
get_all()

Unnamed: 0,종목코드,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,060310,3S,KOSDAQ,기계·장비,3810,95,2.56,176294464530
1,054620,APS홀딩스,KOSDAQ,금융,15700,-300,-1.88,320189269700
2,265520,AP시스템,KOSDAQ,반도체,23650,-300,-1.25,342457368550
3,211270,AP위성,KOSDAQ,통신장비,15850,-50,-0.31,239054518400
4,013720,CBI,KOSDAQ,운송장비·부품,3760,-55,-1.44,190623634000
...,...,...,...,...,...,...,...,...
2449,069260,휴켐스,KOSPI,화학,27050,-150,-0.55,1105765805400
2450,000540,흥국화재,KOSPI,보험,3700,-30,-0.80,237697786500
2451,000547,흥국화재2우B,KOSPI,보험,28800,300,1.05,4423680000
2452,000545,흥국화재우,KOSPI,보험,8500,0,0.00,6528000000
