<a href="https://colab.research.google.com/github/yeonghun00/stock-notes/blob/main/crawler/Support.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime
from io import BytesIO, StringIO

In [2]:
# Related stocks
# Browser-like request headers passed to the requests.get calls that use `headers`.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.121 Safari/537.36'
    ),
}

def get_related(code:str='005930'):
  """Scrape five six-character strings from a Naver Finance item page.

  The page for ``code`` is fetched, every ``<th scope="col">`` cell is
  collected, and the cells at positions ``[-12:-7]`` are kept. The last six
  characters of each kept cell's text are returned.

  Args:
    code: Stock code appended to the item-page URL.

  Returns:
    List of up to five strings.
    NOTE(review): presumably the stock codes shown in the page's peer
    comparison table; the slice depends on the page layout, so confirm.
  """
  page = requests.get('https://finance.naver.com/item/main.nhn?code=' + code,
                      headers=headers)
  soup = BeautifulSoup(page.content, "html.parser")
  column_cells = soup.find_all("th", {'scope': "col"})
  # Only the window [-12:-7] of the column headers is of interest.
  picked = column_cells[-12:-7]
  return [cell.get_text()[-6:] for cell in picked]

In [3]:
# Example: strings scraped from the item page of stock code 005930.
get_related('005930')

['005930', '000660', '402340', '000990', '058470']

In [4]:
# Sectors: sector name -> code sent as `sec_cd` to the WiseIndex endpoint in get_sector().
# NOTE(review): the key '상가,자본재' may be a typo for '상사,자본재'; it is kept
# unchanged because lookups depend on the exact key. Confirm before renaming.
dic = {
    '에너지': 'WI100',
    '화학': 'WI110',
    '비철금속': 'WI200',
    '철강': 'WI210',
    '건설': 'WI220',
    '기계': 'WI230',
    '조선': 'WI240',
    '상가,자본재': 'WI250',
    '운송': 'WI260',
    '자동차': 'WI300',
    '화장품,의류': 'WI310',
    '호텔,레저': 'WI320',
    '미디어,교육': 'WI330',
    '소매(유통)': 'WI340',
    '필수소비재': 'WI400',
    '건강관리': 'WI410',
    '은행': 'WI500',
    '증권': 'WI510',
    '보험': 'WI520',
    '소프트웨어': 'WI600',
    'IT하드웨어': 'WI610',
    '반도체': 'WI620',
    'IT가전': 'WI630',
    '디스플레이': 'WI640',
    '전기통신서비스': 'WI700',
    '유틸리티': 'WI800',
}

def get_date():
  """Return the trailing 10 characters of the first ``span.tah`` on Naver's deposit page.

  Returns:
    A 10-character string.
    NOTE(review): presumably a dotted date such as 'YYYY.MM.DD', since the
    caller strips the dots; confirm against the live page.

  Raises:
    IndexError: if the page contains no ``<span class="tah">`` element.
  """
  response = requests.get('https://finance.naver.com/sise/sise_deposit.naver',
                          headers=headers)
  soup = BeautifulSoup(response.content, "html.parser")
  first_span = soup.find_all('span', {'class': 'tah'})[0]
  span_text = first_span.get_text()
  return span_text[-10:]

# Result of get_date() with the dots removed; used as the `dt` query value in get_sector().
date = get_date().replace('.', '')

def get_sector(code):
  """Return the components of one WiseIndex sector as a DataFrame.

  Args:
    code: Sector name used as a key of the module-level ``dic`` mapping
      (for example '에너지'); the mapped value is sent as ``sec_cd``.

  Returns:
    DataFrame with the columns ``code``, ``name``, ``sector_name`` and
    ``idx_name``, one row per entry of the response's ``list`` array. An
    empty ``list`` yields an empty frame with the same columns.

  Raises:
    KeyError: if ``code`` is not a key of ``dic``, or a response entry lacks
      one of the expected fields.
  """
  url = 'https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt=' + \
  date + '&sec_cd=' + dic[code]
  payload = requests.get(url, headers = headers).json()

  # Collect every row first and build the frame once: DataFrame.append was
  # removed in pandas 2.0 and copied the whole frame on each call.
  rows = [
      {
          'code': item['CMP_CD'],
          'name': item['CMP_KOR'],
          'sector_name': item['SEC_NM_KOR'],
          'idx_name': item['IDX_NM_KOR'],
      }
      for item in payload['list']
  ]
  return pd.DataFrame(rows, columns=['code', 'name', 'sector_name', 'idx_name'])

In [5]:
# Example: components of the '에너지' sector (mapped to 'WI100' in `dic`).
get_sector('에너지')

Unnamed: 0,code,name,sector_name,idx_name
0,96770,SK이노베이션,에너지,WI26 에너지
1,10950,S-Oil,에너지,WI26 에너지
2,267250,HD현대,에너지,WI26 에너지
3,78930,GS,에너지,WI26 에너지
4,112610,씨에스윈드,에너지,WI26 에너지
5,6120,SK디스커버리,에너지,WI26 에너지
6,5090,SGC에너지,에너지,WI26 에너지
7,18000,유니슨,에너지,WI26 에너지
8,1390,KG케미칼,에너지,WI26 에너지
9,322000,현대에너지솔루션,에너지,WI26 에너지


In [6]:
# Most-searched stocks

def get_searched():
  """Return six-character strings taken from the links on Naver's most-searched page.

  Returns:
    List with the last six characters of the ``href`` of every
    ``<a class="tltle">`` element, in page order.
    NOTE(review): presumably these are stock codes; confirm against the page.

  Raises:
    KeyError: if a matched anchor has no ``href`` attribute.
  """
  url = 'https://finance.naver.com/sise/lastsearch2.nhn'
  result = requests.get(url, headers = headers)
  bs_obj = BeautifulSoup(result.content, "html.parser")
  # The class on the page is spelled 'tltle', not 'title'. Pass it as an
  # attrs dict: the previous set literal {'class', 'tltle'} only worked through
  # bs4's non-dict fallback and also matched a CSS class named 'class'.
  links = bs_obj.find_all('a', {'class': 'tltle'})
  return [link['href'][-6:] for link in links]

In [7]:
# Example: first five entries returned by get_searched().
get_searched()[:5]

['005930', '005490', '035720', '035420', '000660']

In [8]:
# Price gainers
# market : 'KOSDAQ', 'KOSPI'
# interval : 'TODAY', 'YESTERDAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250'

def get_highest(market='KOSPI', interval='DAYS_250'):
  """Fetch the first page (100 entries) of rising stocks from Daum Finance.

  Args:
    market: Value sent as the ``market`` query parameter ('KOSDAQ' or 'KOSPI').
    interval: Value sent as ``intervalType`` (see the list above).

  Returns:
    The ``data`` field of the JSON response.
  """
  endpoint = f'https://finance.daum.net/api/trend/price_performance?page=1&perPage=100&intervalType={interval}&market={market}&changeType=RISE&pagination=true&order=desc'
  # Local headers for this endpoint; the module-level `headers` is not used here.
  request_headers = {
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
      'x-requested-with': 'XMLHttpRequest',
      'referer': 'https://finance.daum.net/domestic/rise_stocks?market=KOSDAQ',
  }
  response = requests.get(endpoint, headers=request_headers)
  body = response.json()
  return body['data']

In [9]:
# Names of the KOSDAQ top risers per interval, one column per interval.
# Stored under its own name: rebinding `dic` here would overwrite the sector
# mapping that get_sector() reads and make later get_sector() calls fail.
top_risers = {}
for interval in ['TODAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250']:
  top_risers[interval] = [entry['name'] for entry in get_highest('KOSDAQ', interval)]
pd.DataFrame(top_risers)[:10]

Unnamed: 0,TODAY,DAYS_5,DAYS_10,DAYS_15,DAYS_20,DAYS_30,DAYS_60,DAYS_90,DAYS_120,DAYS_250
0,모비스,에이스토리,SK5호스팩,공구우먼,공구우먼,공구우먼,공구우먼,현대사료,현대사료,현대사료
1,한국비엔씨,모비릭스,공구우먼,신화인터텍,SK5호스팩,SK5호스팩,한일사료,하인크코리아,하인크코리아,하인크코리아
2,신화인터텍,SK5호스팩,에이스토리,실리콘투,모아데이타,피코그램,현대사료,한일사료,한일사료,한일사료
3,서린바이오,한국비엔씨,모아데이타,SK5호스팩,신화인터텍,신화인터텍,SK5호스팩,휴림로봇,휴림로봇,휴림로봇
4,차백신연구소,바이오니아,신진에스엠,모아데이타,실리콘투,실리콘투,피코그램,아이에스이커머스,아이에스이커머스,위메이드맥스
5,국전약품,셀리버리,수젠텍,케이옥션,피코그램,모아데이타,하인크코리아,노터스,노터스,이랜텍
6,셀리드,진매트릭스,신화인터텍,에이스토리,조광ILI,신진에스엠,노터스,피코그램,에디슨INNO,노터스
7,신진에스엠,씨엔플러스,비플라이소프트,수젠텍,신진에스엠,세림B&G,THQ,SK5호스팩,SK5호스팩,엘앤에프
8,비플라이소프트,모아데이타,모비릭스,신진에스엠,지투파워,현대사료,한탑,한탑,피코그램,탑코미디어
9,피씨디렉트,싸이버원,나노씨엠에스,레이,수젠텍,케이옥션,신화인터텍,현대에버다임,베셀,나노신소재


In [10]:
# Price-change ranking (with a date range)

# Short market name -> value passed as `market` to get_ranking().
market_type = dict(all='ALL', kospi='STK', kosdaq='KSQ')

def get_ranking(market, ranking_type, start_date, end_date):
  """Download a KRX ranking report for a date range as a DataFrame.

  The function first requests a one-time download key from the KRX
  ``generate.cmd`` endpoint, then posts that key to ``download.cmd`` and
  parses the returned CSV.

  Args:
    market: Value sent as ``mktId`` (the values of ``market_type``:
      'ALL', 'STK' or 'KSQ').
    ranking_type: 'highest' or 'traded'; selects which report is requested.
    start_date: Value sent as ``strtDd`` (the example call uses 'YYYYMMDD').
    end_date: Value sent as ``endDd`` (same format as ``start_date``).

  Returns:
    DataFrame parsed from the downloaded CSV text.

  Raises:
    ValueError: if ``ranking_type`` is not one of the supported names.
  """
  report_urls = {
    'highest': 'dbms/MDC/EASY/ranking/MDCEASY01501',
    'traded': 'dbms/MDC/EASY/ranking/MDCEASY01601',
  }
  # Fail before any network call instead of posting an empty report URL.
  if ranking_type not in report_urls:
    raise ValueError(
        'unknown ranking_type {0!r}; expected one of {1}'.format(
            ranking_type, sorted(report_urls)))

  gen_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
  down_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
  data = {
    'locale': 'ko_KR',
    'mktId': market,
    'itmTpCd2': '1',
    'strtDd': start_date,
    'endDd': end_date,
    'stkprcTpCd': 'Y',
    'share': '1',
    'money': '1',
    'csvxls_isNo': 'false',
    'name': 'fileDown',
    'url': report_urls[ranking_type]
  }
  gen_key = requests.post(gen_url, data=data).text
  r = requests.post(down_url, data={'code': gen_key})
  # The response text is decoded as EUC-KR before parsing.
  r.encoding = 'EUC-KR'
  return pd.read_csv(StringIO(r.text))

In [11]:
# Example: KOSDAQ 'traded' ranking for 20000101-20010101, showing the 10 rows
# with the largest values in the '등락률' column.
get_ranking(market_type['kosdaq'], 'traded', '20000101', '20010101').sort_values('등락률', ascending=False).head(10)

Unnamed: 0,순위,종목코드,종목명,시장구분,시작일기준가,종료일종가,대비,등락률,거래량_합계,거래량_일평균,거래대금_합계,거래대금_일평균
13,14,20860,리타워텍,KOSDAQ,1372,3140,1768,128.86,181715449,754006,5857918324850,24306715041
14,15,37410,코리아링크,KOSDAQ,7886,6850,-1036,-13.14,132690380,550582,5540966803360,22991563499
49,50,37700,인디시스템,KOSDAQ,4210,3200,-1010,-23.99,86531325,359051,2314529492390,9603856815
33,34,28080,휴맥스,KOSDAQ,19200,10500,-8700,-45.31,197636639,820069,3164336244590,13130025911
48,49,31310,아이즈,KOSDAQ,4313,2170,-2143,-49.69,197696971,820319,2353632506390,9766109985
18,19,37340,장미디어,KOSDAQ,6072,2970,-3102,-51.09,146738211,608872,4614258602260,19146301254
41,42,35500,디지탈임팩트,KOSDAQ,3815,1530,-2285,-59.9,511434110,2122133,2665173478720,11058811115
28,29,19550,기술투자,KOSDAQ,6694,2420,-4274,-63.85,517365855,2146746,3327199725110,13805807988
7,8,37240,싸이버텍,KOSDAQ,8750,3000,-5750,-65.71,304169644,1262115,9027844747060,37459936710
32,33,32680,비티씨정보,KOSDAQ,3033,1000,-2033,-67.03,552757139,2293598,3185951539560,13219715932


In [12]:
# 모든 종목

from io import StringIO

def get_all():
  """Download KRX report MDCSTAT03901 for today's date for 'KSQ' and 'STK'.

  For each market id a download key is requested from ``generate.cmd`` and
  exchanged for a CSV at ``download.cmd``. The two tables are stacked in the
  order 'KSQ', 'STK'.

  Returns:
    One DataFrame with the rows of both markets and a fresh 0..n-1 index.

  NOTE(review): the local calendar date is sent as ``trdDd``; what the
  service returns on non-trading days is not handled here. Confirm.
  """
  gen_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
  down_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
  # Computed once so both markets are queried for the same date.
  today = datetime.date.today().strftime('%Y%m%d')

  # Collect the per-market frames and concatenate once, rather than growing
  # a frame with repeated pd.concat calls that start from an empty DataFrame.
  frames = []
  for market in ['KSQ', 'STK']:
    data = {
      'mktId': market,
      'trdDd': today,
      'money': '1',
      'csvxls_isNo': 'false',
      'name': 'fileDown',
      'url': 'dbms/MDC/STAT/standard/MDCSTAT03901',
    }
    gen_key = requests.post(gen_url, data=data)

    r = requests.post(down_url, data={'code':gen_key.text})
    # The response text is decoded as EUC-KR before parsing.
    r.encoding = 'EUC-KR'

    frames.append(pd.read_csv(StringIO(r.text)))
  return pd.concat(frames, ignore_index=True)

In [13]:
# Example: combined table for both markets returned by get_all().
get_all()

Unnamed: 0,종목코드,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,060310,3S,KOSDAQ,기계·장비,2885,0,0.00,133493315005
1,054620,APS홀딩스,KOSDAQ,금융,8220,30,0.37,167640496620
2,265520,AP시스템,KOSDAQ,반도체,16850,-50,-0.30,257491943850
3,211270,AP위성,KOSDAQ,통신장비,11450,-50,-0.43,172692380800
4,032790,BNGT,KOSDAQ,정보기기,4075,0,0.00,120352455875
...,...,...,...,...,...,...,...,...
2503,005010,휴스틸,KOSPI,철강금속,21950,0,0.00,172035649250
2504,000540,흥국화재,KOSPI,보험,3165,-25,-0.78,203327971425
2505,000547,흥국화재2우B,KOSPI,보험,23500,800,3.52,3609600000
2506,000545,흥국화재우,KOSPI,보험,6990,-80,-1.13,5368320000
