<a href="https://colab.research.google.com/github/yeonghun00/stock-notes/blob/main/crawler/Support.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime
from io import BytesIO, StringIO

In [2]:
# 관련 업종
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

def get_related(code:str='005930'):
  url = 'https://finance.naver.com/item/main.nhn?code=' + code
  result = requests.get(url, headers = headers)
  bs_obj = BeautifulSoup(result.content, "html.parser")
  th = bs_obj.find_all("th", {'scope':"col"})
  th = [x.get_text() for x in th][-12:-7]
  elements = [x[-6:] for x in th]
  return elements

In [3]:
get_related('005930')

['005930', '000660', '402340', '000990', '058470']

In [4]:
# 업종
dic = {'에너지': 'WI100', '화학': 'WI110', '비철금속': 'WI200', '철강': 'WI210', '건설': 'WI220', '기계': 'WI230', '조선': 'WI240', '상가,자본재': 'WI250', \
       '운송': 'WI260', '자동차': 'WI300', '화장품,의류': 'WI310', '호텔,레저': 'WI320', '미디어,교육': 'WI330', '소매(유통)': 'WI340', '필수소비재': 'WI400', \
       '건강관리': 'WI410', '은행': 'WI500', '증권': 'WI510', '보험': 'WI520', '소프트웨어': 'WI600', 'IT하드웨어': 'WI610', '반도체': 'WI620', 'IT가전': 'WI630', \
       '디스플레이': 'WI640', '전기통신서비스': 'WI700', '유틸리티': 'WI800'}

def get_date():
  url = 'https://finance.naver.com/sise/sise_deposit.naver'
  result = requests.get(url, headers = headers)
  bs_obj = BeautifulSoup(result.content, "html.parser")
  return bs_obj.find_all('span', {'class':'tah'})[0].get_text()[-10:]

date = ''.join(get_date().split('.'))

def get_sector(code):
  url = 'https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt=' + \
  date + '&sec_cd=' + dic[code]
  result = requests.get(url, headers = headers)
  json_list = result.json() 

  df = pd.DataFrame(columns=['code', 'name', 'sector_name', 'idx_name'])

  for json in json_list['list']:
    code = json['CMP_CD']
    name = json['CMP_KOR']
    sector_name = json['SEC_NM_KOR']
    idx_name = json['IDX_NM_KOR']
    df = df.append({'code':code, 'name':name, 'sector_name':sector_name, 'idx_name':idx_name}, ignore_index=True)

  return df

In [5]:
get_sector('에너지')

Unnamed: 0,code,name,sector_name,idx_name
0,96770,SK이노베이션,에너지,WI26 에너지
1,10950,S-Oil,에너지,WI26 에너지
2,267250,HD현대,에너지,WI26 에너지
3,78930,GS,에너지,WI26 에너지
4,6120,SK디스커버리,에너지,WI26 에너지
5,5090,SGC에너지,에너지,WI26 에너지
6,1390,KG케미칼,에너지,WI26 에너지
7,137950,제이씨케미칼,에너지,WI26 에너지
8,41590,플래스크,에너지,WI26 에너지
9,2960,한국쉘석유,에너지,WI26 에너지


In [6]:
# 검색 상위

def get_searched():
  url = 'https://finance.naver.com/sise/lastsearch2.nhn'
  result = requests.get(url, headers = headers)
  bs_obj = BeautifulSoup(result.content, "html.parser")
  a = bs_obj.find_all('a',{'class', 'tltle'}) # title nono tltle
  return [x['href'][-6:] for x in a]

In [7]:
get_searched()[:5]

['005930', '035720', '120115', '009540', '132030']

In [8]:
# 상승률
# market : 'KOSDAQ', 'KOSPI'
# interval : 'TODAY', 'YESTERDAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250'

def get_highest(market='KOSPI', interval='DAYS_250'):
  url = 'https://finance.daum.net/api/trend/price_performance?page=1&perPage=100&intervalType={0}&market={1}&changeType=RISE&pagination=true&order=desc'.format(interval, market)
  headers = {
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
      'x-requested-with': 'XMLHttpRequest',
      'referer': 'https://finance.daum.net/domestic/rise_stocks?market=KOSDAQ'
      }
  r = requests.get(url, headers = headers)
  return r.json()['data']

In [9]:
dic = {}
for i in ['TODAY', 'DAYS_5', 'DAYS_10', 'DAYS_15', 'DAYS_20', 'DAYS_30', 'DAYS_60', 'DAYS_90', 'DAYS_120', 'DAYS_250']:
  data = get_highest('KOSDAQ', i)
  dic[i] = [x['name'] for x in data]
pd.DataFrame(dic)[:10]

Unnamed: 0,TODAY,DAYS_5,DAYS_10,DAYS_15,DAYS_20,DAYS_30,DAYS_60,DAYS_90,DAYS_120,DAYS_250
0,THQ,소니드,휴림로봇,한일사료,하인크코리아,하인크코리아,현대사료,현대사료,현대사료,에디슨EV
1,지엘팜텍,THQ,대모,휴림로봇,한일사료,휴림로봇,한일사료,한일사료,휴림로봇,현대사료
2,토탈소프트,대모,베셀,하인크코리아,휴림로봇,베셀,휴림로봇,하인크코리아,한일사료,하인크코리아
3,에스퓨얼셀,멜파스,파이버프로,대모,베셀,현대사료,하인크코리아,휴림로봇,하인크코리아,지에스이
4,프리엠스,휴림로봇,한탑,한탑,한탑,한탑,아이에스이커머스,아이에스이커머스,로보로보,한일사료
5,코이즈,지엘팜텍,현대에버다임,파이버프로,대모,대모,베셀,베셀,지에스이,일진파워
6,수산아이앤티,프리엠스,하인크코리아,베셀,파이버프로,아이에스이커머스,이스트아시아홀딩스,디와이디,아이에스이커머스,휴림로봇
7,이루온,이루온,THQ,대주산업,대주산업,한일사료,한탑,미래생명자원,엠아이텍,엔피
8,CS,현대에버다임,지엘팜텍,현대에버다임,이스트아시아홀딩스,디와이디,금강철강,에디슨INNO,유진로봇,뉴프렉스
9,디엔에이링크,토탈소프트,기산텔레콤,오파스넷,현대에버다임,이스트아시아홀딩스,대주산업,기산텔레콤,에디슨INNO,탑코미디어


In [10]:
# 상승률 순위 (날짜 설정)

market_type = {
    'all':'ALL',
    'kospi':'STK',
    'kosdaq':'KSQ'
}

def get_ranking(market, start_date, end_date):
  gen_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
  data = {
    'locale': 'ko_KR',
    'mktId': market,
    'itmTpCd2': '1',
    'strtDd': start_date,
    'endDd': end_date,
    'stkprcTpCd': 'Y',
    'share': '1',
    'money': '1',
    'csvxls_isNo': 'false',
    'name': 'fileDown',
    'url': 'dbms/MDC/EASY/ranking/MDCEASY01501'
  }
  gen_key = requests.post(gen_url, data=data).text
  down_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
  data = {'code':gen_key}
  r = requests.post(down_url, data=data)
  r.encoding = 'EUC-KR'
  return pd.read_csv(StringIO(r.text))

In [11]:
get_ranking(market_type['kosdaq'], '20000101', '20010101').head()

Unnamed: 0,순위,종목코드,종목명,시장구분,시작일기준가,종료일종가,대비,등락률,거래량_합계,거래량_일평균,거래대금_합계,거래대금_일평균
0,1,20075,동양토탈우선,KOSDAQ,2120,126500,124380,5866.98,2578023,10697,183825622410,762761919
1,2,17050,신안화섬,KOSDAQ,16050,570000,553950,3451.4,459154,1905,111553683050,462878353
2,3,20865,리타워텍우선,KOSDAQ,1412,22550,21138,1497.03,2822771,11713,277831673250,1152828520
3,4,17160,코스프,KOSDAQ,921,13700,12779,1387.51,23957945,99411,225445436080,935458241
4,5,1000,신라섬유,KOSDAQ,9400,115000,105600,1123.4,910031,3776,129434042240,537070715


In [12]:
# 모든 종목

from io import StringIO

def get_all():
  all_df = pd.DataFrame()
  for market in ['KSQ', 'STK']:
    today = datetime.date.today().strftime('%Y%m%d')
    data = {
      'mktId': market,
      'trdDd': today,
      'money': '1',
      'csvxls_isNo': 'false',
      'name': 'fileDown',
      'url': 'dbms/MDC/STAT/standard/MDCSTAT03901',
    }
    gen_url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
    gen_key = requests.post(gen_url, data=data)

    down_url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
    r = requests.post(down_url, data={'code':gen_key.text})
    r.encoding = 'EUC-KR'

    df = pd.read_csv(StringIO(r.text))
    all_df = pd.concat([all_df, df], ignore_index=True)
  return all_df

In [13]:
get_all()

Unnamed: 0,종목코드,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,060310,3S,KOSDAQ,기계·장비,,,,
1,054620,APS홀딩스,KOSDAQ,금융,,,,
2,265520,AP시스템,KOSDAQ,반도체,,,,
3,211270,AP위성,KOSDAQ,통신장비,,,,
4,032790,BNGT,KOSDAQ,정보기기,,,,
...,...,...,...,...,...,...,...,...
2493,005010,휴스틸,KOSPI,철강금속,,,,
2494,000540,흥국화재,KOSPI,보험,,,,
2495,000547,흥국화재2우B,KOSPI,보험,,,,
2496,000545,흥국화재우,KOSPI,보험,,,,
