<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/Stock_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime
from multiprocessing import Pool

class Stock:
  def __init__(self, code):
    self.code = code
    self.headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}

  def get_price(self, days:int=1000):
    url = 'https://fchart.stock.naver.com/sise.nhn?symbol=' + str(self.code) + '&timeframe=day&count='+ str(days) + '&requestType=0'
    result = requests.get(url, headers = self.headers)
    bs_obj = BeautifulSoup(result.content, "html.parser")
    items = bs_obj.find_all("item")

    data = np.array([item['data'].split('|') for item in items], int)

    d = {'Date':list(map(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d'), np.array(data)[:,0])), \
     'Open':data[:,1], 'High':data[:,2], 'Low':data[:,3], 'Close':data[:,4], 'Volume':data[:,5]}

    df = pd.DataFrame(data=d,)
    df = df.set_index('Date')
    return df
    
  def get_fundamental(self):
    url = 'https://finance.naver.com/item/main.nhn?code=' + self.code
    result = requests.get(url, headers = self.headers)
    bs_obj = BeautifulSoup(result.content, "html.parser")

    ths = bs_obj.find_all("th", {'scope':'col'})
    ths = [th.get_text() for th in ths][10:-22]
    dates = list(map(lambda x: x.translate(str.maketrans('','','\n\t, ')),ths))
    dates = list(map(lambda x: x + '(Y)', dates[:4])) + list(map(lambda x: x + '(M)', dates[4:]))

    tr = bs_obj.find_all("tbody")
    tds = tr[2].find_all('td')
    tds = [td.get_text() for td in tds]
    elements = list(map(lambda x: x.translate(str.maketrans('','','\n\t, ')),tds))
    elements = list(map(lambda x: float(x) if x.replace('.','').isdigit() else np.nan, elements))

    temp_dict = {}
    cnt = 0
    index = ['sales', 'operating profit', 'net income', 'operating margin', 'net margin', 'roe', 'debt ratio', 'quick ratio', \
    'reserve ratio', 'eps', 'per', 'bps', 'pbr', 'dividend per share', 'dividend yield ratio', 'dividend payout ratio']

    for i in dates:
      temp_dict[i] = elements[cnt::10]
      cnt += 1

    df = pd.DataFrame.from_dict(temp_dict)
    df = df.set_index([pd.Index(index)])
    return df

In [2]:
stock = Stock('051900')

In [3]:
price_df = stock.get_price(1000)
price_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-04-19,801000,830000,801000,827000,43282
2017-04-20,838000,852000,823000,851000,63767
2017-04-21,852000,884000,851000,881000,68985
2017-04-24,889000,889000,867000,875000,38263
2017-04-25,867000,871000,850000,856000,47326
...,...,...,...,...,...
2021-05-12,1556000,1560000,1522000,1530000,42804
2021-05-13,1525000,1539000,1516000,1519000,36979
2021-05-14,1521000,1530000,1515000,1521000,23542
2021-05-17,1521000,1529000,1511000,1518000,23640


In [4]:
fundamental_df = stock.get_fundamental()
fundamental_df

Unnamed: 0,2018.12(Y),2019.12(Y),2020.12(Y),2021.12(E)(Y),2019.12(M),2020.03(M),2020.06(M),2020.09(M),2020.12(M),2021.03(E)(M)
sales,67475.0,76854.0,78445.0,86417.0,20133.0,18964.0,17832.0,20706.0,20944.0,20638.0
operating profit,10393.0,11764.0,12209.0,13758.0,2410.0,3337.0,3033.0,3276.0,2563.0,3562.0
net income,6923.0,7882.0,8131.0,9392.0,1338.0,2342.0,2046.0,2317.0,1426.0,2447.0
operating margin,15.4,15.31,15.56,15.92,11.97,17.6,17.01,15.82,12.24,17.26
net margin,10.26,10.26,10.37,10.87,6.65,12.35,11.47,11.19,6.81,11.86
roe,20.98,20.32,17.92,18.05,20.32,20.0,18.79,18.12,17.92,
debt ratio,46.8,53.26,40.26,,53.26,50.42,46.13,43.32,40.26,
quick ratio,79.22,79.99,79.33,,79.99,78.73,67.99,82.28,79.33,
reserve ratio,4132.2,4822.48,5534.21,,4822.48,4874.03,5099.47,5356.0,5534.21,
eps,38534.0,43916.0,45018.0,52203.0,7512.0,13016.0,11282.0,12823.0,7897.0,13852.0


In [5]:
from dateutil import relativedelta
import seaborn as sns
import matplotlib.pyplot as plt

class Analyser:
  def __init__(self, price_df, fundamental_df):
    self.price_df = price_df
    self.fundamental_df = fundamental_df
    self.mixed_df = self.get_mixed_df()

  def find_date(self, year, month):
    # 결산일 고려 (2달)
    year,month = int(year), int(month)
    next = datetime.date(year,month,1) + relativedelta.relativedelta(months=2)
    year, month = next.year, next.month

    start = str(year) + '-' + str(month) + '-' + '01'
    end = str(year) + '-' + str(month) + '-' + '20'
    return self.price_df.loc[start:end].index.values[0]

  def get_mixed_df(self):
    price_dic = {}
    cnt = 0
    for i in [(x[:4], x[5:7]) for x in self.fundamental_df.columns.values]:
      try:
        price_dic[self.fundamental_df.columns.values[cnt]] = price_df.loc[self.find_date(i[0],i[1])]['Close']
      except:
        price_dic[self.fundamental_df.columns.values[cnt]] = 0
      cnt+=1
    return self.fundamental_df.append(pd.DataFrame(data=price_dic,  index = ['Price']))

  def get_heatmap(self):
    # correlation heatmap
    corrmat = self.mixed_df.transpose().corr().abs() 
    plt.subplots(figsize=(12,12))
    sns.heatmap(corrmat, annot = True)

  def get_influential(self, num=6):
    return self.get_mixed_df().transpose().corr()['Price'].sort_values(ascending = False).head(num)

In [6]:
analyser = Analyser(price_df, fundamental_df)

In [7]:
analyser.get_influential()[1:]

reserve ratio    0.868179
pbr              0.559896
per              0.268608
quick ratio      0.168156
roe              0.079799
Name: Price, dtype: float64