<a href="https://colab.research.google.com/github/yeonghun00/stock_public/blob/main/risk_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime

class Stock:
  """Daily price history for one ticker, scraped from Naver Finance.

  Constructing an instance immediately downloads the listing pages for
  ``code_name`` and stores the combined, date-sorted result in ``self.df``.

  Parameters
  ----------
  code_name : str
      Ticker code appended to the Naver ``sise_day`` URL (e.g. ``'093380'``).
  page : int, default 26
      Exclusive upper bound on the page number: pages ``1 .. page - 1`` are
      requested. Downloading stops earlier when a page is empty or repeats
      the previous page.
  """

  # Labels for the values that follow the date in each table row. The second
  # value ('_') is discarded before a page frame is returned.
  _RAW_COLUMNS = ['Close', '_', 'Open', 'High', 'Low', 'Volume']
  _PRICE_COLUMNS = ['Close', 'Open', 'High', 'Low', 'Volume']
  # Seconds to wait for the server before giving up on a request.
  _TIMEOUT = 10

  def __init__(self, code_name, page:int=26):
    self.code_name = code_name
    self.page = page
    self.df = self.do_it()

  def page_to_df(self, page):
    """Download one listing page and return it as a DataFrame.

    Parameters
    ----------
    page : int
        Page number to request.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``datetime``, with string-valued columns Close, Open,
        High, Low and Volume (thousands separators removed). An empty frame
        with those columns is returned when the page has no data rows.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = "https://finance.naver.com/item/sise_day.nhn?code=" + self.code_name + "&page=" + str(page)
    headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'}
    result = requests.get(url, headers = headers, timeout = self._TIMEOUT)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    result.raise_for_status()
    bs_obj = BeautifulSoup(result.content, "html.parser")
    rows = bs_obj.find_all("tr", {'onmouseover':'mouseOver(this)', 'onmouseout':"mouseOut(this)"})

    data_dict = {}
    for row in rows:
      lines = [span.get_text().replace(',', '') for span in row.find_all('span')]
      try:
        objdate = datetime.datetime.strptime(lines[0], '%Y.%m.%d')
      except (IndexError, ValueError):
        # A row without a parsable date marks the end of the usable data.
        break
      data_dict[objdate] = lines[1:]

    if not data_dict:
      # Nothing parsed: hand back an empty, correctly-labelled frame so the
      # caller can detect the end of the listing.
      return pd.DataFrame(columns=self._PRICE_COLUMNS)

    df = pd.DataFrame.from_dict(data_dict).transpose()
    df.columns = self._RAW_COLUMNS
    return df.drop(columns="_")

  def do_it(self):
    """Download pages 1 .. ``self.page - 1`` and combine them.

    Each page is fetched exactly once. Downloading stops at the first page
    that is empty or whose dates are identical to the previous page's dates.

    Returns
    -------
    pandas.DataFrame
        All rows sorted by date ascending, with integer-typed columns.

    Raises
    ------
    ValueError
        If the first page contains no rows for ``self.code_name``.
    """
    first = self.page_to_df(1)
    if first.empty:
      raise ValueError("no daily price rows found for code %r" % (self.code_name,))

    frames = [first]
    # max 9999
    # 1 page = 10 days
    for i in range(2, self.page):
      current = self.page_to_df(i)
      # Index.equals also copes with pages of different length (a short last
      # page), which an element-wise `!=` comparison cannot.
      if current.empty or current.index.equals(frames[-1].index):
        break
      frames.append(current)
      print('.', end = '')
      if (i%100==0):
        print(i)

    # Concatenate once; DataFrame.append no longer exists in pandas >= 2.0.
    df = pd.concat(frames).sort_index(axis = 0)
    return df.astype({col: int for col in self._PRICE_COLUMNS})

  def get_df(self):
    """Return the combined price DataFrame built at construction time."""
    return self.df

In [2]:
# Download the daily price history for ticker 093380.
# 1 page = 10 days. The second argument is an exclusive page bound, so
# pages 1..25 are requested here.
stock = Stock('093380', 26)

........................

In [3]:
# NOTE(review): this binds the Stock class itself (it is not called, so no
# instance is created), and `market` is not used in the cells visible here.
# Confirm whether an instance for a market index was intended.
market = Stock

In [4]:
# Pull the combined price DataFrame out of the Stock instance.
df = stock.get_df()

In [5]:
# Show the price frame as this cell's output.
df

Unnamed: 0,Close,Open,High,Low,Volume
2020-03-23,1550,1610,1620,1500,134744
2020-03-24,1630,1560,1635,1560,136060
2020-03-25,1770,1655,1795,1655,158901
2020-03-26,1850,1760,1950,1735,206550
2020-03-27,1930,1920,2000,1870,221179
...,...,...,...,...,...
2021-03-22,3895,3920,3945,3865,139598
2021-03-23,3890,3895,3970,3870,116982
2021-03-24,3800,3860,3960,3785,280496
2021-03-25,3835,3800,3850,3795,81297


In [6]:
# Day-over-day difference of the closing price (row k+1 minus row k).
change = np.diff(df['Close'].values)
change

array([  80,  140,   80,   80,  100,  100,   60,   -5,  105,   80,   55,
         35,   30,  -60,    0,   40,   55,  100,   60,  -30,  -10,   10,
       -115,   70,    5,   50,  -50,   40,   65,    0,  -55,  -95,   65,
        -30,  -20,   10,   95,   80,    0,  -60,   65,   85,   10,  -10,
         55,  195,   30,  -35,  -65,  -40,    0,  -55,  -70, -100,  -45,
       -170,  120,   25,   50,  -35,  -40,  110,  -70,  -10,  -80, -110,
         10,   10,  100,   35,   80,   45,    0,  -90,  120,   25,  285,
       -135,   40, -145,    0,  -35,   -5,  -10,    0,   45,  -55,   50,
        -25,   10,   20,   35,   -5,   -5,  -45,   15,   15,  -35,   50,
         45, -185,   40, -170,   20,   50,  100,   30,  -75,   25,  -20,
          5,   60,   95, -115,   45,    5,   45,   10,   -5,   90,  -30,
        -35,  395, -325,  170,  -70,  -40, -145,  -45,   50,   95,   80,
        485,   45, -110,  -35,   10, -145,  -65,  -70,   75, -105,  145,
        110, -150, -125,  -15,   60,   45, -110,   

In [7]:
# Normalization: divide the daily changes by their L2 (Euclidean) norm,
# then round to 16 decimal places.
norm = np.linalg.norm(change)
normal_array = np.around(change / norm, decimals=16)
print(normal_array)

[ 0.04199316  0.07348803  0.04199316  0.04199316  0.05249145  0.05249145
  0.03149487 -0.00262457  0.05511602  0.04199316  0.0288703   0.01837201
  0.01574743 -0.03149487  0.          0.02099658  0.0288703   0.05249145
  0.03149487 -0.01574743 -0.00524914  0.00524914 -0.06036517  0.03674401
  0.00262457  0.02624572 -0.02624572  0.02099658  0.03411944  0.
 -0.0288703  -0.04986688  0.03411944 -0.01574743 -0.01049829  0.00524914
  0.04986688  0.04199316  0.         -0.03149487  0.03411944  0.04461773
  0.00524914 -0.00524914  0.0288703   0.10235833  0.01574743 -0.01837201
 -0.03411944 -0.02099658  0.         -0.0288703  -0.03674401 -0.05249145
 -0.02362115 -0.08923546  0.06298974  0.01312286  0.02624572 -0.01837201
 -0.02099658  0.05774059 -0.03674401 -0.00524914 -0.04199316 -0.05774059
  0.00524914  0.00524914  0.05249145  0.01837201  0.04199316  0.02362115
  0.         -0.0472423   0.06298974  0.01312286  0.14960063 -0.07086346
  0.02099658 -0.0761126   0.         -0.01837201 -0.0026245

In [8]:
# Mean and (population) standard deviation of the normalized changes.
print('Mean: ', normal_array.mean())
print('Std: ', normal_array.std())

Mean:  0.004827526882757904
Std:  0.06318828405032346
