# **Collecting and Loading Data**

Collect KOSPI data at the individual stock level.

In [1]:
# [Description] Import necessary packages.

import numpy as np
import pandas as pd

import os
from datetime import datetime
from datetime import timedelta

from pykrx import stock
from pykrx import bond

import collect

In [51]:
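# [Description] Get the last date of data available.
# NOTE: this cell is empty. `get_date()`, which the "Data Update" cell calls, is not defined in any visible cell.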



In [8]:
# [Description] Collection functions

def collect_stock_indiv(start, end, market='KOSPI'):
    '''
    Collect daily OHLCV data for individual stocks, one row per calendar day.

    Every calendar day between ``start`` and ``end`` is requested separately
    through ``stock.get_market_ohlcv``. When a request raises ``KeyError``
    (presumably a non-trading day -- TODO confirm against pykrx), the day is
    kept as a row that carries no values instead of aborting the collection.

    Parameters
    ----------
    - start : first day of the range; any value accepted by ``pd.date_range``
    - end : last day of the range (included in the range)
    - market : market name forwarded to ``stock.get_market_ohlcv``

    Outputs
    ----------
    A tuple ``(o, h, l, c, v)`` of DataFrames. Each is indexed by the date
    string of the request, and its columns are the index labels of the daily
    frames returned by pykrx (presumably ticker codes -- TODO confirm).
    - o : open prices
    - h : high prices
    - l : low prices
    - c : close prices
    - v : volumes

    All five are empty DataFrames when the date range contains no days.
    '''

    fields = ('open', 'high', 'low', 'close', 'volume')
    columns = ['open', 'high', 'low', 'close', 'volume', 'won_volume', 'pct_change']

    # Accumulate the one-row frames per field and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows on every day.
    rows = {field: [] for field in fields}

    for t in pd.date_range(start, end).astype('str'):

        try:
            daily = stock.get_market_ohlcv(t, market=market)
            daily.columns = columns

        except KeyError:
            # Keep the day as a placeholder so the index still covers it.
            daily = pd.DataFrame(columns=columns)

        for field in fields:
            # Transpose the single column into a single row labelled by date.
            rows[field].append(daily.loc[:, [field]].T.rename(index={field: t}))

    # pd.concat rejects an empty list, so an empty range maps to empty frames.
    return tuple(
        pd.concat(rows[field], axis=0) if rows[field] else pd.DataFrame()
        for field in fields
    )


def collect_index(start, end, idx='1028'):
    '''
    [Description]
    - Collect index OHLCV data and split it into one frame per field.

    [Inputs]
    - start : start of the period, forwarded to stock.get_index_ohlcv
    - end   : end of the period, forwarded to stock.get_index_ohlcv
    - idx   : index code forwarded to stock.get_index_ohlcv

    [Outputs]
    - tuple (o, h, l, c, v) of single-column DataFrames holding the open,
      high, low, close and volume columns, in that order
    '''
    renamed = ['open', 'high', 'low', 'close', 'volume', 'won_volume', 'market_cap']

    frame = stock.get_index_ohlcv(start, end, idx)
    frame.columns = renamed

    # Only the first five renamed columns (OHLCV) are returned.
    return tuple(frame.loc[:, [field]] for field in renamed[:5])
    

def collect_volatility(start, end):
    '''
    [Description]
    - Placeholder for volatility data collection; not implemented yet.

    [Inputs]
    - start : currently unused
    - end   : currently unused

    [Outputs]
    - None
    '''
    return None


def collect_derivatives(start, end):
    '''
    [Description]
    - Placeholder for derivatives data collection; not implemented yet.

    [Inputs]
    - start : currently unused
    - end   : currently unused

    [Outputs]
    - None
    '''
    return None


def collect_bondrate(start, end):
    '''
    [Description]
    - Placeholder for bond rate data collection; not implemented yet.

    [Inputs]
    - start : currently unused
    - end   : currently unused

    [Outputs]
    - None
    '''
    return None

In [15]:
# [Description] Data Collection : collect data from January 1, 1995, the date from
# which data for all Korea Exchange listings is available, up to the present.

## Build the list of years to collect, starting at the beginning of the series.
## NOTE(review): the upper bound is hard-coded, so years after 2023 are not collected.
year_list = list(range(1995, 2024))

## Create the output directory up front so that a full year of collected data
## is not lost to a failed save.
out_dir = './data/kospi-individual'
os.makedirs(out_dir, exist_ok=True)

for yr in year_list:

    start = f'{yr}-01-01'
    end = f'{yr}-12-31'

    o, h, l, c, v = collect_stock_indiv(start, end)

    ## Save the collected data for all stocks locally, one file per field and year.
    for name, frame in (('open', o), ('high', h), ('low', l), ('close', c), ('volume', v)):
        frame.to_csv(f'{out_dir}/kospi_{name}_{yr}.csv')

    print(f'Successfully saved the daily KOSPI data of year {yr}')

Successfully saved the daily KOSPI data of year 1995
Successfully saved the daily KOSPI data of year 1996
Successfully saved the daily KOSPI data of year 1997
Successfully saved the daily KOSPI data of year 1998
Successfully saved the daily KOSPI data of year 1999
Successfully saved the daily KOSPI data of year 2000
Successfully saved the daily KOSPI data of year 2001
Successfully saved the daily KOSPI data of year 2002
Successfully saved the daily KOSPI data of year 2003
Successfully saved the daily KOSPI data of year 2004
Successfully saved the daily KOSPI data of year 2005
Successfully saved the daily KOSPI data of year 2006
Successfully saved the daily KOSPI data of year 2007
Successfully saved the daily KOSPI data of year 2008
Successfully saved the daily KOSPI data of year 2009
Successfully saved the daily KOSPI data of year 2010
Successfully saved the daily KOSPI data of year 2011
Successfully saved the daily KOSPI data of year 2012
Successfully saved the daily KOSPI data of yea

In [52]:
# [Description] Data Update if start_year == end_year

# NOTE(review): get_date() is not defined in any visible cell; it is expected
# to return a (start, end) pair of date strings whose first four characters
# are the year -- confirm before running.
start, end = get_date()
yr = end[:4]

# The collector defined in this notebook is collect_stock_indiv; the previous
# name used here (collect_indiv) is not defined anywhere in the file.
o, h, l, c, v = collect_stock_indiv(start, end)

## Save the collected data for all stocks locally.
# NOTE(review): each to_csv call overwrites the existing file for that year,
# so if `start` is later than January 1 the rows saved earlier for the same
# year are replaced by the update range only -- confirm this is intended.
out_dir = './data/kospi-individual'
os.makedirs(out_dir, exist_ok=True)

for name, frame in (('open', o), ('high', h), ('low', l), ('close', c), ('volume', v)):
    frame.to_csv(f'{out_dir}/kospi_{name}_{yr}.csv')

print('Successfully updated the daily KOSPI data.')

Successfully updated the daily KOSPI data.
