In [1]:
import sys, os, csv
sys.path.append('..')
from datetime import datetime
import numpy as np
import pandas as pd
import h5py

import nasdaqdatalink as ndl


In [2]:
#del sys.modules['tools.instruments']
#del sys.modules['tools.visualisation']
from tools.instruments import instruments
from tools.display import view
from tools.constants import SRF_CONTRACTS_DB_PATH

In [3]:
# Open the HDF5 store that holds the SRF contract data and make sure every
# instrument that has contracts owns a group named after its symbol.
#
# The store is opened in append mode ('a'): it is created when missing and left
# intact when it already exists, so re-running this cell does not wipe contracts
# downloaded earlier. Delete the file by hand to rebuild the store from scratch.
file = h5py.File(SRF_CONTRACTS_DB_PATH, 'a')
for instrument in instruments.values():
    if instrument.contracts:
        # require_group returns the existing group or creates it, so the cell is
        # safe to run repeatedly.
        file.require_group(instrument.symbol)

In [4]:
# Inspect the contract list of one instrument (AD). The download cell reads the
# dataset code from index 2 and the refresh timestamp from index 5 of each entry.
instruments['AD'].contracts

[['AD', 'CME_AD', 'CME_ADH1987', '1987-01-13', '1987-03-16', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADM1987', '1987-01-13', '1987-06-15', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADU1987', '1987-01-16', '1987-09-14', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADZ1987', '1987-03-18', '1987-12-14', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADH1988', '1987-06-17', '1988-03-14', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADM1988', '1987-10-30', '1988-06-13', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADU1988', '1988-02-24', '1988-09-19', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADZ1988', '1988-05-17', '1988-12-19', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADH1989', '1988-09-02', '1989-03-13', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADM1989', '1989-01-03', '1989-06-19', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADU1989', '1989-02-21', '1989-09-18', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADZ1989', '1989-04-19', '1989-12-18', '2015-04-27'],
 ['AD', 'CME_AD', 'CME_ADH1990', '1989-06-20', '1990-03-19', '2015-04-27'],
 ['AD', 'CME

In [5]:
# Download SRF data from nasdaq-data-link for every instrument that has
# contracts and store each contract as one dataset inside the instrument's
# HDF5 group.
#
# The cell is resumable at contract granularity: a contract is considered stored
# only when its dataset exists AND carries the 'refreshed_at' attribute (the last
# thing written for a dataset). Anything else is downloaded again, so an
# interrupted run continues where it stopped instead of skipping the whole
# instrument.

# Record layout of one daily bar; fields are filled positionally from the
# columns of the downloaded frame.
dtypes = np.dtype([
    ('date', 'i'),  # integer day count, see the datetime64[D] conversion below
    ('open', 'f'),
    ('high', 'f'),
    ('low', 'f'),
    ('close', 'f'),
    ('volume', 'i'),
    ('open_interest', 'i'),
])

i = 0  # running number of contracts downloaded by this execution of the cell
for instrument in instruments.values():
    if not instrument.contracts:
        continue

    symbol = instrument.symbol
    group = file.require_group(symbol)

    # Contracts that are missing or were left half-written by an interrupted run.
    pending = [
        contract for contract in instrument.contracts
        if contract[2] not in group
        or 'refreshed_at' not in group[contract[2]].attrs
    ]
    if not pending:
        print(f"SKIP DB: {instrument.name}")
        continue

    for contract in pending:
        code = contract[2]
        print(f"{i}: {code} 다운로드 중...")

        # Drop a dataset that exists without its completion marker so that
        # create_dataset below does not fail on a name clash.
        if code in group:
            del group[code]

        data = ndl.get(f'SRF/{code}').reset_index()
        # Dates are stored as integers: datetime64[D] cast to int yields the
        # number of days relative to 1970-01-01.
        data['Date'] = data['Date'].values.astype('M8[D]').astype('i')

        # Open interest (column 6) goes into an integer field, which cannot
        # hold NaN, so missing values are replaced with 0.
        counter = data.iloc[:, 6].isna().sum()
        if counter:
            print(f"미결제 약정 Nan Value 발생: {counter}개\n")
            data.iloc[:, 6] = data.iloc[:, 6].fillna(0)

        # Same treatment for volume (column 5).
        counter = data.iloc[:, 5].isna().sum()
        if counter:
            print(f"거래량 Nan Value 발생: ({counter}개)")
            data.iloc[:, 5] = data.iloc[:, 5].fillna(0)

        # Convert the rows to a structured array once and hand it to h5py.
        data = np.array(list(map(tuple, data.values)), dtypes)
        dataset = group.create_dataset(code, data=data)
        # Written last: its presence marks the dataset as complete.
        dataset.attrs['refreshed_at'] = contract[5]
        # Push the finished contract to disk so an interruption during a later
        # download does not lose it.
        file.flush()
        i += 1  # counter

print("완료")

0: CME_ADH1987 다운로드 중...
1: CME_ADM1987 다운로드 중...
2: CME_ADU1987 다운로드 중...
3: CME_ADZ1987 다운로드 중...


KeyboardInterrupt: 

In [8]:
# Read back the 'refreshed_at' attribute that the download cell attaches to each
# contract dataset.
# NOTE(review): the recorded output is a KeyError, i.e. the dataset inspected
# here did not carry the attribute — presumably it was written by an earlier
# version of the download cell; confirm before relying on this attribute.
file['AD']['CME_ADH1987'].attrs['refreshed_at']

KeyError: "Can't open attribute (can't locate attribute: 'refreshed_at')"

In [18]:
# Close the HDF5 store opened in the DB-creation cell; the `file` handle must not
# be used for reads or writes after this point.
file.close()

### 데이터 확인

In [28]:
from tools.visualisation import view

In [27]:
# Inspect the contract list of instrument W.
# NOTE(review): the recorded output shows 7-field tuples containing datetime
# objects, whereas the AD output recorded earlier shows 6-field lists of strings —
# the two outputs presumably stem from different versions of tools.instruments;
# confirm which shape is current.
instruments['W'].contracts

[('W',
  'CME_W',
  'CME_WZ1959',
  datetime.datetime(1959, 7, 1, 0, 0),
  datetime.datetime(1959, 12, 21, 0, 0),
  '2015-04-27 16:17',
  'CBOT Wheat, December 1959 (WZ1959)'),
 ('W',
  'CME_W',
  'CME_WH1960',
  datetime.datetime(1959, 7, 1, 0, 0),
  datetime.datetime(1960, 3, 22, 0, 0),
  '2015-04-27 16:17',
  'CBOT Wheat, March 1960 (WH1960)'),
 ('W',
  'CME_W',
  'CME_WK1960',
  datetime.datetime(1959, 7, 1, 0, 0),
  datetime.datetime(1960, 5, 19, 0, 0),
  '2015-04-27 16:17',
  'CBOT Wheat, May 1960 (WK1960)'),
 ('W',
  'CME_W',
  'CME_WN1960',
  datetime.datetime(1959, 7, 1, 0, 0),
  datetime.datetime(1960, 7, 20, 0, 0),
  '2015-04-27 16:17',
  'CBOT Wheat, July 1960 (WN1960)'),
 ('W',
  'CME_W',
  'CME_WU1960',
  datetime.datetime(1959, 7, 1, 0, 0),
  datetime.datetime(1960, 9, 21, 0, 0),
  '2015-04-27 16:17',
  'CBOT Wheat, September 1960 (WU1960)'),
 ('W',
  'CME_W',
  'CME_WZ1960',
  datetime.datetime(1960, 1, 4, 0, 0),
  datetime.datetime(1960, 12, 20, 0, 0),
  '2015-04-27 16

In [42]:
# Load the stored dataset CME_WZ1973 of instrument W into a DataFrame for
# inspection. The store is reopened read-only because the handle used for
# downloading has already been closed by this point; the context manager
# closes it again once the data has been copied into memory by `[:]`.
with h5py.File(SRF_CONTRACTS_DB_PATH, 'r') as db:
    df = pd.DataFrame(db['W']['CME_WZ1973'][:])

In [43]:
# Render the loaded contract. The `date` column holds the integer day counts
# produced by the datetime64[D] -> int conversion in the download cell, not
# calendar dates.
df

Unnamed: 0,date,open,high,low,close,volume,open_interest
0,1097,239.25,240.00,233.75,234.75,320,275
1,1098,234.25,239.75,234.25,239.25,310,375
2,1099,240.00,241.50,236.25,236.50,410,705
3,1100,237.25,239.50,234.50,236.50,70,740
4,1103,235.75,237.25,235.50,236.50,185,665
...,...,...,...,...,...,...,...
238,1441,572.00,578.00,547.00,550.50,3095,7250
239,1442,550.00,560.00,530.50,544.00,3565,5950
240,1443,547.00,547.00,527.00,528.50,1955,4805
241,1446,535.00,548.50,524.00,548.50,1835,3755
