In [1]:
# Data manipulation
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = 1.5
plt.rcParams['font.size'] = 10
plt.rcParams['figure.figsize'] = (16, 10)

# Modeling and Forecasting
# ==============================================================================
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregCustom import ForecasterAutoregCustom
from skforecast.ForecasterAutoregDirect import ForecasterAutoregDirect
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster

from statsmodels.tsa.stattools import adfuller
# Warnings configuration
# ==============================================================================
import warnings
# warnings.filterwarnings('ignore')

In [2]:
# %matplotlib inline
%matplotlib qt

In [3]:
from lutils.fin.data_loader import load, load_tq, load_ctp

In [4]:
exchange = 'SHFE'
symbol_underlying = 'rb2305'

In [5]:
df_underlying = load(exchange, symbol_underlying)

load Z:/tq_data/ticks\SHFE.rb2305.h5
load Y:/fin_data\2023-05-04\SHFE.rb2305.h5
load Y:/fin_data\2023-05-05\SHFE.rb2305.h5
load Y:/fin_data\2023-05-08\SHFE.rb2305.h5
load Y:/fin_data\2023-05-09\SHFE.rb2305.h5
load Y:/fin_data\2023-05-10\SHFE.rb2305.h5
load Y:/fin_data\2023-05-11\SHFE.rb2305.h5
load Y:/fin_data\2023-05-12\SHFE.rb2305.h5
load Y:/fin_data\2023-05-15\SHFE.rb2305.h5


In [6]:
df = df_underlying[['datetime', 'last_price', 'volume', 'amount', ]]

In [7]:
df = df.dropna()

In [8]:
df.index = df.datetime

In [9]:
df.shape

(7034205, 4)

In [10]:
resample_1s = df.resample('1s').last()

In [11]:
resample_1s = resample_1s.ffill().bfill()

In [12]:
resample_1s.shape

(31431022, 4)

In [13]:
df_1s = pd.concat([resample_1s.between_time('09:00', '10:15'), 
          resample_1s.between_time('10:30', '11:30'),
          resample_1s.between_time('13:30', '15:00'),
          resample_1s.between_time('21:00', '23:00')], axis=0).sort_index()[['last_price', 'volume', 'amount']]

In [14]:
df_1s.shape

(7536256, 3)

In [15]:
df_1s.head(10)

Unnamed: 0_level_0,last_price,volume,amount
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-05-16 21:00:00,4430.0,70.0,3093500.0
2022-05-16 21:00:01,4430.0,75.0,3315000.0
2022-05-16 21:00:02,4480.0,78.0,3449400.0
2022-05-16 21:00:03,4480.0,78.0,3449400.0
2022-05-16 21:00:04,4480.0,84.0,3718200.0
2022-05-16 21:00:05,4480.0,86.0,3807800.0
2022-05-16 21:00:06,4480.0,87.0,3852600.0
2022-05-16 21:00:07,4480.0,87.0,3852600.0
2022-05-16 21:00:08,4480.0,87.0,3852600.0
2022-05-16 21:00:09,4480.0,87.0,3852600.0


In [16]:
rolmean = df_1s.rolling('5T').last_price.mean()

In [17]:
train_start = '2023-01-01'
train_end = '2023-04-30'
test_start = '2023-05-01'
data_train = rolmean[:train_end]
data_test  = rolmean[test_start:]

In [18]:
data_train = data_train.to_frame()
data_test = data_test.to_frame()

In [19]:
transformer = preprocessing.MinMaxScaler().fit(data_train)

In [20]:
transformer.transform(data_train)

array([[0.82848233],
       [0.82848233],
       [0.84109763],
       ...,
       [0.18737259],
       [0.18737259],
       [0.18737259]])

In [21]:
transformer.transform(data_test)

array([[0.18737259],
       [0.18737259],
       [0.18737259],
       ...,
       [0.11192802],
       [0.11192298],
       [0.11191793]])

In [None]:
forecaster = ForecasterAutoreg(regressor=RandomForestRegressor(random_state=123), lags=6)

forecaster.fit(y=data_train['last_price'])
forecaster

  ("`y` has DatetimeIndex index but no frequency. "
