In [184]:
import os
import datetime
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from scipy.stats import t
from scipy.ndimage.filters import uniform_filter1d
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.feature_selection import RFE

In [2]:
log_pr = pd.read_pickle("./log_price.df")
volu = pd.read_pickle("./volume_usd.df")

In [126]:
volu = volu/1e10

In [3]:
# moving averages
def mov_avg(A, B, window_length=60):
    # Input: two 1440 x 1 numpy arrays
    # Output: two 288 x 1 numpy arrays
    
    pr_avg = uniform_filter1d(A, size=window_length, mode='nearest')[::30]
    vo_avg = uniform_filter1d(B, size=window_length, mode='nearest')[::30]

    return pr_avg[-10:], vo_avg[-5:]

In [4]:
# def MACD(A):
#     # Input: 1440 x 1 DataFrame
#     # Output: two 1440 x 1 numpy array
#     df = np.exp(A)
        
#     # Get the 26-day EMA of the closing price
#     k = df.ewm(span=26, adjust=False, min_periods=26).mean()
#     # Get the 12-day EMA of the closing price
#     d = df.ewm(span=12, adjust=False, min_periods=12).mean()

#     # Subtract the 26-day EMA from the 12-Day EMA to get the MACD
#     macd = k - d

#     # Get the 9-Day EMA of the MACD for the Trigger line
#     macd_s = np.array(macd.ewm(span=9, adjust=False, min_periods=9).mean())

#     # Calculate the difference between the MACD - Trigger for the Convergence/Divergence value
#     #macd_h = macd - macd_s
    
#     return np.array(macd), macd_s #, macd_h
def MACD(A):
    # Input: 1440 x 1 DataFrame
    # Output: two 1440 x 1 numpy array
    A = np.exp(A)
        
    # Get the 26-day EMA of the closing price
    k = np_ewma_vectorized(A, 26)
    # Get the 12-day EMA of the closing price
    d = np_ewma_vectorized(A, 12)

    # Subtract the 26-day EMA from the 12-Day EMA to get the MACD
    macd = k - d

    #Get the 9-Day EMA of the MACD for the Trigger line
    macd_s = np.array(np_ewma_vectorized(macd, 9))

    # Calculate the difference between the MACD - Trigger for the Convergence/Divergence value
    #macd_h = macd - macd_s
    
    return k[-10:], d[-10:], np.array(macd)[-10:]#, macd_s#, macd_h

In [5]:
def np_ewma_vectorized(data, window, method = "WMS"):

    if method == "WMS":
        alpha = 1 / window
    elif method == "EMA":
        alpha = 2 / (window + 1.0)
    alpha_rev = 1 - alpha
    n = data.shape[0]

    pows = alpha_rev**(np.arange(n+1))

    scale_arr = 1 / pows[:-1]
    offset = data[0] * pows[1:]
    pw0 = alpha * alpha_rev**(n-1)

    mult = data * pw0 * scale_arr
    cumsums = mult.cumsum()
    out = offset + cumsums * scale_arr[::-1]
    return out

In [6]:
def RSI_np(A, window_length=14, method="WMS"):
    """
    Calculate RSI
    A: numpy array of log price
    method : "SMA": simple moving average,
            "WMS": Wilder Smoothing Method,
            "EMA": exponential moving average
    
    Return RSI for last three periods
    """
    # transform log-price to price
    A = np.exp(A)
    tmp = np.diff(A)

    gain = np.clip(tmp, a_min = 0, a_max = None)
    loss = np.abs(np.clip(tmp, a_min = None, a_max = 0))

    if method == "WMS":
        avg_gain = np_ewma_vectorized(gain, window_length)[-10:]
        avg_loss = np_ewma_vectorized(loss, window_length)[-10:]
    else:
        avg_gain = np_ewma_vectorized(gain, window_length, method = "EMA")[-10:]
        avg_loss = np_ewma_vectorized(loss, window_length, method = "EMA")[-10:]
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi[-5:]

In [7]:
def volChanges(B, window_length=30):
    vol0 = np.mean(B[-30:])
    vol1 = np.mean(B[-60:-30])
    vol2 = np.mean(B[-90:-60])
    vol3 = np.mean(B[-120:-90])
    vol4 = np.mean(B[-180:-120])
    
    return np.array([vol3-vol4, vol2-vol3, vol1-vol2, vol0-vol1, vol0])

In [8]:
def getVolRatios(vol, volWindow=[10, 30, 60]):  
    # Input: 1440 x 1 numpy array
    # the window here is in minutes
    # Output: 3 dim numpy array
    
    return np.array([sum(vol[-win:]) for win in volWindow]) / sum(vol)

In [9]:
def priceVolCor(A, B, time=[1440, 720, 360]):
    # Input: two 1440 x 1 numpy arrays
    # Output: 3 dim numpy array
    
    pv_cor = A
    temp = np.hstack((A, B))
    pv_cor = [np.corrcoef(temp[-t:]) for t in time]
    return np.array(pv_cor)

In [10]:
# z-score of log-price
def zScorePr(A):
    # Input: 1440 x 1 numpy array
    # Output: 3 dim numpy array
    # the moving average log-price of 30min, 1h, and 3h
    
    # moving averages of 30 minutes
    pr_avg_0 = uniform_filter1d(A, size=30, mode='nearest')
    
    # 1 hour (60 minutes)
    pr_avg_1 = uniform_filter1d(A, size=60, mode='nearest')
    
    # 2 hour (120 minutes)
    pr_avg_2 = uniform_filter1d(A, size=120, mode='nearest')
    
    # 3 hours (180 minutes)
    pr_avg_3 = uniform_filter1d(A, size=180, mode='nearest')
    
    z0 = (A[-1] - pr_avg_0[-1]) / np.std(pr_avg_0)
    z1 = (A[-1] - pr_avg_1[-1]) / np.std(pr_avg_1)
    z2 = (A[-1] - pr_avg_2[-1]) / np.std(pr_avg_2)
    z3 = (A[-1] - pr_avg_3[-1]) / np.std(pr_avg_3)
    return np.array([z0, z1, z2, z3])

In [11]:
def neg30logr(A,window_length=10):
    logreturn = -np.diff(A,30)
    pr_avg = uniform_filter1d(logreturn, size=window_length, mode='nearest')[::30]
    return pr_avg[-5:]

In [12]:
def get_features(A, B):
    m1, m2 = mov_avg(A, B) #10, 5
    macd1, _, macd = MACD(A[::30]) #10,10
    RSI1 = RSI_np(A[::30]) #5
    RSI2 = RSI_np(A[::30],method="EMA") #5
    #print(len(m1), len(m2))
    #print(len(macd1), len(macd))
    #print(np.shape(RSI_np(A[::30])))
    return np.hstack((
        m1, m2, #10, 5
        macd1, macd, #10,10
        RSI1,RSI2, #5,5
        volChanges(B), #5
        getVolRatios(B), #3
#         priceVolCor(A, B),
#         zScorePr(A), 
        neg30logr(A) #5
    )).reshape((1, -1))

In [13]:
f = get_features(log_pr.iloc[:1440,0], volu.iloc[:1440,0])
p = np.shape(f)[1]
print(len(log_pr.iloc[0:1440,0]), np.shape(f))

1440 (1, 58)


In [14]:

# test_res = []
# selected_rank2 = []

# pred = []
# for asset in range(10):
# #     t0 = time.time()

#     fs = get_features(log_pr.iloc[:1440, asset], volu.iloc[:1440, asset])
#     y = log_pr.iloc[1440+29, asset] - log_pr.iloc[1440-1, asset]

#     d = 10

#     for t in range(1440*162 - 30)[d::d]: # compute the predictions every 10 minutes
#         f = get_features(log_pr.iloc[t:(t+1440), asset], volu.iloc[t:(t+1440), asset])
#         fs = np.vstack((fs, f))
#         y = np.vstack((y, log_pr.iloc[t+1440+29, asset] - log_pr.iloc[t+1440-1, asset]))

#     #t_used = time.time() - t0
#     #print(t_used, np.shape(fs), np.shape(y))
    
#     ftest = get_features(log_pr.iloc[10:1450, asset], volu.iloc[10:1450, asset])
#     ytest = log_pr.iloc[1450+29, asset] - log_pr.iloc[1450-1, asset]

#     ytest = []
#     for t in range(264960-1470)[1440*163 - 30::d]: # compute the predictions every 10 minutes
#         f = get_features(log_pr.iloc[t:(t+1440), asset], volu.iloc[t:(t+1440), asset])
#         ftest = np.vstack((ftest, f))
#         ytest = np.append(ytest, log_pr.iloc[t+1440+29, asset] - log_pr.iloc[t+1440-1, asset])
    
#     #glmnet
#     cv_model = glmnet.cv_glmnet(fs, y)
#     #find optimal lambda value that minimizes test MSE
#     lambda_cv = cv_model[-3]
#     cv_model = glmnet.glmnet(fs, y, lambda_=lambda_cv)
    
# #     with open('./model_{}.pkl'.format(asset),'wb') as f:
# #         pickle.dump(cv_model,f)

#     pred = np.append(pred,robjects.r.predict(cv_model,newx=ftest)[:,-1])
# np.corrcoef(pred[:,0], ytest[:,0])[0,1]
# #     t_used = time.time() - t0
# #     print(t_used, np.shape(ftest), np.shape(ytest))
    
# #     model = LinearRegression()
# #     model.fit(fs, y)

# #     pred = model.predict(ftest)
    
# #     test_res.append(np.corrcoef(pred[:,0], ytest[:,0])[0,1])
# #     #with open('./model_{}.pkl'.format(asset),'wb') as f:
# #     #    pickle.dump(model,f)
    
# #     select = RFE(model, n_features_to_select=40, step=1).fit(fs, y)
# #     #selected.append([i for i in range(p) if select.support_[i]])
# #     selected_rank2.append(select.ranking_)

In [15]:
# test_res = []
# selected_rank2 = []

# asset = 2
# t0 = time.time()

# fs = get_features(log_pr.iloc[:1440, asset], volu.iloc[:1440, asset])
# y = log_pr.iloc[1440+29, asset] - log_pr.iloc[1440-1, asset]

# d = 10

# for t in range(1440*162 - 30)[d::d]: # compute the predictions every 10 minutes
#     f = get_features(log_pr.iloc[t:(t+1440), asset], volu.iloc[t:(t+1440), asset])
#     fs = np.vstack((fs, f))
#     y = np.vstack((y, log_pr.iloc[t+1440+30, asset] - log_pr.iloc[t+1440, asset]))

# #t_used = time.time() - t0
# #print(t_used, np.shape(fs), np.shape(y))

# ftest = get_features(log_pr.iloc[10:1450, asset], volu.iloc[10:1450, asset])
# ytest = log_pr.iloc[1450+30, asset] - log_pr.iloc[1450, asset]

# d = 10

# for t in range(264960-1470)[1440*163 - 30::d]: # compute the predictions every 10 minutes
#     f = get_features(log_pr.iloc[t:(t+1440), asset], volu.iloc[t:(t+1440), asset])
#     ftest = np.vstack((ftest, f))
#     ytest = np.vstack((ytest, log_pr.iloc[t+1440+30, asset] - log_pr.iloc[t+1440, asset]))

# t_used = time.time() - t0
# print(t_used, np.shape(ftest), np.shape(ytest))



In [16]:
# #linear model
# model = LinearRegression()
# model.fit(fs, y)

# pred = model.predict(ftest)
# np.corrcoef(pred[:,0], ytest[:,0])[0,1]

In [17]:
# import rpy2
# from rpy2.robjects.packages import importr
# import rpy2.robjects.packages as rpackages
# import rpy2.robjects as robjects
# glmnet = rpackages.importr('glmnet')
# base = importr("base")
# from rpy2.robjects import pandas2ri

# # Convert pandas.DataFrames to R dataframes automatically.
# pandas2ri.activate()
# #cv_model = glmnet.cv_glmnet(fs, y)
# #find optimal lambda value that minimizes test MSE
# #lambda_cv = cv_model[-3]
# cv_model = glmnet.glmnet(fs, y)
#                          #, lambda_=lambda_cv)
# pred = robjects.r.predict(cv_model,newx=ftest)
# [np.corrcoef(pred[:,i], ytest[:,0])[0,1] for i in range(100)]

# Try Out Modeling

In [18]:
def mystandardize(D):
    S = np.std(D, axis=0, ddof=1)
    M = np.mean(D, axis = 0)
    D_norm = (D-M)/S
    return [D_norm, M, S]

In [198]:
asset = 2
fs = pd.read_pickle("feature"+str(asset)+".df")
y = pd.read_pickle("y"+str(asset)+".df")
vol_list = [10,11,12,13,14,45,46,47,48,49,53,54,55,56,57]
fs.iloc[:,vol_list] /= 1e8
fs_train = fs.iloc[:144*121-3,:]
fs_test = fs.iloc[144*121-3:,:]
y_train = y.iloc[:144*121-3,:]
y_test = y.iloc[144*121-3:,:]

In [None]:
Y_norm_train = pd.DataFrame(mystandardize((y_train-y_train.shift(3)).dropna())[0])
for alpha in [1e-6,1e-5,1e-4,1e-3]:
#np.linspace(1e-6, 1e-3, num=5):
    model = ElasticNet(l1_ratio = 1,alpha=alpha, 
                              fit_intercept = True, normalize = False, 
                         tol=0.0000001, max_iter = 100000)
    model.fit(fs_train.shift(3).dropna(), Y_norm_train)
    print(alpha)
#    print(model.alpha_)
#    print(model.l1_ratio_)
    print(model.coef_)

    pred = model.predict(fs_test)
    # np.corrcoef((pred-y_test.iloc[:,0].shift(3)).dropna(),\
    #             (y_test.iloc[:,0]-y_test.iloc[:,0].shift(3)).dropna())[0,1]
    print(np.corrcoef(pd.DataFrame(pred).shift(3).dropna().iloc[:,0],\
                 (y_test-y_test.shift(3)).dropna().iloc[:,0])[0,1])
    

  model = cd_fast.enet_coordinate_descent(


1e-06
[-1.64121598e+00  8.18835279e+00 -1.23593671e+01  4.74823498e+00
  1.51080711e+00 -4.12154906e+00 -3.81049242e+00  4.97850126e+00
  1.18076638e+01 -9.03785747e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -1.19303412e-01
  0.00000000e+00 -7.26770887e-03 -9.08767199e-03  0.00000000e+00
 -7.74369733e-03 -0.00000000e+00  0.00000000e+00 -0.00000000e+00
 -0.00000000e+00  8.22416053e+00 -2.70247171e+00  0.00000000e+00
  1.42914642e+01 -2.24885163e+01  1.63951209e+00  0.00000000e+00
 -7.11086783e+00  0.00000000e+00  9.54299078e+00  1.59392051e-02
 -1.10343664e-02 -1.73749551e-02  3.44130426e-03  1.07533287e-02
 -9.26460966e-03  7.62305120e-03  7.95414765e-03 -4.27065317e-03
 -4.42038534e-03  0.00000000e+00 -0.00000000e+00  0.00000000e+00
 -0.00000000e+00  0.00000000e+00 -0.00000000e+00 -7.34558011e-01
  1.81117437e-01  6.54490209e+00 -0.00000000e+00  7.56399719e+00
 -2.34147509e+00  0.00000000e+00]
0.02732997408114489
0.00025075000000000005
[ 0.000



# Arima Model

In [21]:
asset = 9
fs = pd.read_pickle("feature"+str(asset)+".df")
y = pd.read_pickle("y"+str(asset)+".df")
fs_train = fs.iloc[:23325,:]
fs_test = fs.iloc[23325:,:]
y_train = y.iloc[:23325,:]
y_test = y.iloc[23325:,:]

In [99]:
y_train = log_pr.iloc[29::30,0]

In [100]:
from pmdarima.arima.utils import ndiffs
## Adf Test
print(ndiffs(y_train, test='adf'))  # 1

# KPSS test
print(ndiffs(y_train, test='kpss'))  # 1

# PP test:
print(ndiffs(y_train, test='pp'))  # 1

1
2
1


In [52]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
model = ARIMA(y_train, order=(1,1,1))
model_fit = model.fit()
print(model_fit.summary())

                               SARIMAX Results                                
Dep. Variable:                      0   No. Observations:                23325
Model:                 ARIMA(1, 1, 1)   Log Likelihood              116740.996
Date:                Thu, 14 Apr 2022   AIC                        -233475.993
Time:                        22:04:42   BIC                        -233451.821
Sample:                             0   HQIC                       -233468.143
                              - 23325                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0446      0.020      2.216      0.027       0.005       0.084
ma.L1          0.0551      0.021      2.688      0.007       0.015       0.095
sigma2      2.618e-06   3.59e-09    729.886      0.0

In [56]:
pred = model_fit.forecast(steps=3)
print(pred)
#np.corrcoef(pred, y_train.iloc[:,0])[0,1]

23325    0.005246
23326    0.005260
23327    0.005261
Name: predicted_mean, dtype: float64


In [58]:
y.iloc[23324:23325+3,:]

Unnamed: 0,0
23324,0.004917
23325,0.006584
23326,0.007966
23327,0.00955


In [46]:
model = ARIMA(y.iloc[23325-144:23325,:], order=(1,1,1))
model_fit = model.fit()
pred = model_fit.predict(step=3)
np.corrcoef(pred, y.iloc[23325+3-144:23325+3,0])[0,1]

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


0.4519243724646293

In [127]:
# construct data
for asset in range(1,10):
#    t0 = time.time()

    fs = get_features(log_pr.iloc[14400:(1440 * 11), asset], volu.iloc[14400:(1440 * 11), asset])
    y = np.mean(log_pr.iloc[(1440 * 11+27):(1440 * 11+33), asset])

    d = 10

    for t in range(1440 * 0, 264960 - 1470, d): 
        f = get_features(log_pr.iloc[t:(t+1440), asset], volu.iloc[t:(t+1440), asset])
        fs = np.vstack((fs, f))
        y = np.vstack((
            y, 
            np.mean(log_pr.iloc[(t+1440+27):(t+1440+33), asset])
        ))
    pd.DataFrame(fs).to_pickle("feature"+str(asset)+".df")
    pd.DataFrame(y).to_pickle("y"+str(asset)+".df")

#     Y_norm_train = pd.DataFrame(mystandardize(y[:,0])[0])
#     model = ElasticNetCV(alphas=np.linspace(1000, 5e5, num=100), l1_ratio = 1,
#                               fit_intercept = True, normalize = False, tol=0.0000001, max_iter = 100000)
#     model.fit(fs, Y_norm_train)

#     with open('./model_{}.pkl'.format(asset),'wb') as f:
#         pickle.dump(model,f)
    

# Train Model for Submission Model 

In [None]:
# train the models for submission with Cluster

### group 1
for asset in [0,3,5,6]:
    fs = pd.read_pickle("feature"+str(asset)+".df")
    y = pd.read_pickle("y"+str(asset)+".df")

    Y_norm_train = pd.DataFrame(mystandardize(y)[0])
    model = ElasticNetCV(l1_ratio = 1,
                         #alphas=np.linspace(1000, 5e5, num=100), 
                              fit_intercept = True, normalize = False, 
                         tol=0.0000001, max_iter = 100000)
    model.fit(fs, Y_norm_train)

    with open('./model_{}.pkl'.format(asset),'wb') as f:
        pickle.dump(model,f)
        
        
### group 2
for asset in [1,4,7]:
    fs = pd.read_pickle("feature"+str(asset)+".df")
    y = pd.read_pickle("y"+str(asset)+".df")

    Y_norm_train = pd.DataFrame(mystandardize(y)[0])
    model = ElasticNetCV(l1_ratio = 1,
                         alphas=np.linspace(10, 100, num=100), 
                              fit_intercept = True, normalize = False, 
                         tol=0.0000001, max_iter = 100000)
    model.fit(fs, Y_norm_train)

    with open('./model_{}.pkl'.format(asset),'wb') as f:
        pickle.dump(model,f)
        
### group 3
for asset in [2]:
    fs = pd.read_pickle("feature"+str(asset)+".df")
    y = pd.read_pickle("y"+str(asset)+".df")

    Y_norm_train = pd.DataFrame(mystandardize(y)[0])
    model = ElasticNetCV(l1_ratio = 0.5,
                         alphas=np.linspace(1000, 5e5, num=100), 
                              fit_intercept = True, normalize = False, 
                         tol=0.0000001, max_iter = 100000)
    model.fit(fs, Y_norm_train)

    with open('./model_{}.pkl'.format(asset),'wb') as f:
        pickle.dump(model,f)

### group 4
for asset in [8,9]:
    fs = pd.read_pickle("feature"+str(asset)+".df")
    y = pd.read_pickle("y"+str(asset)+".df")

    Y_norm_train = pd.DataFrame(mystandardize(y)[0])
    model = ElasticNetCV(l1_ratio = 0.1,
                         alphas=np.linspace(100, 1000, num=100), 
                              fit_intercept = True, normalize = False, 
                         tol=0.0000001, max_iter = 100000)
    model.fit(fs, Y_norm_train)

    with open('./model_{}.pkl'.format(asset),'wb') as f:
        pickle.dump(model,f)
    

In [105]:
# MODELS = []

# for i in range(0,10):
#     with open('model_{}.pkl'.format(i), 'rb') as f:
#         model = pickle.load(f)
#         MODELS.append(model)
        
def get_r_hat(A, B): 
    """
        A: 1440-by-10 dataframe of log prices with columns log_pr_0, ... , log_pr_9
        B: 1440-by-10 dataframe of trading volumes with columns volu_0, ... , volu_9    
        return: a numpy array of length 10, 
            corresponding to the predictions for the forward 30-minutes returns of assets 0, 1, 2, ..., 9
    """
    answer = []
    df = A.iloc[29::30,:]
    # asset 0
    asset = 0
    model = ARIMA(df[asset], order=(1,2,1))
    model_fit = model.fit()
    pred = model_fit.forecast(steps=1) - A.iloc[-1, asset]
    answer.append(pred)
        
    for asset in range(1,10):
        
#         f = get_features(np.array(A)[:, asset], np.array(B)[:, asset])
        
#         pred = MODELS[asset].predict(f).reshape(-1, 1)- A.iloc[-1, asset]
        model = ARIMA(df[asset], order=(1,1,1))
        model_fit = model.fit()
        pred = model_fit.forecast(steps=1) - A.iloc[-1, asset]
        answer.append(pred)
    
    answer = np.array(answer).reshape(10)
    return answer

In [111]:
def get_r_hat(A, B): 
    """
        A: 1440-by-10 dataframe of log prices with columns log_pr_0, ... , log_pr_9
        B: 1440-by-10 dataframe of trading volumes with columns volu_0, ... , volu_9    
        return: a numpy array of length 10, 
            corresponding to the predictions for the forward 30-minutes returns of assets 0, 1, 2, ..., 9
    """
    answer = []
    # asset 0
    asset = 0
    model = ARIMA(A[asset], order=(1,2,1))
    model_fit = model.fit()
    pred = model_fit.forecast(steps=30)[-1] - A.iloc[-1, asset]
    print(model_fit.forecast(steps=30)[-1])
    answer.append(pred)
        
    for asset in range(1,10):
        
#         f = get_features(np.array(A)[:, asset], np.array(B)[:, asset])
        
#         pred = MODELS[asset].predict(f).reshape(-1, 1)- A.iloc[-1, asset]
        model = ARIMA(A[asset], order=(1,1,1))
        model_fit = model.fit()
        pred = model_fit.forecast(steps=30)[-1] - A.iloc[-1, asset]
        answer.append(pred)
    
    answer = np.array(answer).reshape(10)
    return answer

In [112]:
np.corrcoef(get_r_hat(log_pr.iloc[1440:1440*2,:],volu.iloc[1440:1440*2,:]),log_pr.iloc[1440*2+30,:])

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


-0.00503879349113658


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


array([[ 1.        , -0.03668557],
       [-0.03668557,  1.        ]])

In [104]:
A = log_pr.iloc[:1440,:]
A.iloc[29::30,:][0]

timestamp
2021-07-01 00:29:00   -0.001411
2021-07-01 00:59:00   -0.009531
2021-07-01 01:29:00   -0.012284
2021-07-01 01:59:00   -0.008864
2021-07-01 02:29:00   -0.008747
2021-07-01 02:59:00   -0.010647
2021-07-01 03:29:00   -0.004762
2021-07-01 03:59:00    0.001056
2021-07-01 04:29:00    0.001116
2021-07-01 04:59:00   -0.000264
2021-07-01 05:29:00   -0.002318
2021-07-01 05:59:00   -0.001721
2021-07-01 06:29:00    0.001954
2021-07-01 06:59:00    0.004308
2021-07-01 07:29:00    0.004084
2021-07-01 07:59:00    0.007866
2021-07-01 08:29:00   -0.000171
2021-07-01 08:59:00   -0.003883
2021-07-01 09:29:00   -0.000983
2021-07-01 09:59:00   -0.001637
2021-07-01 10:29:00    0.001903
2021-07-01 10:59:00    0.002995
2021-07-01 11:29:00    0.000836
2021-07-01 11:59:00    0.004060
2021-07-01 12:29:00    0.003237
2021-07-01 12:59:00    0.001190
2021-07-01 13:29:00    0.000952
2021-07-01 13:59:00    0.001787
2021-07-01 14:29:00    0.001455
2021-07-01 14:59:00    0.003093
2021-07-01 15:29:00   -0.00161