In [22]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

series = pd.read_csv('../dataset/chattahoochee_1hr_02336490.csv', usecols=['DATE', 'gaze_height'], header=0, index_col='DATE')
series = series[:10000]

X = series.values
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:len(X)]

batch_size = 1000  # Number of steps to predict in each batch
predictions = []

# Initial model fitting
model = ARIMA(train, order=(5, 1, 0))
model_fit = model.fit()

# Walk-forward validation with batch forecasting
for i in range(0, len(test), batch_size):
    # Forecast a batch of steps
    end_index = min(i + batch_size, len(test))
    prediction = model_fit.forecast(steps=end_index - i)
    
    # Store predictions
    predictions.extend(prediction)
    
    # Update the model with new data
    history = np.concatenate([train, test[:end_index]])
    model = ARIMA(history, order=(5,1,0))
    model_fit = model.fit()

# Evaluate performance
error = mean_squared_error(test, predictions[:len(test)])
nse_val = NSE(predictions[:len(test)], test.flatten())
print(f'Test RMSE: {error:.3f}')
print(f'Test NSE: {nse_val:.3f}')


[np.float64(4.827291383215597), np.float64(5.126561779209142), np.float64(5.305024302204709), np.float64(5.390983235554284), np.float64(5.409460800051719), np.float64(5.386113602631058), np.float64(5.345001484773385), np.float64(5.303186357174065), np.float64(5.269674772529316), np.float64(5.247462482589221), np.float64(5.235874247910829), np.float64(5.232386398436852), np.float64(5.233967995044298), np.float64(5.237941316243403), np.float64(5.242377465793704), np.float64(5.246147544715476), np.float64(5.248783204297472), np.float64(5.250266801908838), np.float64(5.2508263896647245), np.float64(5.250774882344719)]
[4.8  5.04 5.14 5.17 5.13 5.04 4.96 4.89 4.79 4.64 4.54 4.46 4.41 4.33
 4.24 4.13 4.04 3.98 3.94 3.9 ]
Test RMSE: 2.765
Test NSE: -0.051


In [20]:
def NSE(y_pred, y):
    print(y_pred[:20])
    print(y[:20])
    return (1-(np.sum((y_pred-y)**2)/np.sum((y-np.mean(y))**2)))