In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load Packages

In [2]:
import sys
sys.path.append('..')

import matplotlib.pyplot as plt
%matplotlib inline

from numpy_fracdiff import fracdiff
import numpy as np

from statsmodels.tsa.stattools import adfuller
import scipy.optimize

# Load Data

In [3]:
# Read the 'px' array from the demo archive; the `with` block closes the
# .npz file as soon as the array has been pulled out.
with np.load('data/demo1.npz') as data:
    X = data['px']  # later cells index this as X[:, j], i.e. one series per column
    #t = data['t']

In [4]:
# NOTE(review): X is rebound to a slice of itself, so running this cell twice
# drops a second row. Re-run the loading cell first if it must be repeated.
X = X[1:]  # chop 01-Jan
len(X)  # displayed as the cell output: number of rows left

782

# Example
Transform all $X$ time series with `fracdiff` using the fractional order $d=0.3$.
Truncate at 100 (i.e. also chop the first 100 NaNs).

In [5]:
# Fractionally differentiate every series with d=0.3 and discard the first
# 100 rows (the truncation window) in the same expression.
Z = fracdiff(X, order=0.3, truncation=100)[100:]

Run the ADF test on all time series that were fractionally differentiated with $d=0.3$.

In [6]:
# Test every column of Z rather than a hard-coded count of 4, so the loop
# keeps covering "all" series if the number of columns in the data changes.
for j in range(Z.shape[1]):
    # adfuller returns (statistic, p-value, ...); only the first two are reported.
    adf, pval, _, _, _, _ = adfuller(Z[:, j], regression='c', autolag='BIC')
    print("p-values: {:5.4f} | ADF: {:>6.3f}".format(pval, adf))

p-values: 0.4009 | ADF: -1.759
p-values: 0.0863 | ADF: -2.633
p-values: 0.0341 | ADF: -3.008
p-values: 0.0000 | ADF: -5.322


# Backtracking
For $d=1$ we usually get a stationary time series transform.
Thus, let us start at $d=1$ and reduce towards $d=0$,
and stop when the p-value exceeds the threshold $\alpha=0.01$.

In [7]:
%%time
x = X[:, 0]  # pick the 1st time series

n_steps = 30
order = 1
n_trunc = 100
alpha = 0.001
bestorder = order

for order in np.flip(np.arange(n_steps) / n_steps):
    z = fracdiff(x, order=order, truncation=n_trunc)
    stat, pval, _, _, crit, _ = adfuller(z[n_trunc:], regression='c', autolag='BIC')
    print("d: {:5.4f} | DF:{: 7.4f} | crit:{: 7.4f} | p-val: {:1.2E}".format(
        order, stat, crit['1%'], pval))
    
    if (stat < crit['1%']) and (pval < alpha):
        bestorder = order
    else:
        #break
        pass
        

print(f"best d={bestorder}")

d: 0.9667 | DF:-28.0859 | crit:-3.4400 | p-val: 0.00E+00
d: 0.9333 | DF:-26.9952 | crit:-3.4400 | p-val: 0.00E+00
d: 0.9000 | DF:-25.8410 | crit:-3.4400 | p-val: 0.00E+00
d: 0.8667 | DF:-16.0098 | crit:-3.4400 | p-val: 6.37E-29
d: 0.8333 | DF:-11.7790 | crit:-3.4400 | p-val: 1.05E-21
d: 0.8000 | DF:-11.0341 | crit:-3.4400 | p-val: 5.57E-20
d: 0.7667 | DF:-10.2660 | crit:-3.4400 | p-val: 4.12E-18
d: 0.7333 | DF:-9.4815 | crit:-3.4400 | p-val: 3.86E-16
d: 0.7000 | DF:-7.4257 | crit:-3.4400 | p-val: 6.56E-11
d: 0.6667 | DF:-6.7656 | crit:-3.4400 | p-val: 2.72E-09
d: 0.6333 | DF:-6.1202 | crit:-3.4400 | p-val: 8.89E-08
d: 0.6000 | DF:-5.4981 | crit:-3.4400 | p-val: 2.10E-06
d: 0.5667 | DF:-3.8975 | crit:-3.4401 | p-val: 2.05E-03
d: 0.5333 | DF:-3.4665 | crit:-3.4401 | p-val: 8.89E-03
d: 0.5000 | DF:-3.0669 | crit:-3.4401 | p-val: 2.91E-02
d: 0.4667 | DF:-2.6991 | crit:-3.4401 | p-val: 7.42E-02
d: 0.4333 | DF:-2.9368 | crit:-3.4400 | p-val: 4.12E-02
d: 0.4000 | DF:-2.5472 | crit:-3.4400 | p

# Bisection
We will use the difference between the ADF test statistic and its 1% critical value (a stand-in for comparing the p-value with the required threshold $\alpha$).
The bisection method only requires the sign of this difference.

In [8]:
def loss_fn(d: float, alpha: float, x: np.ndarray, n_trunc: int) -> float:
    """Signed gap between the ADF statistic and the scaled 1% critical value.

    The series ``x`` is fractionally differentiated with order ``d``, the
    first ``n_trunc`` values are dropped, and the ADF test is run on the rest.

    Args:
        d: Fractional differentiation order passed to ``fracdiff``.
        alpha: Not used in the computation; kept so that existing
            ``args=(alpha, x, n_trunc)`` call sites keep working.
        x: Time series to transform.
        n_trunc: Truncation passed to ``fracdiff``; also the number of
            leading values discarded before the test.

    Returns:
        ``stat - 1.0001 * crit['1%']``. Negative when the statistic lies
        below the scaled critical value, positive otherwise, so a root
        finder can bracket the sign change.
    """
    z = fracdiff(x, order=d, truncation=n_trunc)
    result = adfuller(z[n_trunc:], regression='c', autolag='BIC')
    stat, crit = result[0], result[4]
    # The 1.0001 factor nudges the target slightly past the critical value;
    # presumably so the root found still satisfies stat < crit -- TODO confirm.
    return stat - (crit['1%'] * 1.0001)

In [9]:
# Evaluate the loss at both ends of the unit interval to confirm that the
# sign changes between d=0 and d=1.
tuple(loss_fn(d_end, alpha, x, n_trunc) for d_end in (0, 1))

(3.7278096266363696, -25.67897829869691)

Also note that the `xtol` parameter doesn't need to be super precise.
The search stops once $d$ is located to within `xtol`; we use `xtol=alpha*.01`, i.e. `1e-04`.

In [10]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.bisect(loss_fn, 0.01, 1.5, args=(alpha, x, n_trunc), xtol=1e-04)
d

CPU times: user 721 ms, sys: 69.9 ms, total: 791 ms
Wall time: 403 ms


0.5311907958984375

The Ridder method is faster than the bisection method.

In [11]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.ridder(loss_fn, 0.01, 1.5, args=(alpha, x, n_trunc), xtol=1e-04)
d

CPU times: user 746 ms, sys: 73.2 ms, total: 820 ms
Wall time: 416 ms


0.5312875459658993

In [12]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.brenth(loss_fn, 0.01, 1.5, args=(alpha, x, n_trunc), xtol=1e-04)
d

CPU times: user 469 ms, sys: 49.5 ms, total: 519 ms
Wall time: 299 ms


0.5312358466278839

In [13]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.brentq(loss_fn, 0.01, 1.5, args=(alpha, x, n_trunc), xtol=1e-04)
d

CPU times: user 425 ms, sys: 46.1 ms, total: 471 ms
Wall time: 247 ms


0.5312233892293179

In [14]:
# Re-run the transform and the ADF test at the order found above and print
# the statistic next to its 1% critical value and p-value.
report = "d: {:5.4f} | DF:{: 7.4f} | crit:{: 7.4f} | p-val: {:1.2E}"
z = fracdiff(x, order=d, truncation=n_trunc)
stat, pval, _, _, crit, _ = adfuller(z[n_trunc:], regression='c', autolag='BIC')
print(report.format(d, stat, crit['1%'], pval))

d: 0.5312 | DF:-3.4402 | crit:-3.4401 | p-val: 9.66E-03


# Squared Errors
We will use the squared difference between the ADF test statistic and its 1% critical value as the target function of a minimization problem.

$$
\min_d \; \left({\rm DF}(d) - {\rm crit}\right)^2
$$

In [15]:
def loss_fn(d: float, x: np.ndarray, n_trunc: int) -> float:
    """Squared gap between the ADF statistic and the 1% critical value.

    NOTE: this definition replaces the earlier four-argument ``loss_fn`` used
    by the root-finding cells. After running this cell those cells must not
    be re-run without first re-running the earlier definition.

    Args:
        d: Fractional differentiation order passed to ``fracdiff``.
        x: Time series to transform.
        n_trunc: Truncation passed to ``fracdiff``; also the number of
            leading values discarded before the test.

    Returns:
        ``(stat - crit['1%']) ** 2``, which is zero exactly where the ADF
        statistic equals the 1% critical value.
    """
    z = fracdiff(x, order=d, truncation=n_trunc)
    result = adfuller(z[n_trunc:], regression='c', autolag='BIC')
    stat, crit = result[0], result[4]
    return (stat - crit['1%'])**2

Newton's method is rather unstable with respect to the start value `x0` (e.g. 0.0 and 1.0 will fail).

In [16]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.newton(loss_fn, 0.5, args=(x, n_trunc), tol=(alpha*.01)**2, maxiter=500)
d

CPU times: user 1.44 s, sys: 149 ms, total: 1.59 s
Wall time: 874 ms


0.5312097960053145

In [17]:
def loss_fn2(d: float, x: np.ndarray, n_trunc: int) -> float:
    """Squared statistic-to-critical-value gap plus the squared p-value.

    Args:
        d: Fractional differentiation order passed to ``fracdiff``.
        x: Time series to transform.
        n_trunc: Truncation passed to ``fracdiff``; also the number of
            leading values discarded before the test.

    Returns:
        ``(stat - crit['1%']) ** 2 + pval ** 2`` for the ADF test run on the
        truncated, fractionally differentiated series.
    """
    z = fracdiff(x, order=d, truncation=n_trunc)
    result = adfuller(z[n_trunc:], regression='c', autolag='BIC')
    stat, pval, crit = result[0], result[1], result[4]
    return (stat - crit['1%'])**2 + pval**2

In [18]:
x = X[:, 0]  # pick the 1st time series
n_trunc = 100
alpha = 0.01

%time d = scipy.optimize.fminbound(loss_fn, 0.01, 1.5, args=(x, n_trunc), xtol=1e-04, maxfun=200)
print(d)

%time d = scipy.optimize.fminbound(loss_fn2, 0.01, 1.5, args=(x, n_trunc), xtol=1e-04, maxfun=200)
print(d)

CPU times: user 514 ms, sys: 57 ms, total: 571 ms
Wall time: 345 ms
0.5312082297394483
CPU times: user 515 ms, sys: 58.2 ms, total: 574 ms
Wall time: 323 ms
0.5312319300973788


In [19]:
# Re-run the transform and the ADF test at the last order found and print
# the statistic next to its 1% critical value and p-value.
report = "d: {:5.4f} | DF:{: 7.4f} | crit:{: 7.4f} | p-val: {:1.2E}"
z = fracdiff(x, order=d, truncation=n_trunc)
stat, pval, _, _, crit, _ = adfuller(z[n_trunc:], regression='c', autolag='BIC')
print(report.format(d, stat, crit['1%'], pval))

d: 0.5312 | DF:-3.4403 | crit:-3.4401 | p-val: 9.66E-03
