In [1]:
import sys
sys.path.append('..')
from lagmat import lagmat, rmnanrow, chopnan

import numpy as np

Create a random variable (7 observations in a single column)

In [2]:
# Seven standard-normal draws as a (7, 1) column, rescaled to mean 75 and standard deviation 5.
# NOTE(review): no seed is set, so a re-run produces different numbers than the outputs shown.
y = 75 + 5 * np.random.normal(size=(7, 1))

Create lagged variables

In [3]:
# Build the lag matrix of y for lags 1, 2 and 3. As the output shows, there is one
# column per lag, and the leading rows are NaN where no earlier observation exists.
x = lagmat(y, lags=[1,2,3])
x

array([[        nan,         nan,         nan],
       [76.42676888,         nan,         nan],
       [71.96466941, 76.42676888,         nan],
       [69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941],
       [71.79094799, 78.22200481, 69.66094866],
       [71.6029272 , 71.79094799, 78.22200481]])

## Remove rows with NaNs
The function `rmnanrow` removes all rows with `NaN` values.
The function emits a warning because this trimming approach is not safe: it drops every row that contains a `NaN`, not only the leading rows created by the lags.

In [4]:
# Drop every row of x that contains a NaN (here: the first three rows).
rmnanrow(x)

  "This function 'rmnanrow' is for demonstration purpose only. "


array([[69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941],
       [71.79094799, 78.22200481, 69.66094866],
       [71.6029272 , 71.79094799, 78.22200481]])

If the series itself contains missing values, `rmnanrow` also deletes observations in the middle of the sample, not just the leading rows created by the lags.

In [5]:
# Work on a copy of y so the original series stays intact, blank out the
# observation at row index 4 to simulate a gap, then rebuild the lag matrix.
y2 = y.copy()
y2[4, 0] = np.nan
x2 = lagmat(y2, lags=[1, 2, 3])
x2

array([[        nan,         nan,         nan],
       [76.42676888,         nan,         nan],
       [71.96466941, 76.42676888,         nan],
       [69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941],
       [        nan, 78.22200481, 69.66094866],
       [71.6029272 ,         nan, 78.22200481]])

In [6]:
# Same call on x2: the rows touched by the gap are dropped too, so only two rows remain.
rmnanrow(x2)

array([[69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941]])

## Trim according to the lags
`chopnan` will not do anything "automagically".
You need to provide the applied lags, e.g. `lags=[1,2,3]` or `nchop=max(lags)`

In [7]:
# Calling chopnan with neither `lags` nor `nchop` is the failure being demonstrated:
# catch the exception and display its message instead of letting the cell abort.
try:
    chopnan(x)
except Exception as err:
    print("Error Message: ", err)

Error Message:  No lags=[1,2,..] nor nchop=? has been provided


The `lags` parameter uses `nchop=max(lags)` internally.

In [8]:
# Pass the lags that were used to build x; the first max(lags) = 3 rows are removed.
chopnan(x, lags=[1,2,3])

array([[69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941],
       [71.79094799, 78.22200481, 69.66094866],
       [71.6029272 , 71.79094799, 78.22200481]])

You can also chop fewer rows, e.g.

In [9]:
# Remove only the first 2 rows; the first remaining row still has a NaN in the lag-3 column.
chopnan(x, nchop=2)

array([[71.96466941, 76.42676888,         nan],
       [69.66094866, 71.96466941, 76.42676888],
       [78.22200481, 69.66094866, 71.96466941],
       [71.79094799, 78.22200481, 69.66094866],
       [71.6029272 , 71.79094799, 78.22200481]])

Or chop more rows, e.g.

In [10]:
# Remove the first 4 rows, one more than the largest lag, so one complete row is discarded as well.
chopnan(x, nchop=4)

array([[78.22200481, 69.66094866, 71.96466941],
       [71.79094799, 78.22200481, 69.66094866],
       [71.6029272 , 71.79094799, 78.22200481]])