In [29]:
from tsai.data.core import TSTensor
from tsai.data.preprocessing  import TSRollingMean
import torch 
import numpy as np
from fastai.test_utils import test_eq

# Basic example without NaNs

In [17]:
def rolling_means_example1():
    """Show TSRollingMean with window=7 on a single variable that has no NaNs.

    Prints the input TSTensor, then the shape and the raw data of the
    transformed tensor.
    """
    series = [
        211, 191, 176, 167, 188, 212, 207, 293, 295, 372, 427,
        417, 415, 484, 442, 502, 571, 638, 689, 742, 738, 799,
    ]
    # Nest twice so the series becomes one sample with one variable.
    source = TSTensor([[series]])
    print(source)
    rolling_mean = TSRollingMean(sel_vars=[0], window=7)
    result = rolling_mean(source)
    print(result.shape)
    print(result.data)


rolling_means_example1()

TSTensor(samples:1, vars:1, len:22, device=cpu, dtype=torch.int64)
torch.Size([1, 2, 22])
tensor([[[211.0000, 191.0000, 176.0000, 167.0000, 188.0000, 212.0000, 207.0000,
          293.0000, 295.0000, 372.0000, 427.0000, 417.0000, 415.0000, 484.0000,
          442.0000, 502.0000, 571.0000, 638.0000, 689.0000, 742.0000, 738.0000,
          799.0000],
         [211.0000, 201.0000, 192.6667, 186.2500, 186.6000, 190.8333, 193.1429,
          204.8571, 219.7143, 247.7143, 284.8571, 317.5714, 346.5714, 386.1429,
          407.4286, 437.0000, 465.4286, 495.5714, 534.4286, 581.1429, 617.4286,
          668.4286]]])


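Calcula la media desde max(0, pos - window + 1) hasta pos (ambos inclusive); es decir, sobre los últimos `window` valores, o sobre menos cuando todavía no hay suficientes.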

In [4]:
# Manual check of the rolling mean at index 1: only two values are available, so the window is partial.
(211+191)/2

201.0

In [5]:
# Manual check at index 2: partial window over the first three values.
(211+191+176)/3

192.66666666666666

In [6]:
# Manual check at index 6: the first full window of 7 values.
(211+191+176+167+188+212+207)/7

193.14285714285714

In [7]:
# Manual check at index 7: the window slides forward, dropping 211 and including 293.
(191+176+167+188+212+207+293)/7

204.85714285714286

# Same example with some NaNs, window = 1

In [30]:
def rolling_means_example2():
    """Show TSRollingMean with window=1 on a single variable containing NaNs.

    Prints the nested input list, the tensor data and shape before the
    transform, and the data and shape after it.
    """
    nan = np.nan
    series = [
        211, 191, nan, 167, 188, nan, 207, 293, 295, 372, 427,
        417, 415, 484, 442, 502, 571, nan, 689, nan, 738, 799,
    ]
    # Nest twice so the series becomes one sample with one variable.
    nested = [[series]]
    print(nested)

    source = TSTensor(nested)
    print(source.data)
    print(source.shape)

    rolling_mean = TSRollingMean(sel_vars=[0], window=1)
    result = rolling_mean(source)
    print(result.data)
    print(result.shape)


rolling_means_example2()

[[[211, 191, nan, 167, 188, nan, 207, 293, 295, 372, 427, 417, 415, 484, 442, 502, 571, nan, 689, nan, 738, 799]]]
tensor([[[211., 191.,  nan, 167., 188.,  nan, 207., 293., 295., 372., 427.,
          417., 415., 484., 442., 502., 571.,  nan, 689.,  nan, 738., 799.]]])
torch.Size([1, 1, 22])
tensor([[[211., 191., 191., 167., 188., 188., 207., 293., 295., 372., 427.,
          417., 415., 484., 442., 502., 571., 571., 689., 689., 738., 799.],
         [211., 191., 191., 167., 188., 188., 207., 293., 295., 372., 427.,
          417., 415., 484., 442., 502., 571., 571., 689., 689., 738., 799.]]])
torch.Size([1, 2, 22])


Note that the NaNs have been filled with the immediately preceding numeric value. The same happens if we go back to window = 7.

In [31]:
def rolling_means_example3():
    """Show TSRollingMean with window=7 on a single variable containing NaNs.

    Prints the nested input list, the tensor data and shape before the
    transform, and the data and shape after it.
    """
    nan = np.nan
    series = [
        211, 191, nan, 167, 188, nan, 207, 293, 295, 372, 427,
        417, 415, 484, 442, 502, 571, nan, 689, nan, 738, 799,
    ]
    # Nest twice so the series becomes one sample with one variable.
    nested = [[series]]
    print(nested)

    source = TSTensor(nested)
    print(source.data)
    print(source.shape)

    rolling_mean = TSRollingMean(sel_vars=[0], window=7)
    result = rolling_mean(source)
    print(result.data)
    print(result.shape)


rolling_means_example3()

[[[211, 191, nan, 167, 188, nan, 207, 293, 295, 372, 427, 417, 415, 484, 442, 502, 571, nan, 689, nan, 738, 799]]]
tensor([[[211., 191.,  nan, 167., 188.,  nan, 207., 293., 295., 372., 427.,
          417., 415., 484., 442., 502., 571.,  nan, 689.,  nan, 738., 799.]]])
torch.Size([1, 1, 22])
tensor([[[211.0000, 191.0000, 191.0000, 167.0000, 188.0000, 188.0000, 207.0000,
          293.0000, 295.0000, 372.0000, 427.0000, 417.0000, 415.0000, 484.0000,
          442.0000, 502.0000, 571.0000, 571.0000, 689.0000, 689.0000, 738.0000,
          799.0000],
         [211.0000, 201.0000, 197.6667, 190.0000, 189.6000, 189.3333, 191.8571,
          203.5714, 218.4286, 244.2857, 281.4286, 314.1429, 346.5714, 386.1429,
          407.4286, 437.0000, 465.4286, 486.0000, 524.8571, 564.0000, 600.2857,
          651.2857]]])
torch.Size([1, 2, 22])


## Ejemplo de la página web de TSAI
> Observa que, si los NaN están al principio de la serie, se rellenan con el primer valor numérico posterior.

In [34]:
def tsai_example_1(seed=None):
    """Show TSRollingMean (window=3) on variables 0 and 2 of a random tensor with NaNs.

    A random (1, 3, 8) tensor is created and every value above 0.6 is
    replaced by NaN. The input data and shape are printed, then the shape
    and data of the transformed tensor.

    Args:
        seed: optional integer passed to ``torch.manual_seed`` so the random
            input (and therefore the NaN positions) can be reproduced. With
            the default ``None`` the global RNG state is left untouched.
    """
    if seed is not None:
        # Seeding makes the example repeatable across kernel restarts.
        torch.manual_seed(seed)

    bs, c_in, seq_len = 1, 3, 8
    t = TSTensor(torch.rand(bs, c_in, seq_len))
    t[t > .6] = np.nan  # inject missing values
    print(t.data)
    print(t.shape)
    print("--> Rolling mean vars 0,2 | w = 3 <--")
    enc_t = TSRollingMean(sel_vars=[0,2], window=3)(t)
    print(enc_t.shape)
    print(enc_t.data)

tsai_example_1()

tensor([[[   nan,    nan, 0.0100, 0.3162, 0.1992, 0.1408,    nan, 0.3045],
         [0.0657, 0.0416,    nan,    nan,    nan, 0.1085, 0.3925, 0.5477],
         [   nan, 0.4750,    nan,    nan, 0.5741,    nan, 0.5674, 0.3548]]])
torch.Size([1, 3, 8])
--> Rolling mean vars 0,2 | w = 3 <--
torch.Size([1, 5, 8])
tensor([[[0.0100, 0.0100, 0.0100, 0.3162, 0.1992, 0.1408, 0.1408, 0.3045],
         [0.0657, 0.0416,    nan,    nan,    nan, 0.1085, 0.3925, 0.5477],
         [0.4750, 0.4750, 0.4750, 0.4750, 0.5741, 0.5741, 0.5674, 0.3548],
         [0.0100, 0.0100, 0.0100, 0.1121, 0.1751, 0.2187, 0.1603, 0.1954],
         [0.4750, 0.4750, 0.4750, 0.4750, 0.5080, 0.5411, 0.5719, 0.4988]]])


In [36]:
# Manual check of the rolling mean of variable 0 at index 3 (window=3), using the NaN-filled values 0.01, 0.01 and 0.3162.
(0.3162+0.01+0.01)/3

0.11206666666666666

Observa que, por defecto, `replace=False` y se añade una variable extra por cada media móvil calculada. Si quieres sustituir la variable original por su media móvil, basta con poner `replace=True`.

In [38]:
def tsai_example_2(seed=None):
    """Show TSRollingMean (window=3) with replace=True on a random tensor with NaNs.

    A random (1, 3, 8) tensor is created and every value above 0.6 is
    replaced by NaN. The input data and shape are printed, the transform is
    applied with ``replace=True``, and the number of variables is checked to
    be unchanged before the result is printed.

    Args:
        seed: optional integer passed to ``torch.manual_seed`` so the random
            input (and therefore the NaN positions) can be reproduced. With
            the default ``None`` the global RNG state is left untouched.
    """
    if seed is not None:
        # Seeding makes the example repeatable across kernel restarts.
        torch.manual_seed(seed)

    bs, c_in, seq_len = 1, 3, 8
    t = TSTensor(torch.rand(bs, c_in, seq_len))
    t[t > .6] = np.nan  # inject missing values
    print(t.data)
    print(t.shape)

    print("--> Rolling mean vars default | w = 3 | replace = True => no debe añadir más columnas <--")
    enc_t = TSRollingMean(window=3, replace=True)(t)
    # Compare against c_in rather than a literal so the check follows the input shape.
    test_eq(enc_t.shape[1], c_in)
    print(enc_t.data)

tsai_example_2()

tensor([[[0.1753,    nan,    nan, 0.1087,    nan, 0.5195,    nan,    nan],
         [   nan,    nan, 0.2085,    nan, 0.2067,    nan, 0.1996, 0.4179],
         [0.0807,    nan, 0.5955,    nan,    nan, 0.0918, 0.1510, 0.2600]]])
torch.Size([1, 3, 8])
--> Rolling mean vars default | w = 3 | replace = True => no debe añadir más columnas <--
tensor([[[0.1753, 0.1753, 0.1753, 0.1531, 0.1309, 0.2456, 0.3826, 0.5195],
         [0.2085, 0.2085, 0.2085, 0.2085, 0.2079, 0.2073, 0.2043, 0.2747],
         [0.0807, 0.0807, 0.2523, 0.4239, 0.5955, 0.4276, 0.2794, 0.1676]]])
