# Confusing Pandas Rolling
Pandas `rolling` gives slightly different results when it is applied to different slices of the same DataFrame.

In [1]:
import pandas as pd
import numpy as np

* Generate a DataFrame: `df1`
* Take a slice of `df1` as a second DataFrame: `df2`

In [2]:
# Seed the generator so that "Restart Kernel -> Run All" rebuilds exactly the
# same frame; without a seed every run produces different numbers.
rng = np.random.default_rng(42)

# 1000 rows of standard-normal noise in two columns, 'a' and 'b'.
df1 = pd.DataFrame(rng.standard_normal((1000, 2)), columns=['a', 'b'])

# Preview the positional window [100, 500) that is sliced out in the next step.
df1.iloc[100:500]

Unnamed: 0,a,b
100,1.830240,1.150862
101,-0.048248,-1.592608
102,-0.614654,0.787705
103,0.602816,1.737902
104,-0.467786,-0.859816
...,...,...
495,0.766202,-0.562597
496,0.787944,-0.976230
497,0.789858,-1.632839
498,-0.891867,0.125600


In [3]:
# Keep rows at positions 100..499 (all columns) as a separate frame.
df2 = df1.iloc[100:500, :]
df2

Unnamed: 0,a,b
100,1.830240,1.150862
101,-0.048248,-1.592608
102,-0.614654,0.787705
103,0.602816,1.737902
104,-0.467786,-0.859816
...,...,...
495,0.766202,-0.562597
496,0.787944,-0.976230
497,0.789858,-1.632839
498,-0.891867,0.125600


## Look at the rolling covariance between columns `a` and `b`, computed from `df1` and from `df2`

In [4]:
# 100-row rolling covariance of a against b on the full frame,
# then show only the rows at positions 100..499.
rst1 = df1["a"].rolling(window=100).cov(other=df1["b"])
rst1.iloc[100:500]

100   -0.032550
101   -0.038989
102   -0.043377
103   -0.030175
104   -0.052097
         ...   
495    0.012813
496    0.008416
497   -0.003164
498   -0.006308
499   -0.010771
Length: 400, dtype: float64

In [5]:
# Same 100-row rolling covariance, but computed on the slice only.
rst2 = df2["a"].rolling(window=100).cov(other=df2["b"])
rst2

100         NaN
101         NaN
102         NaN
103         NaN
104         NaN
         ...   
495    0.012813
496    0.008416
497   -0.003164
498   -0.006308
499   -0.010771
Length: 400, dtype: float64

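## There's a difference.

The leading `NaN` rows appear only because the 100-row window is not yet full at the start of `df2`; the rows where both results exist still differ by a tiny floating-point amount.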

In [6]:
# Element-wise gap (aligned on the index) between the full-frame and slice results.
rst1.iloc[100:500].sub(rst2)

100             NaN
101             NaN
102             NaN
103             NaN
104             NaN
           ...     
495    1.040834e-17
496    1.214306e-17
497    1.431147e-17
498    1.387779e-17
499    1.040834e-17
Length: 400, dtype: float64

In [7]:
# Largest absolute gap between the two covariance series (NaN rows are skipped by max).
rst1.iloc[100:500].sub(rst2).abs().max()

5.551115123125783e-17

## Look at Variance

In [8]:
# 100-row rolling variance of column a: once on the full frame, once on the slice.
rst3 = df1["a"].rolling(window=100).var()
rst4 = df2["a"].rolling(window=100).var()

# Gap between the two results over positions 100..499.
rst3.iloc[100:500].sub(rst4)

100             NaN
101             NaN
102             NaN
103             NaN
104             NaN
           ...     
495    4.440892e-16
496    2.220446e-16
497    2.220446e-16
498    2.220446e-16
499    2.220446e-16
Name: a, Length: 400, dtype: float64

In [9]:
# Largest absolute gap between the two variance series.
rst3.iloc[100:500].sub(rst4).abs().max()

4.440892098500626e-16

## What if you want to compute a rolling beta, $\beta = \mathrm{cov}(x, y) / \mathrm{var}(x)$?

* In the case below, the absolute error is small, but it may cause serious problems for certain data.

In [10]:
# Rolling beta = rolling cov(a, b) / rolling var(a), on the full frame ...
rst5 = (
    df1["a"].rolling(window=100).cov(other=df1["b"])
    .div(df1["a"].rolling(window=100).var())
)
# ... and on the slice.
rst6 = (
    df2["a"].rolling(window=100).cov(other=df2["b"])
    .div(df2["a"].rolling(window=100).var())
)

# Gap between the two beta series over positions 100..499.
rst5.iloc[100:500].sub(rst6)

100             NaN
101             NaN
102             NaN
103             NaN
104             NaN
           ...     
495    3.469447e-18
496    9.540979e-18
497    1.301043e-17
498    1.301043e-17
499    1.040834e-17
Length: 400, dtype: float64

In [11]:
# Largest absolute gap between the two rolling-beta series.
rst5.iloc[100:500].sub(rst6).abs().max()

8.326672684688674e-17