In [71]:
import pandas as pd
import numpy as np
from itertools import product

# Resampling

### Data Prep

In [105]:
dates = pd.date_range(start='2021-01-01', end='2021-01-14', freq='D')
categories1 = ['A', 'B']
data = list(product(categories1, dates))

df = pd.DataFrame(data, columns=['category1', 'date'])

target = np.random.randint(low=0, high=10, size=len(df))
df['target'] = target
df

Unnamed: 0,category1,date,target
0,A,2021-01-01,0
1,A,2021-01-02,8
2,A,2021-01-03,2
3,A,2021-01-04,6
4,A,2021-01-05,4
5,A,2021-01-06,0
6,A,2021-01-07,2
7,A,2021-01-08,7
8,A,2021-01-09,4
9,A,2021-01-10,6


### Demo

In [106]:
df.groupby(['category1']).resample('W', on='date', closed='left').sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,target
category1,date,Unnamed: 2_level_1
A,2021-01-03,8
A,2021-01-10,25
A,2021-01-17,32
B,2021-01-03,9
B,2021-01-10,20
B,2021-01-17,27


In [107]:
df.groupby(['category1']).resample('W', on='date', closed='right').sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,target
category1,date,Unnamed: 2_level_1
A,2021-01-03,10
A,2021-01-10,29
A,2021-01-17,26
B,2021-01-03,10
B,2021-01-10,19
B,2021-01-17,27


# Within Group Lagging (Shifting)

### Data Prep

In [112]:
dates = pd.date_range(start='2021-01-01', end='2021-01-04', freq='D')
categories1 = ['A', 'B']
categories2 = ['CC', 'DD']
data = list(product(categories1, categories2, dates))

In [113]:
df = pd.DataFrame(data, columns=['category1', 'category2', 'date'])
target = np.random.randint(low=0, high=10, size=len(df))
df['target'] = target
df

Unnamed: 0,category1,category2,date,target
0,A,CC,2021-01-01,8
1,A,CC,2021-01-02,9
2,A,CC,2021-01-03,4
3,A,CC,2021-01-04,2
4,A,DD,2021-01-01,2
5,A,DD,2021-01-02,7
6,A,DD,2021-01-03,9
7,A,DD,2021-01-04,8
8,B,CC,2021-01-01,8
9,B,CC,2021-01-02,0


### Demo

In [114]:
df['shifted_c1'] = df.groupby(['category1'])['target'].shift(1, fill_value=None)
df

Unnamed: 0,category1,category2,date,target,shifted_c1
0,A,CC,2021-01-01,8,
1,A,CC,2021-01-02,9,8.0
2,A,CC,2021-01-03,4,9.0
3,A,CC,2021-01-04,2,4.0
4,A,DD,2021-01-01,2,2.0
5,A,DD,2021-01-02,7,2.0
6,A,DD,2021-01-03,9,7.0
7,A,DD,2021-01-04,8,9.0
8,B,CC,2021-01-01,8,
9,B,CC,2021-01-02,0,8.0


In [115]:
df['shifted_c1_c2'] = df.groupby(['category1', 'category2'])['target'].shift(1, fill_value=None)
df

Unnamed: 0,category1,category2,date,target,shifted_c1,shifted_c1_c2
0,A,CC,2021-01-01,8,,
1,A,CC,2021-01-02,9,8.0,8.0
2,A,CC,2021-01-03,4,9.0,9.0
3,A,CC,2021-01-04,2,4.0,4.0
4,A,DD,2021-01-01,2,2.0,
5,A,DD,2021-01-02,7,2.0,2.0
6,A,DD,2021-01-03,9,7.0,7.0
7,A,DD,2021-01-04,8,9.0,9.0
8,B,CC,2021-01-01,8,,
9,B,CC,2021-01-02,0,8.0,8.0


# Within Group Rolling

### Data Prep

In [117]:
dates = pd.date_range(start='2021-01-01', end='2021-01-04', freq='D')
categories1 = ['A', 'B']
categories2 = ['CC', 'DD']
data = list(product(categories1, categories2, dates))

In [118]:
df = pd.DataFrame(data, columns=['category1', 'category2', 'date'])
target = np.random.randint(low=0, high=10, size=len(df))
df['target'] = target
df

Unnamed: 0,category1,category2,date,target
0,A,CC,2021-01-01,9
1,A,CC,2021-01-02,9
2,A,CC,2021-01-03,2
3,A,CC,2021-01-04,1
4,A,DD,2021-01-01,0
5,A,DD,2021-01-02,9
6,A,DD,2021-01-03,4
7,A,DD,2021-01-04,7
8,B,CC,2021-01-01,4
9,B,CC,2021-01-02,9


### Demo

In [122]:
df.groupby(['category1']).rolling(2)['target'].mean()#.reset_index(drop=True)

category1    
A          0     NaN
           1     9.0
           2     5.5
           3     1.5
           4     0.5
           5     4.5
           6     6.5
           7     5.5
B          8     NaN
           9     6.5
           10    6.5
           11    3.0
           12    4.0
           13    7.0
           14    6.5
           15    3.0
Name: target, dtype: float64

In [123]:
df.groupby(['category1', 'category2']).rolling(2)['target'].mean()#.reset_index()

category1  category2    
A          CC         0     NaN
                      1     9.0
                      2     5.5
                      3     1.5
           DD         4     NaN
                      5     4.5
                      6     6.5
                      7     5.5
B          CC         8     NaN
                      9     6.5
                      10    6.5
                      11    3.0
           DD         12    NaN
                      13    7.0
                      14    6.5
                      15    3.0
Name: target, dtype: float64