In [1]:
import pandas as pd

# Shifting
Shifting is a very useful functionality we could use to create training datasets for time series applications.
In auto regression we use shifting to create lagged features.

In [2]:
df = pd.DataFrame({'sales': [18, 10, 14, 13, 19, 24, 25, 29]})
print(df)

   sales
0     18
1     10
2     14
3     13
4     19
5     24
6     25
7     29


In [3]:
df.shift(1)

Unnamed: 0,sales
0,
1,18.0
2,10.0
3,14.0
4,13.0
5,19.0
6,24.0
7,25.0


In [4]:
# concat shifted dataframes with the original dataframe.
df_lags = pd.concat([df, df.shift(1), df.shift(2)], axis=1)
df_lags

Unnamed: 0,sales,sales.1,sales.2
0,18,,
1,10,18.0,
2,14,10.0,18.0
3,13,14.0,10.0
4,19,13.0,14.0
5,24,19.0,13.0
6,25,24.0,19.0
7,29,25.0,24.0


In [5]:
# we would also need to remove null values
df_lags.dropna(inplace=True)
df_lags

Unnamed: 0,sales,sales.1,sales.2
2,14,10.0,18.0
3,13,14.0,10.0
4,19,13.0,14.0
5,24,19.0,13.0
6,25,24.0,19.0
7,29,25.0,24.0


## Shifting a group
We can apply shifting to each group separately. For example, we want to group the following df by store and for each store find the previous day sales.

In [6]:
df = pd.DataFrame({'store': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
                   'sales': [18, 10, 14, 13, 19, 24, 25, 29]})
print(df)

  store  sales
0     A     18
1     A     10
2     A     14
3     A     13
4     B     19
5     B     24
6     B     25
7     B     29


In [7]:
#add column that displays lag of sales column by store
df['lagged_sales'] = df.groupby(['store'])['sales'].shift(1)

#view updated DataFrame
print(df)

  store  sales  lagged_sales
0     A     18           NaN
1     A     10          18.0
2     A     14          10.0
3     A     13          14.0
4     B     19           NaN
5     B     24          19.0
6     B     25          24.0
7     B     29          25.0
