# Lag functions

> Module for producing lagged versions of pandas DataFrames

In [None]:
#| default_exp lag

In [None]:
#| export
#| hide
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pingouin as pg

In [None]:
#| hide
from nbdev.showdoc import *

Let's start out by defining a short multivariate time-series:

In [None]:
# Five observations (rows) of three variables (columns)
X = pd.DataFrame([
    [2, 3, 4],
    [5, 6, 7],
    [8, 8, 6],
    [9, 10, 3],
    [11, 4, 6],
])
X

Unnamed: 0,0,1,2
0,2,3,4
1,5,6,7
2,8,8,6
3,9,10,3
4,11,4,6


In [None]:
#| export
def lag(X: pd.DataFrame, # A pandas dataframe with observations as rows and variables as columns
        lags: list, # One list of integer lags per column of X, in column order; 0 keeps the series as is, k > 0 shifts it down by k rows
       )->pd.DataFrame: # A dataframe with the specified lagged variables of X

    """Return a dataframe with specific lags.

    Every requested lag of every column becomes one output column. Columns carry
    a two-level index: the original column name converted to a string, then 't'
    for lag 0, 't-k' for a lag of k rows and 't+k' for a negative lag of -k rows
    (a lead). Rows that contain any missing value are dropped, which removes the
    rows the shifts cannot fill (and any row of X that already had a missing
    value in a selected column). The surviving rows keep their index labels from X.

    Entries of `lags` beyond the number of columns are ignored; too few entries
    raise an IndexError.
    """

    shifted = []      # one shifted Series per requested (column, lag) pair
    var_labels = []   # first column level: stringified source column name
    time_labels = []  # second column level: 't', 't-k' or 't+k'

    # Walk the columns by position so `lags` lines up with X even when column labels repeat.
    for pos, name in enumerate(X.columns):
        series = X.iloc[:, pos]
        for k in lags[pos]:
            shifted.append(series.shift(k))
            var_labels.append(f'{name}')
            if k == 0:
                time_labels.append('t')
            elif k > 0:
                time_labels.append(f't-{k}')
            else:
                # A negative lag looks forward in time; label it 't+k' rather than 't--k'.
                time_labels.append(f't+{-k}')

    if not shifted:
        # No lag was requested for any column (or X has no columns).
        return pd.DataFrame()

    # Assemble the result with a single concat instead of growing a frame inside the loop.
    res = pd.concat(shifted, axis=1)
    res.columns = pd.MultiIndex.from_arrays([var_labels, time_labels])

    return res.dropna()

In [None]:
# One list of lags per column of X, in column order
lags = [
    [0, 1],  # column 0: current value and lag 1
    [0, 2],  # column 1: current value and lag 2
    [0, 3],  # column 2: current value and lag 3
]

In [None]:
# Rows 0-2 drop out: the lag of 3 on column 2 leaves them with missing values
lag(X,lags)

Unnamed: 0_level_0,0,0,1,1,2,2
Unnamed: 0_level_1,t,t-1,t,t-2,t,t-3
3,9,8.0,10,6.0,3,4.0
4,11,9.0,4,8.0,6,7.0


In [None]:
#| export
def lag_uniform(X: pd.DataFrame, #A pandas dataframe with observations as rows and variables as columns
        lag: int, #Integer of the maximum lag to include for all variables of X
       )->pd.DataFrame: # A dataframe with the lagged variables of X

    """Return a uniformly lagged version of X.

    The result places lags 0, 1, ..., `lag` of all columns of X side by side.
    Columns carry a two-level index named ('Variable', 'Time'): the original
    column label, then 't' for lag 0 and 't-i' for lag i. Only rows that have
    `lag` earlier observations are produced, so the result has
    max(len(X) - lag, 0) rows. The index of X is not kept; rows are numbered
    from 0.
    """

    n_rows, n_cols = X.shape
    values = X.values

    # Number of rows with a complete history. Clamping at zero keeps the slices
    # below empty when `lag` exceeds the number of rows; a negative slice stop
    # would otherwise wrap around and return unrelated rows.
    n_out = max(n_rows - lag, 0)

    frames = []       # one block of n_out rows per lag
    var_labels = []   # first column level: original column labels, repeated per lag
    time_labels = []  # second column level: 't' or 't-i'

    for i in range(lag + 1):
        start = lag - i
        frames.append(pd.DataFrame(values[start:start + n_out]))
        var_labels += X.columns.tolist()
        time_labels += ['t' if i == 0 else f't-{i}'] * n_cols

    # Join all blocks at once instead of growing the frame inside the loop.
    res = pd.concat(frames, axis=1) if frames else pd.DataFrame()

    res.columns = [var_labels, time_labels]
    res.columns.names = ['Variable', 'Time']

    return res

In [None]:
# Each output row holds the current observation followed by the two preceding ones; rows are numbered from 0
lag_uniform(X,2)

Variable,0,1,2,0,1,2,0,1,2
Time,t,t,t,t-1,t-1,t-1,t-2,t-2,t-2
0,8,8,6,5,6,7,2,3,4
1,9,10,3,8,8,6,5,6,7
2,11,4,6,9,10,3,8,8,6


In [None]:
#| hide
# Run nbdev's export step for this notebook's project
import nbdev
nbdev.nbdev_export()