Time-series forecasting to forecast store sales on data from Corporación Favorita, a large Ecuadorian-based grocery retailer

Build a model that more accurately predicts the unit sales for thousands of items sold at different Favorita stores. You'll practice your machine learning skills with an approachable training dataset of dates, store, and item information, promotions, and unit sales.

### 1) Import libraries and load data

In [1]:
import pandas as pd
import numpy as np
import datetime
import tqdm
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

In [2]:
# Load data
df_stores = pd.read_csv('./stores.csv')
df_holidays = pd.read_csv('./holidays_events.csv')
df_oil = pd.read_csv('./oil.csv')
df_transactions = pd.read_csv('./transactions.csv')
df_train = pd.read_csv('./train.csv')
df_test = pd.read_csv('./test.csv')

### 2) EDA

#### Store data

In [3]:
df_stores.dtypes

store_nbr     int64
city         object
state        object
type         object
cluster       int64
dtype: object

In [4]:
# Change datatype to category for ease of handling
df_stores['city'] = df_stores['city'].astype('category')
df_stores['state'] = df_stores['state'].astype('category')
df_stores['type'] = df_stores['type'].astype('category')
df_stores['cluster'] = df_stores['cluster'].astype('category')

df_stores.dtypes

store_nbr       int64
city         category
state        category
type         category
cluster      category
dtype: object

In [5]:
print(df_stores.shape, 'null:', df_stores.isnull().sum().sum())
df_stores.select_dtypes(include='category').describe()

(54, 5) null: 0


Unnamed: 0,city,state,type,cluster
count,54,54,54,54
unique,22,16,5,17
top,Quito,Pichincha,D,3
freq,18,19,18,7


In [6]:
df_stores.head()

Unnamed: 0,store_nbr,city,state,type,cluster
0,1,Quito,Pichincha,D,13
1,2,Quito,Pichincha,D,13
2,3,Quito,Pichincha,D,8
3,4,Quito,Pichincha,D,9
4,5,Santo Domingo,Santo Domingo de los Tsachilas,D,4


#### Holiday data

In [7]:
print(df_holidays.shape)
df_holidays.head()

(350, 6)


Unnamed: 0,date,type,locale,locale_name,description,transferred
0,2012-03-02,Holiday,Local,Manta,Fundacion de Manta,False
1,2012-04-01,Holiday,Regional,Cotopaxi,Provincializacion de Cotopaxi,False
2,2012-04-12,Holiday,Local,Cuenca,Fundacion de Cuenca,False
3,2012-04-14,Holiday,Local,Libertad,Cantonizacion de Libertad,False
4,2012-04-21,Holiday,Local,Riobamba,Cantonizacion de Riobamba,False


In [8]:
df_holidays['date'].dtypes

dtype('O')

In [9]:
df_holidays['date'] = pd.to_datetime(df_holidays['date'])
df_holidays_deduped = df_holidays.drop_duplicates(subset=['date'])
df_holidays_deduped = df_holidays_deduped[['date']]
df_holidays_deduped['is_holiday'] = 1
print(df_holidays_deduped.shape)
df_holidays_deduped.head()

(312, 2)


Unnamed: 0,date,is_holiday
0,2012-03-02,1
1,2012-04-01,1
2,2012-04-12,1
3,2012-04-14,1
4,2012-04-21,1


In [10]:
df_holidays_deduped.select_dtypes(include='datetime').describe()

Unnamed: 0,date
count,312
mean,2015-04-24 00:55:23.076923136
min,2012-03-02 00:00:00
25%,2013-12-24 18:00:00
50%,2015-05-11 00:00:00
75%,2016-07-08 00:00:00
max,2017-12-26 00:00:00


#### Oil Price data

In [11]:
df_oil.dtypes

date           object
dcoilwtico    float64
dtype: object

In [12]:
df_oil['date'] = pd.to_datetime(df_oil['date'])

In [13]:
df_oil.dtypes

date          datetime64[ns]
dcoilwtico           float64
dtype: object

In [14]:
print(df_oil.shape, 'null:', df_oil.isnull().sum().sum())
df_oil.describe()

(1218, 2) null: 43


Unnamed: 0,date,dcoilwtico
count,1218,1175.0
mean,2015-05-02 12:00:00,67.714366
min,2013-01-01 00:00:00,26.19
25%,2014-03-03 06:00:00,46.405
50%,2015-05-02 12:00:00,53.19
75%,2016-06-30 18:00:00,95.66
max,2017-08-31 00:00:00,110.62
std,,25.630476


In [15]:
full_date_range = pd.date_range(start=df_oil['date'].min(), end=df_oil['date'].max(), freq='D')
missing_dates = set(full_date_range) - set(df_oil['date'].unique())
missing_dates = list(missing_dates)
len(missing_dates)

486

In [16]:
# Append missing dates into data_model
missing_rows = pd.DataFrame({'date': missing_dates,
                             'dcoilwtico': np.nan})
missing_rows['date'] = missing_rows['date'].dt.date
df_oil['date'] = df_oil['date'].dt.date
df_oil_new = pd.concat([missing_rows, df_oil], ignore_index=True)
df_oil_new.sort_values(by='date', ascending=True, inplace=True)
print(df_oil_new.shape)

(1704, 2)


In [17]:
# Interpolate the 'dcoilwtico' column
df_oil_new['dcoilwtico'].interpolate()

# Fill any remaining NaN values with backward fill
df_oil_int = df_oil_new.bfill()

# Check for any remaining null values
df_oil_int.isnull().sum().sum()

np.int64(0)

#### Transactions data

In [18]:
df_transactions.dtypes

date            object
store_nbr        int64
transactions     int64
dtype: object

In [19]:
df_transactions['date'] = pd.to_datetime(df_transactions['date'])

In [20]:
df_transactions.dtypes

date            datetime64[ns]
store_nbr                int64
transactions             int64
dtype: object

In [21]:
print(df_transactions.shape, 'null:', df_transactions.isnull().sum().sum())
df_transactions.describe()

(83488, 3) null: 0


Unnamed: 0,date,store_nbr,transactions
count,83488,83488.0,83488.0
mean,2015-05-20 16:07:40.866232064,26.939237,1694.602158
min,2013-01-01 00:00:00,1.0,5.0
25%,2014-03-27 00:00:00,13.0,1046.0
50%,2015-06-08 00:00:00,27.0,1393.0
75%,2016-07-14 06:00:00,40.0,2079.0
max,2017-08-15 00:00:00,54.0,8359.0
std,,15.608204,963.286644


In [22]:
df_transactions

Unnamed: 0,date,store_nbr,transactions
0,2013-01-01,25,770
1,2013-01-02,1,2111
2,2013-01-02,2,2358
3,2013-01-02,3,3487
4,2013-01-02,4,1922
...,...,...,...
83483,2017-08-15,50,2804
83484,2017-08-15,51,1573
83485,2017-08-15,52,2255
83486,2017-08-15,53,932


#### df_train

In [23]:
df_train.dtypes

id               int64
date            object
store_nbr        int64
family          object
sales          float64
onpromotion      int64
dtype: object

In [24]:
df_train['date'] = pd.to_datetime(df_train['date'])
df_train['family'] = df_train['family'].astype('category')

In [25]:
df_train.dtypes

id                      int64
date           datetime64[ns]
store_nbr               int64
family               category
sales                 float64
onpromotion             int64
dtype: object

In [26]:
print(df_train.shape, 'null:', df_train.isnull().sum().sum())
df_train.describe()

(3000888, 6) null: 0


Unnamed: 0,id,date,store_nbr,sales,onpromotion
count,3000888.0,3000888,3000888.0,3000888.0,3000888.0
mean,1500444.0,2015-04-24 08:27:04.703088384,27.5,357.7757,2.60277
min,0.0,2013-01-01 00:00:00,1.0,0.0,0.0
25%,750221.8,2014-02-26 18:00:00,14.0,0.0,0.0
50%,1500444.0,2015-04-24 12:00:00,27.5,11.0,0.0
75%,2250665.0,2016-06-19 06:00:00,41.0,195.8473,0.0
max,3000887.0,2017-08-15 00:00:00,54.0,124717.0,741.0
std,866281.9,,15.58579,1101.998,12.21888


In [27]:
df_train.select_dtypes(include='category').describe()

Unnamed: 0,family
count,3000888
unique,33
top,AUTOMOTIVE
freq,90936


In [28]:
df_train.head()

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0


full_date_range = pd.date_range(start=df_train['date'].min(), end=df_train['date'].max(), freq='D')
missing_dates = set(full_date_range) - set(df_train['date'].unique())
missing_dates = list(missing_dates)
missing_dates

##### Append missing dates into data_model
missing_rows = pd.DataFrame({'date': missing_dates,
                             'store_nbr': np.nan, 
                             'family': np.nan,
                             'sales': np.nan,
                             'onpromotion': np.nan})
missing_rows['date'] = missing_rows['date'].dt.date
df_train['date'] = df_train['date'].dt.date
df_train_new = pd.concat([missing_rows, df_train[['date', 'store_nbr', 'family', 'sales', 'onpromotion']]], ignore_index=True)
df_train_new.sort_values(by='date', ascending=True, inplace=True)
print(df_train_new.shape)
df_train_new.head()

In [29]:
df_train_new = pd.merge(df_train, df_stores, how='left', on=['store_nbr'])
print(df_train_new.shape)
df_train_new.head()

(3000888, 10)


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,type,cluster
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13


In [30]:
df_train_new['date'] = pd.to_datetime(df_train_new['date'])
df_train_new = pd.merge(df_train_new, df_holidays_deduped, how='left', on=['date'])
df_train_new['is_holiday'] = df_train_new['is_holiday'].fillna(0)
print(df_train_new.shape)
df_train_new.head()

(3000888, 11)


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,type,cluster,is_holiday
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,1.0
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,1.0
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,1.0
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,1.0
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,1.0


In [31]:
df_oil_int['date'] = pd.to_datetime(df_oil_int['date'])
df_train_new = pd.merge(df_train_new, df_oil_int, how='left', on=['date'])
print(df_train_new.shape)
df_train_new.head()

(3000888, 12)


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,type,cluster,is_holiday,dcoilwtico
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,1.0,93.14
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,1.0,93.14
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,1.0,93.14
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,1.0,93.14
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,1.0,93.14


In [32]:
df_transactions['date'] = pd.to_datetime(df_transactions['date'])
df_train_new = pd.merge(df_train_new, df_transactions, how='left', on=['date', 'store_nbr'])
df_train_new['transactions'] = df_train_new['transactions'].fillna(0)
print(df_train_new.shape)
df_train_new.head()

(3000888, 13)


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,type,cluster,is_holiday,dcoilwtico,transactions
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0,Quito,Pichincha,D,13,1.0,93.14,0.0
1,1,2013-01-01,1,BABY CARE,0.0,0,Quito,Pichincha,D,13,1.0,93.14,0.0
2,2,2013-01-01,1,BEAUTY,0.0,0,Quito,Pichincha,D,13,1.0,93.14,0.0
3,3,2013-01-01,1,BEVERAGES,0.0,0,Quito,Pichincha,D,13,1.0,93.14,0.0
4,4,2013-01-01,1,BOOKS,0.0,0,Quito,Pichincha,D,13,1.0,93.14,0.0


In [33]:
df_train_new.isnull().sum().sum()

np.int64(0)

In [34]:
df_test.dtypes

id              int64
date           object
store_nbr       int64
family         object
onpromotion     int64
dtype: object

In [35]:
df_test['date'] = pd.to_datetime(df_test['date'])
df_test['family'] = df_test['family'].astype('category')

In [36]:
df_test.dtypes

id                      int64
date           datetime64[ns]
store_nbr               int64
family               category
onpromotion             int64
dtype: object

In [37]:
print(df_test.shape, 'null:', df_test.isnull().sum().sum())
df_test.describe()

(28512, 5) null: 0


Unnamed: 0,id,date,store_nbr,onpromotion
count,28512.0,28512,28512.0,28512.0
mean,3015144.0,2017-08-23 12:00:00,27.5,6.965383
min,3000888.0,2017-08-16 00:00:00,1.0,0.0
25%,3008016.0,2017-08-19 18:00:00,14.0,0.0
50%,3015144.0,2017-08-23 12:00:00,27.5,0.0
75%,3022271.0,2017-08-27 06:00:00,41.0,6.0
max,3029399.0,2017-08-31 00:00:00,54.0,646.0
std,8230.85,,15.586057,20.683952


In [38]:
df_test.select_dtypes(include='category').describe()

Unnamed: 0,family
count,28512
unique,33
top,AUTOMOTIVE
freq,864


In [39]:
df_test_new = pd.merge(df_test, df_stores, how='left', on=['store_nbr'])
print(df_test_new.shape)
df_test_new.head()

(28512, 9)


Unnamed: 0,id,date,store_nbr,family,onpromotion,city,state,type,cluster
0,3000888,2017-08-16,1,AUTOMOTIVE,0,Quito,Pichincha,D,13
1,3000889,2017-08-16,1,BABY CARE,0,Quito,Pichincha,D,13
2,3000890,2017-08-16,1,BEAUTY,2,Quito,Pichincha,D,13
3,3000891,2017-08-16,1,BEVERAGES,20,Quito,Pichincha,D,13
4,3000892,2017-08-16,1,BOOKS,0,Quito,Pichincha,D,13


In [40]:
df_test_new['date'] = pd.to_datetime(df_test_new['date'])
df_test_new = pd.merge(df_test_new, df_holidays_deduped, how='left', on=['date'])
df_test_new['is_holiday'] = df_test_new['is_holiday'].fillna(0)
print(df_test_new.shape)
df_test_new.head()

(28512, 10)


Unnamed: 0,id,date,store_nbr,family,onpromotion,city,state,type,cluster,is_holiday
0,3000888,2017-08-16,1,AUTOMOTIVE,0,Quito,Pichincha,D,13,0.0
1,3000889,2017-08-16,1,BABY CARE,0,Quito,Pichincha,D,13,0.0
2,3000890,2017-08-16,1,BEAUTY,2,Quito,Pichincha,D,13,0.0
3,3000891,2017-08-16,1,BEVERAGES,20,Quito,Pichincha,D,13,0.0
4,3000892,2017-08-16,1,BOOKS,0,Quito,Pichincha,D,13,0.0


In [41]:
df_oil_int['date'] = pd.to_datetime(df_oil_int['date'])
df_test_new = pd.merge(df_test_new, df_oil_int, how='left', on=['date'])
print(df_test_new.shape)
df_test_new.head()

(28512, 11)


Unnamed: 0,id,date,store_nbr,family,onpromotion,city,state,type,cluster,is_holiday,dcoilwtico
0,3000888,2017-08-16,1,AUTOMOTIVE,0,Quito,Pichincha,D,13,0.0,46.8
1,3000889,2017-08-16,1,BABY CARE,0,Quito,Pichincha,D,13,0.0,46.8
2,3000890,2017-08-16,1,BEAUTY,2,Quito,Pichincha,D,13,0.0,46.8
3,3000891,2017-08-16,1,BEVERAGES,20,Quito,Pichincha,D,13,0.0,46.8
4,3000892,2017-08-16,1,BOOKS,0,Quito,Pichincha,D,13,0.0,46.8


In [42]:
df_transactions['date'] = pd.to_datetime(df_transactions['date'])
df_test_new = pd.merge(df_test_new, df_transactions, how='left', on=['date', 'store_nbr'])
df_test_new['transactions'] = df_test_new['transactions'].fillna(0)
print(df_test_new.shape)
df_test_new.head()

(28512, 12)


Unnamed: 0,id,date,store_nbr,family,onpromotion,city,state,type,cluster,is_holiday,dcoilwtico,transactions
0,3000888,2017-08-16,1,AUTOMOTIVE,0,Quito,Pichincha,D,13,0.0,46.8,0.0
1,3000889,2017-08-16,1,BABY CARE,0,Quito,Pichincha,D,13,0.0,46.8,0.0
2,3000890,2017-08-16,1,BEAUTY,2,Quito,Pichincha,D,13,0.0,46.8,0.0
3,3000891,2017-08-16,1,BEVERAGES,20,Quito,Pichincha,D,13,0.0,46.8,0.0
4,3000892,2017-08-16,1,BOOKS,0,Quito,Pichincha,D,13,0.0,46.8,0.0


In [43]:
df_test_new.isnull().sum().sum()

np.int64(0)

**Observation:**

- Train data: 2013 - Mid Aug 2017
- Test data: Mid - Eng Aug 2017
- Holiday data: Mar 2012 - Dec 2017
- Oil data: 2013 - End Aug 2017
- Transaction data: 2013 - Mid Aug 2017

#### Granger causality test

In [44]:
df_train_new.columns

Index(['id', 'date', 'store_nbr', 'family', 'sales', 'onpromotion', 'city',
       'state', 'type', 'cluster', 'is_holiday', 'dcoilwtico', 'transactions'],
      dtype='object')

In [47]:
df_train_final = pd.get_dummies(df_train_new, columns=['family', 'city', 'state', 'type'], drop_first=True, dtype=float)
df_train_final.drop(columns=['id'], inplace=True)
df_train_final.set_index('date', inplace=True)
df_train_final

Unnamed: 0_level_0,store_nbr,sales,onpromotion,cluster,is_holiday,dcoilwtico,transactions,family_BABY CARE,family_BEAUTY,family_BEVERAGES,...,state_Manabi,state_Pastaza,state_Pichincha,state_Santa Elena,state_Santo Domingo de los Tsachilas,state_Tungurahua,type_B,type_C,type_D,type_E
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01,1,0.000,0,13,1.0,93.14,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2013-01-01,1,0.000,0,13,1.0,93.14,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2013-01-01,1,0.000,0,13,1.0,93.14,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2013-01-01,1,0.000,0,13,1.0,93.14,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2013-01-01,1,0.000,0,13,1.0,93.14,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-15,9,438.133,0,6,1.0,47.57,2155.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2017-08-15,9,154.553,1,6,1.0,47.57,2155.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2017-08-15,9,2419.729,148,6,1.0,47.57,2155.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2017-08-15,9,121.000,8,6,1.0,47.57,2155.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [44]:
max_lag = 5
variables = df_train_final.columns.tolist()

causality_results = {}

for var1 in tqdm(variables):
    
    for var2 in variables:
        
        if var1 != var2:
            print(f'Testing if {var2} Granger-causes {var1}')
            result = grangercausalitytests(df_train_final[[var1, var2]], max_lag, verbose=False)
            causality_results[(var1, var2)] = result

NameError: name 'df_train_final' is not defined

#### VAR

In [47]:
def adf_test(series):
    result = adfuller(series)
    adf_statistic = result[0]
    p_value = result[1]
    return adf_statistic, p_value

In [None]:
results = []

for column in tqdm(df_train_final.columns):
    # Check if the column is numeric
    if pd.api.types.is_numeric_dtype(df_train_final[column]):
        adf_statistic, p_value = adf_test(df_train_final[column])
        results.append((column, adf_statistic, p_value))

results_df = pd.DataFrame(results, columns=['Column', 'ADF Statistic', 'p-value'])
print(results_df)

In [51]:
results = []

for column in tqdm(df_train_final.columns):
    # Check if the column is numeric
    if pd.api.types.is_numeric_dtype(df_train_final[column]):
        adf_statistic, p_value = adf_test(df_train_final[column])
        results.append((column, adf_statistic, p_value))

results_df = pd.DataFrame(results, columns=['Column', 'ADF Statistic', 'p-value'])
print(results_df)

TypeError: 'module' object is not callable

In [49]:
for column in tqdm(df_train_final.columns):
    adf_test(df_train_final[column])

TypeError: 'module' object is not callable

In [None]:
df_diff = df_train_final.diff().dropna()

In [None]:
model = VAR(df_diff)

In [None]:
lag_order = model.select_order(maxlags=10)
print(lag_order.summary())

In [None]:
opt_lag = lag_order.aic
print(f'Optimal lag order based on AIC: {opt_lag}')

In [None]:
var_model_fitted = model.fit(opt_lag)

In [None]:
forecast_input = df_diff.values[-opt_lag:]
forecast = var_model_fitted.forecast(y=forecast_input, steps=10)

In [None]:
df_forecast = pd.DataFrame(forecast, index=pd.date_range(start=df_train_final.index[-1], periods=10, freq='D'), columns=df_train_final.columns)
df_forecast

In [None]:
# Optional: Invert differencing to get forecast in original scale
df_forecast = forecast_df.cumsum() + df.iloc[-1]

# Plot the forecasted values
df_forecast.plot()
plt.show()