In [1]:
pip install prophet

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from prophet import Prophet
import statsmodels.api as sm
import numpy as np

In [3]:
# Load your sales and price data
sales_df = pd.read_csv('sales_train_validation.csv')
price_df = pd.read_csv('sell_prices.csv')

In [4]:
sales_df

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,0,0,2,2,...,2,0,0,0,0,0,1,0,0,1
30486,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
30487,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,0,6,0,2,...,2,1,0,2,0,1,0,0,1,0
30488,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,1,0,0,1,0,3,1,3


In [5]:
price_df

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
0,CA_1,HOBBIES_1_001,11325,9.58
1,CA_1,HOBBIES_1_001,11326,9.58
2,CA_1,HOBBIES_1_001,11327,8.26
3,CA_1,HOBBIES_1_001,11328,8.26
4,CA_1,HOBBIES_1_001,11329,8.26
...,...,...,...,...
6841116,WI_3,FOODS_3_827,11617,1.00
6841117,WI_3,FOODS_3_827,11618,1.00
6841118,WI_3,FOODS_3_827,11619,1.00
6841119,WI_3,FOODS_3_827,11620,1.00


In [6]:
# Step 1: Melt the sales dataframe to convert daily sales columns into rows
sales_melted = sales_df.melt(id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], 
                             var_name='d', value_name='sales')
sales_melted

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0
...,...,...,...,...,...,...,...,...
58327365,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,d_1913,1
58327366,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,d_1913,0
58327367,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,d_1913,0
58327368,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,d_1913,3


In [7]:
# Step 2: Create a mapping for day d_X to actual calendar date
calendar_df = pd.read_csv('calendar.csv')
calendar_df

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0,0,0
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1,1,0
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,2016-06-15,11620,Wednesday,5,6,2016,d_1965,,,,,0,1,1
1965,2016-06-16,11620,Thursday,6,6,2016,d_1966,,,,,0,0,0
1966,2016-06-17,11620,Friday,7,6,2016,d_1967,,,,,0,0,0
1967,2016-06-18,11621,Saturday,1,6,2016,d_1968,,,,,0,0,0


In [8]:
# Merge the sales data with the calendar data
sales_with_calendar = pd.merge(sales_melted, calendar_df[['d', 'wm_yr_wk', 'date']], on='d', how='left')
sales_with_calendar

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales,wm_yr_wk,date
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29
...,...,...,...,...,...,...,...,...,...,...
58327365,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,d_1913,1,11613,2016-04-24
58327366,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,d_1913,0,11613,2016-04-24
58327367,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,d_1913,0,11613,2016-04-24
58327368,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,d_1913,3,11613,2016-04-24


In [9]:
# Step 3: Merge sales data with price data
merged_df = pd.merge(sales_with_calendar, price_df, on=['store_id', 'item_id', 'wm_yr_wk'], how='left')
merged_df

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales,wm_yr_wk,date,sell_price
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29,
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29,
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29,
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29,
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,d_1,0,11101,2011-01-29,
...,...,...,...,...,...,...,...,...,...,...,...
58327365,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,d_1913,1,11613,2016-04-24,2.98
58327366,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,d_1913,0,11613,2016-04-24,2.48
58327367,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,d_1913,0,11613,2016-04-24,3.98
58327368,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,d_1913,3,11613,2016-04-24,1.28


In [10]:
# Filter the relevant time series for a single store-item combination
store_id = 'WI_3'  # example store
item_id = 'FOODS_3_820'  # example item

In [11]:
df_filtered = merged_df[(merged_df['store_id'] == store_id) & (merged_df['item_id'] == item_id)].copy()
df_filtered

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales,wm_yr_wk,date,sell_price
30482,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_1,1,11101,2011-01-29,1.57
60972,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_2,1,11101,2011-01-30,1.57
91462,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_3,1,11101,2011-01-31,1.57
121952,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_4,1,11101,2011-02-01,1.57
152442,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_5,1,11101,2011-02-02,1.57
...,...,...,...,...,...,...,...,...,...,...,...
58205402,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_1909,1,11612,2016-04-20,1.98
58235892,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_1910,3,11612,2016-04-21,1.98
58266382,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_1911,6,11612,2016-04-22,1.98
58296872,FOODS_3_820_WI_3_validation,FOODS_3_820,FOODS_3,FOODS,WI_3,WI,d_1912,0,11613,2016-04-23,1.98


In [12]:
# Prepare the dataframe for linear regression
df_filtered = df_filtered[['date', 'sales', 'sell_price']]
df_filtered

Unnamed: 0,date,sales,sell_price
30482,2011-01-29,1,1.57
60972,2011-01-30,1,1.57
91462,2011-01-31,1,1.57
121952,2011-02-01,1,1.57
152442,2011-02-02,1,1.57
...,...,...,...
58205402,2016-04-20,1,1.98
58235892,2016-04-21,3,1.98
58266382,2016-04-22,6,1.98
58296872,2016-04-23,0,1.98


In [13]:
df_filtered.rename(columns={'date': 'ds', 'sales': 'y', 'sell_price': 'price'}, inplace=True)
df_filtered['price'] = df_filtered['price'].fillna(df_filtered['price'].mean())
df_filtered

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered.rename(columns={'date': 'ds', 'sales': 'y', 'sell_price': 'price'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['price'] = df_filtered['price'].fillna(df_filtered['price'].mean())


Unnamed: 0,ds,y,price
30482,2011-01-29,1,1.57
60972,2011-01-30,1,1.57
91462,2011-01-31,1,1.57
121952,2011-02-01,1,1.57
152442,2011-02-02,1,1.57
...,...,...,...
58205402,2016-04-20,1,1.98
58235892,2016-04-21,3,1.98
58266382,2016-04-22,6,1.98
58296872,2016-04-23,0,1.98


In [14]:
# Step 1: Fit the linear model on the time series using price as an external regressor
X = sm.add_constant(df_filtered['price'])  # add constant for OLS
X

Unnamed: 0,const,price
30482,1.0,1.57
60972,1.0,1.57
91462,1.0,1.57
121952,1.0,1.57
152442,1.0,1.57
...,...,...
58205402,1.0,1.98
58235892,1.0,1.98
58266382,1.0,1.98
58296872,1.0,1.98


In [15]:
linear_model = sm.OLS(df_filtered['y'], X).fit()
linear_model

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x14dc97770>

In [16]:
# Step 2: Predict the sales using the linear model
df_filtered['linear_forecast'] = linear_model.predict(X)
df_filtered

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['linear_forecast'] = linear_model.predict(X)


Unnamed: 0,ds,y,price,linear_forecast
30482,2011-01-29,1,1.57,1.444444
60972,2011-01-30,1,1.57,1.444444
91462,2011-01-31,1,1.57,1.444444
121952,2011-02-01,1,1.57,1.444444
152442,2011-02-02,1,1.57,1.444444
...,...,...,...,...
58205402,2016-04-20,1,1.98,1.549189
58235892,2016-04-21,3,1.98,1.549189
58266382,2016-04-22,6,1.98,1.549189
58296872,2016-04-23,0,1.98,1.549189


In [17]:
# Step 3: Calculate the residuals (y - linear model forecast)
df_filtered['residuals'] = df_filtered['y'] - df_filtered['linear_forecast']
df_filtered

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['residuals'] = df_filtered['y'] - df_filtered['linear_forecast']


Unnamed: 0,ds,y,price,linear_forecast,residuals
30482,2011-01-29,1,1.57,1.444444,-0.444444
60972,2011-01-30,1,1.57,1.444444,-0.444444
91462,2011-01-31,1,1.57,1.444444,-0.444444
121952,2011-02-01,1,1.57,1.444444,-0.444444
152442,2011-02-02,1,1.57,1.444444,-0.444444
...,...,...,...,...,...
58205402,2016-04-20,1,1.98,1.549189,-0.549189
58235892,2016-04-21,3,1.98,1.549189,1.450811
58266382,2016-04-22,6,1.98,1.549189,4.450811
58296872,2016-04-23,0,1.98,1.549189,-1.549189


In [18]:
# Step 4: Fit the Prophet model on the residuals
prophet_data = df_filtered[['ds', 'residuals']].copy()
prophet_data.rename(columns={'residuals': 'y'}, inplace=True)
prophet_data

Unnamed: 0,ds,y
30482,2011-01-29,-0.444444
60972,2011-01-30,-0.444444
91462,2011-01-31,-0.444444
121952,2011-02-01,-0.444444
152442,2011-02-02,-0.444444
...,...,...
58205402,2016-04-20,-0.549189
58235892,2016-04-21,1.450811
58266382,2016-04-22,4.450811
58296872,2016-04-23,-1.549189


In [19]:
prophet_model = Prophet()
prophet_model.fit(prophet_data)
prophet_model

22:13:31 - cmdstanpy - INFO - Chain [1] start processing
22:13:31 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x14dcda0f0>

In [20]:
# Step 5: Make a future dataframe for Prophet to predict the residuals for the next 28 days
future = prophet_model.make_future_dataframe(periods=28)
future

Unnamed: 0,ds
0,2011-01-29
1,2011-01-30
2,2011-01-31
3,2011-02-01
4,2011-02-02
...,...
1936,2016-05-18
1937,2016-05-19
1938,2016-05-20
1939,2016-05-21


In [21]:
prophet_forecast = prophet_model.predict(future)
prophet_forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2011-01-29,0.198903,-1.375109,2.306762,0.198903,0.198903,0.200425,0.200425,0.200425,0.381048,0.381048,0.381048,-0.180623,-0.180623,-0.180623,0.0,0.0,0.0,0.399328
1,2011-01-30,0.198482,-1.765434,1.913330,0.198482,0.198482,-0.091857,-0.091857,-0.091857,0.096212,0.096212,0.096212,-0.188069,-0.188069,-0.188069,0.0,0.0,0.0,0.106626
2,2011-01-31,0.198061,-1.910278,1.651364,0.198061,0.198061,-0.349500,-0.349500,-0.349500,-0.156549,-0.156549,-0.156549,-0.192951,-0.192951,-0.192951,0.0,0.0,0.0,-0.151439
3,2011-02-01,0.197640,-2.119627,1.525174,0.197640,0.197640,-0.410466,-0.410466,-0.410466,-0.215387,-0.215387,-0.215387,-0.195079,-0.195079,-0.195079,0.0,0.0,0.0,-0.212826
4,2011-02-02,0.197219,-1.946757,1.702690,0.197219,0.197219,-0.289032,-0.289032,-0.289032,-0.094727,-0.094727,-0.094727,-0.194305,-0.194305,-0.194305,0.0,0.0,0.0,-0.091813
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1936,2016-05-18,0.227655,-1.763539,1.860406,0.227588,0.227784,-0.209282,-0.209282,-0.209282,-0.094727,-0.094727,-0.094727,-0.114555,-0.114555,-0.114555,0.0,0.0,0.0,0.018374
1937,2016-05-19,0.228134,-1.993066,1.784874,0.228049,0.228287,-0.229119,-0.229119,-0.229119,-0.098604,-0.098604,-0.098604,-0.130515,-0.130515,-0.130515,0.0,0.0,0.0,-0.000985
1938,2016-05-20,0.228612,-1.811416,1.940973,0.228519,0.228782,-0.058779,-0.058779,-0.058779,0.088008,0.088008,0.088008,-0.146787,-0.146787,-0.146787,0.0,0.0,0.0,0.169833
1939,2016-05-21,0.229091,-1.438616,2.334553,0.228979,0.229282,0.217878,0.217878,0.217878,0.381048,0.381048,0.381048,-0.163170,-0.163170,-0.163170,0.0,0.0,0.0,0.446969


In [22]:
# Step 6: Combine the linear model forecast and Prophet's residual forecast
df_filtered['final_forecast'] = df_filtered['linear_forecast'] + prophet_forecast['yhat'][:len(df_filtered)].values
df_filtered

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['final_forecast'] = df_filtered['linear_forecast'] + prophet_forecast['yhat'][:len(df_filtered)].values


Unnamed: 0,ds,y,price,linear_forecast,residuals,final_forecast
30482,2011-01-29,1,1.57,1.444444,-0.444444,1.843773
60972,2011-01-30,1,1.57,1.444444,-0.444444,1.551070
91462,2011-01-31,1,1.57,1.444444,-0.444444,1.293006
121952,2011-02-01,1,1.57,1.444444,-0.444444,1.231618
152442,2011-02-02,1,1.57,1.444444,-0.444444,1.352631
...,...,...,...,...,...,...
58205402,2016-04-20,1,1.98,1.549189,-0.549189,1.960949
58235892,2016-04-21,3,1.98,1.549189,1.450811,1.933585
58266382,2016-04-22,6,1.98,1.549189,4.450811,2.096734
58296872,2016-04-23,0,1.98,1.549189,-1.549189,2.366710


In [23]:
# Forecast future sales by combining the linear model forecast and Prophet residual forecast
# Forecast the future prices for the next 28 days
future_prices = merged_df[(merged_df['store_id'] == store_id) & (merged_df['item_id'] == item_id)].iloc[-28:]['sell_price'].values
future_prices

array([1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
       1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
       1.98, 1.98, 1.98, 1.98, 1.98, 1.98])

In [24]:
# Handle NaNs in future_prices
future_prices = np.nan_to_num(future_prices, nan=df_filtered['price'].mean())
future_prices

array([1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
       1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
       1.98, 1.98, 1.98, 1.98, 1.98, 1.98])

In [25]:
# Predict future sales using the linear model
future_X = sm.add_constant(future_prices, has_constant='add')
future_linear_forecast = linear_model.predict(future_X)
future_linear_forecast

array([1.54918919, 1.54918919, 1.54918919, 1.54918919, 1.54918919,
       1.54918919, 1.54918919, 1.54918919, 1.54918919, 1.54918919,
       1.54918919, 1.54918919, 1.54918919, 1.54918919, 1.54918919,
       1.54918919, 1.54918919, 1.54918919, 1.54918919, 1.54918919,
       1.54918919, 1.54918919, 1.54918919, 1.54918919, 1.54918919,
       1.54918919, 1.54918919, 1.54918919])

In [26]:
# Predict future residuals using Prophet
future_residuals = prophet_forecast['yhat'][-28:].values
future_residuals

array([ 0.23624967,  0.15729442,  0.25921252,  0.23806355,  0.40890842,
        0.68766257,  0.38994555,  0.1255828 ,  0.05625887,  0.16735137,
        0.15460462,  0.33281098,  0.61767085,  0.32464714,  0.06347008,
       -0.00420866,  0.10701697,  0.092978  ,  0.26860699,  0.54979149,
        0.25222074, -0.01411868, -0.08729848,  0.01837386, -0.00098476,
        0.16983305,  0.44696891,  0.14633633])

In [27]:
# Combine linear model and Prophet residuals for future forecast
final_forecast = future_linear_forecast + future_residuals
final_forecast

array([1.78543886, 1.70648361, 1.80840171, 1.78725274, 1.95809761,
       2.23685175, 1.93913474, 1.67477199, 1.60544806, 1.71654056,
       1.70379381, 1.88200017, 2.16686004, 1.87383633, 1.61265927,
       1.54498053, 1.65620616, 1.64216719, 1.81779618, 2.09898068,
       1.80140993, 1.53507051, 1.46189071, 1.56756305, 1.54820443,
       1.71902224, 1.9961581 , 1.69552552])

In [28]:
# Check for NaNs in the final forecast
if np.any(np.isnan(final_forecast)):
    print("NaN values detected in the final forecast")

In [29]:
# Step 7: Evaluate the performance using RMSE on historical data
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [30]:
# Example of evaluation on historical data
train_rmse = rmse(df_filtered['y'], df_filtered['final_forecast'])
print(f'Training RMSE: {train_rmse}')
final_forecast

Training RMSE: 1.4560559073125399


array([1.78543886, 1.70648361, 1.80840171, 1.78725274, 1.95809761,
       2.23685175, 1.93913474, 1.67477199, 1.60544806, 1.71654056,
       1.70379381, 1.88200017, 2.16686004, 1.87383633, 1.61265927,
       1.54498053, 1.65620616, 1.64216719, 1.81779618, 2.09898068,
       1.80140993, 1.53507051, 1.46189071, 1.56756305, 1.54820443,
       1.71902224, 1.9961581 , 1.69552552])