<a href="https://colab.research.google.com/github/usermar445/afcs_assignments/blob/main/final_repo/R/prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#load packages
install.packages("fpp3")


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘numDeriv’, ‘warp’, ‘BH’, ‘Rcpp’, ‘distributional’, ‘progressr’, ‘slider’, ‘anytime’, ‘fable’, ‘fabletools’, ‘feasts’, ‘tsibble’, ‘tsibbledata’


── [1mAttaching packages[22m ────────────────────────────────────────────── fpp3 0.5 ──

[32m✔[39m [34mtibble     [39m 3.2.1     [32m✔[39m [34mtsibble    [39m 1.1.3
[32m✔[39m [34mdplyr      [39m 1.1.4     [32m✔[39m [34mtsibbledata[39m 0.4.1
[32m✔[39m [34mtidyr      [39m 1.3.0     [32m✔[39m [34mfeasts     [39m 0.3.1
[32m✔[39m [34mlubridate  [39m 1.9.3     [32m✔[39m [34mfable      [39m 0.3.3
[32m✔[39m [34mggplot2    [39m 3.4.4     [32m✔[39m [34mfabletools [39m 0.3.4

── [1mConflicts[22m ───────────────────────────────────────────────── fpp3_conflicts ──
[31m✖[39m [34mlubridate[39m::[32mdate()[39m    masks [34mbase[39m::date()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m      m

ERROR: ignored

In [5]:
install.packages("fable.prophet")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [4]:
library(fpp3)
library(stringr)
library(fable.prophet)


Loading required package: Rcpp



In [7]:

## ++++++++++++++++++ LOAD DATA ++++++++++++++++++++++++++++
#load data
#data_path <- "../data/"

# train data
df_sales_train <- read.csv("sales_train_validation_afcs2023.csv")

# test data
sales_test <- read.csv("sales_test_validation_afcs2022.csv")

# calendar data
calendar_df <- read.csv("calendar_afcs2023.csv")

# price data
price_df <- read.csv("sell_prices_afcs2023.csv")



In [8]:
## ++++++++++++++++++ PREPARE DATA SETS ++++++++++++++++++++++++++++

# 1) Calendar data
# convert to Date object
calendar_df$Date <- as.Date(calendar_df$date, format = "%m/%d/%Y")

# create id column to merge with sales data, everything else is disregarded
calendar_df <- calendar_df %>% select(-date) %>% arrange(Date) %>% mutate(id_day = row_number(), day = paste("d_", id_day, sep=""))

# save id mapping for maybe later use
cal_ids <- calendar_df %>% select(Date, id_day, day)

# dummy if weekend
calendar_df <- calendar_df %>% mutate(is_weekend = if_else(wday<=2, 1,0))

# day of month
calendar_df <- calendar_df %>% mutate(day_of_month = day(Date))

# dummy of is event (only event 1)
calendar_df <- calendar_df %>%
  mutate(event = if_else(!is.na(event_name_1), 1, 0))



In [9]:
# 2) Sales train data

# Create id column in order for easier handling (product name is too annoying)
df_sales_train <- df_sales_train %>% arrange(id) %>% mutate(product_id = row_number()) %>% select(id, product_id, everything())
ids <- df_sales_train %>% select(product_id, id)

# pivot to prepare to merge with data
sales_pivot <- df_sales_train %>% pivot_longer(cols=starts_with("d_"), names_to="Day", values_to="Sales") %>% rename(Product = id) %>% arrange(Day, Product)


# 3) Merge sales with calendar data
sales_train_ts <- sales_pivot %>%
  left_join(calendar_df, by=join_by(Day==day)) %>%
  as_tsibble(index=Date, key=product_id)


In [10]:


# 4) Add price data
sales_train_ts <- sales_train_ts %>%
  mutate(extracted_id = str_extract(Product, "FOODS_\\d+_\\d+")) %>%
  left_join(price_df, by=c("extracted_id" = "item_id", "wm_yr_wk"="wm_yr_wk"))

# fill na price values with last price
sales_train_ts <- sales_train_ts %>%
  arrange(product_id, Date) %>%
  fill(sell_price)


# 5) select relevant columns
sales_train <- sales_train_ts %>%
  select(Date, product_id,  Sales, wday, month, snap_TX, is_weekend, day_of_month, event, sell_price)


In [11]:


# 6) test data + Generate new data
# pivot to prepare to merge with data
sales_test <- sales_test %>% arrange(id) %>% mutate(product_id = row_number()) %>% select(id, product_id, everything())
sales_test_pivot <- sales_test %>% pivot_longer(cols=starts_with("d_"), names_to="Day", values_to="Sales") %>% rename(Product = id) %>% arrange(Day, Product)

# Merge data and create tsibble
sales_test_ts <- sales_test_pivot %>%
  left_join(calendar_df, by=c("Day" = "day")) %>%
  select(Date, product_id, Sales) %>%
  as_tsibble(index=Date, key=product_id)

# dates of test data
dates_test_data <- sales_test_ts %>% distinct(Date)

# create "empty" new data for test horizon
new_data <- new_data(sales_train, 28)

# extract calendar information for forcast horizon
cal_new <- calendar_df %>%
  filter(Date %in% dates_test_data$Date) %>%
  select(Date, wday, snap_TX, is_weekend, day_of_month, event)

# get last price before forecast
last_price <- sales_train %>%
  as_tibble() %>%
  group_by(product_id) %>% slice(n()) %>%
  select(product_id, sell_price)

# generate new data for forecast horizon
new_data <- new_data %>%
  left_join(cal_new, by="Date") %>%
  left_join(last_price, by="product_id")



# Forecasting


In [26]:
holidays <- calendar_df %>% select(Date, event_name_1) %>% rename(ds=Date, holiday=event_name_1) %>% filter(!is.na(holiday))

In [28]:
fit_prophet <- sales_train %>%
  model(prophet = prophet(Sales ~ holiday(holidays)))

In [29]:
fc_prophet <- fit_prophet %>% forecast(h=28)

In [31]:
## ++++++++++++++++++ Evaluate ++++++++++
accuracy_prophet <- fc_prophet %>% accuracy(sales_test_ts, measures = list(rmse = RMSE))
rmse_prophet <- accuracy_prophet %>% group_by(`.model`) %>% summarise(mean(rmse))
print(rmse_prophet)

[90m# A tibble: 1 × 2[39m
  .model  `mean(rmse)`
  [3m[90m<chr>[39m[23m          [3m[90m<dbl>[39m[23m
[90m1[39m prophet         1.98


In [32]:
submission_prophet_2 <- fc_prophet %>% as_tibble() %>%
  select(product_id, Date, `.mean`) %>%
  rename(fc = `.mean`) %>%
  mutate(across(fc, round))%>%
  left_join(ids, by="product_id") %>%
  left_join(calendar_df, by="Date") %>%
  select(id, day, fc) %>%
  pivot_wider(names_from = day, values_from = fc)

In [37]:
submission_prophet_2_non_neg <- submission_prophet_2 %>%
  mutate_all(~ replace(., . < 0, 0))

In [34]:
write.csv(submission_prophet_2, "fc_prophet_2.csv")

In [38]:
write.csv(submission_prophet_2_non_neg, "fc_prophet_2_nonneg.csv")