# Seasonal adjustment

------------------

ft_seasonal_adj/notebooks/seasonally_adjust_using_rjdemetra.ipynb

--------------------------------------

In [None]:
library(DBI)
library(tidyverse)
library(data.table)
library(bigrquery)
library(dplyr)
library(lubridate)
library(RJDemetra)
library(dygraphs)
library(readr)
library(ggdemetra)
library(ggplot2)
library(rjdqa)
library(conflicted)
library(lubridate)
library(xts)

# Declare which package wins for each masked name used in this notebook.
conflicts_prefer(
  dplyr::filter,
  dplyr::lag,
  ggdemetra::raw
)

In [None]:
# Open the BigQuery connection that the queries in this notebook run against.
con <- dbConnect(
  drv = bigquery(),
  project = "ons-fintrans-analysis-prod"
)

In [None]:
# Monthly rows where merchant location, cardholder location and mcg are all
# pinned to 'All', ordered by period.
sql_all_spend <- "SELECT time_period_value, cardholder_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
  WHERE time_period = 'Month' AND 
  merchant_location = 'All' AND
  cardholder_location = 'All' AND
  mcg = 'All'  ORDER BY time_period_value, cardholder_location"
all_spend <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

## Single variable adjustment : UK wide

Convert to time series object and assign spend as the variable

In [None]:
# Monthly series starting in January 2019; `spend` is the variable of interest.
all_ts <- ts(all_spend, start = c(2019, 1), frequency = 12)
all_spend_ts <- all_ts[, "spend"]
dygraph(all_spend_ts, main = "Raw UK spend")

### Indexing spend values

In [None]:
conflicts_prefer(dplyr::first)

# Spend adjusted for the number of cardholders:
#   idx_cards = cardholders / cardholders in the first month (Jan 2019)
#   idx_spend = spend / idx_cards
all_spend <- all_spend %>%
  group_by(cardholder_location) %>%
  mutate(
    idx_cards = cardholders / first(cardholders),
    idx_spend = spend / idx_cards
  ) %>%
  ungroup()

In [None]:
# Same monthly layout as before, now carrying the cardholder-adjusted column.
all_tsi <- ts(all_spend, start = c(2019, 1), frequency = 12)
all_spend_tsi <- all_tsi[, "idx_spend"]
dygraph(all_spend_tsi, main = "UK spend indexed to Jan 2019 cardholder")

##### Seasonal adjustment 
<b>Using X-13ARIMA-SEATS with RSA4c specification.</b>


X-13ARIMA-SEATS is usually preferred to the alternative, TRAMO/SEATS, in the RTI team.
RSA4c takes into account working-day and/or leap-year and/or Easter effects.


Information on the alternatives can be found in <i>ft_seasonal_adj/notebooks/seasonally_adjust_using_rjdemetra.ipynb</i>.

In [None]:
# Base RSA4c specification, followed by three variants that add manual
# outliers for the Covid period.
spec_x13 <- RJDemetra::x13_spec(spec = "RSA4c")

# Months that fall within a Covid-19 lockdown (used by methods 1 and 2).
covid_lockdown_months <- c(
  "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
  "2020-11-01", "2020-12-01", "2021-01-01", "2021-02-01", "2021-03-01"
)

# Method 1: every lockdown month is an additive outlier (AO).
spec_x13_AO <- RJDemetra::x13_spec(
  spec = "RSA4c",
  usrdef.outliersEnabled = TRUE,
  usrdef.outliersType = rep("AO", length(covid_lockdown_months)),
  usrdef.outliersDate = covid_lockdown_months
)

# Method 2: every lockdown month is a transitory change outlier (TC).
spec_x13_TC <- RJDemetra::x13_spec(
  spec = "RSA4c",
  usrdef.outliersEnabled = TRUE,
  usrdef.outliersType = rep("TC", length(covid_lockdown_months)),
  usrdef.outliersDate = covid_lockdown_months
)

# Method 3: level shifts (LS) at the months before and after the Covid
# periods; the short lockdown from 2020-11-01 to 2020-12-01 stays an
# additive outlier.
spec_x13_LS <- RJDemetra::x13_spec(
  spec = "RSA4c",
  usrdef.outliersEnabled = TRUE,
  usrdef.outliersType = c("LS", "LS", "AO", "LS", "LS"),
  usrdef.outliersDate = c(
    "2020-03-01", "2020-07-01", "2020-11-01", "2021-01-01", "2021-04-01"
  )
)

In [None]:
# Specification for the run. The base RSA4c spec is used here; assign
# spec_x13_AO, spec_x13_TC or spec_x13_LS instead to use a manual outlier set.
spec <- spec_x13

# Seasonally adjust the raw UK spend series with the chosen specification.
all_x13_model <- x13(all_spend_ts, spec = spec)

In [None]:
# Component plots of the seasonal adjustment model, drawn with RJDemetra.
# A 3 x 2 grid holds the first six regarima plots.
layout(matrix(seq_len(6), nrow = 3, ncol = 2))
plot(all_x13_model$regarima)

# Reset to a single panel so the plots below do not inherit the grid.
par(mfrow = c(1, 1))
plot(all_x13_model$regarima, which = 7)  # linearised, calendar effects and outliers
plot(all_x13_model$decomposition)  # S-I ratio
plot(all_x13_model, caption = "Results", type_chart = "sa-trend")
plot(all_x13_model, type_chart = "cal-seas-irr")

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Put the input (y), trend-cycle (t) and seasonally adjusted (sa) series on
# one time axis, each next to its forecast = TRUE counterpart (*_f).
data <- ts.union(
  raw(all_x13_model),
  raw(all_x13_model, forecast = TRUE),
  trendcycle(all_x13_model),
  trendcycle(all_x13_model, forecast = TRUE),
  seasonaladj(all_x13_model),
  seasonaladj(all_x13_model, forecast = TRUE)
)
colnames(data) <- c("y", "y_f", "t", "t_f", "sa", "sa_f")

# One colour per component; the *_f columns are drawn with linetype 2.
ggplot(ts2df(data), aes(x = date)) +
  geom_line(aes(y = y), color = "#F0B400", na.rm = TRUE) +
  geom_line(aes(y = y_f), color = "#F0B400", na.rm = TRUE, linetype = 2) +
  geom_line(aes(y = t), color = "#1E6C0B", na.rm = TRUE) +
  geom_line(aes(y = t_f), color = "#1E6C0B", na.rm = TRUE, linetype = 2) +
  geom_line(aes(y = sa), color = "#155692", na.rm = TRUE) +
  geom_line(aes(y = sa_f), color = "#155692", na.rm = TRUE, linetype = 2) +
  theme_bw()

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Print the individual component series extracted from the model.
seasonal(all_x13_model)
calendar(all_x13_model)
calendaradj(all_x13_model)
raw(all_x13_model)

# Base line in #F0B400 with the "sa" component overlaid in #155692.
init_ggplot(all_x13_model) +
  geom_line(color = "#F0B400") +
  geom_sa(component = "sa", color = "#155692")

autoplot(all_x13_model)
ggsiratioplot(all_x13_model)

In [None]:
# First three columns of the final series:
# y = raw data, sa = seasonally adjusted, t = trend
dygraph(all_x13_model$final$series[, 1:3], main = " UK spend")

In [None]:
# Second column only: sa = seasonally adjusted
dygraph(all_x13_model$final$series[, 2], main = "Seasonally adjusted UK spend")

##### Seasonally adjusting post-covid data

In [None]:
# Keep only the post-Covid part of the series (May 2021 onwards).
spend_ts_post_covid <- window(all_tsi, start = c(2021, 5))

In [None]:
post_cov_ts <- spend_ts_post_covid[, "spend"]

In [None]:
# Check that the windowed data is plotted as expected.
dygraphs::dygraph(post_cov_ts, main = "Post-Covid spend")

In [None]:
# X-13ARIMA method, same specification as the full series.
x13_post_covid_model <- x13(post_cov_ts, spec = spec)


In [None]:
# Component plots of the post-Covid model, drawn with RJDemetra.
# A 3 x 2 grid holds the first six regarima plots.
layout(matrix(seq_len(6), nrow = 3, ncol = 2))
plot(x13_post_covid_model$regarima)

# Reset to a single panel so the plots below do not inherit the grid.
par(mfrow = c(1, 1))
plot(x13_post_covid_model$regarima, which = 7)  # linearised, calendar effects and outliers
plot(x13_post_covid_model$decomposition)  # S-I ratio
plot(x13_post_covid_model, caption = "Results", type_chart = "sa-trend")
plot(x13_post_covid_model, type_chart = "cal-seas-irr")

In [None]:
# Column 1 of the final series (the input series).
dygraph(x13_post_covid_model$final$series[, 1], main = "Raw post-covid spend")

In [None]:
# Column 2 of the final series (seasonally adjusted).
dygraph(x13_post_covid_model$final$series[, 2], main = "SA Post-Covid")

In [None]:
# Display the fitted model object.
x13_post_covid_model

---------------------------------

## Multiple variable adjustment : EH12 and CF10

In [None]:
# Monthly rows from 202201 onwards, excluding the 'All' and 'POSTAL_SECTOR'
# location levels and the 'All' issuing level.
sql_all_spend <- "SELECT time_period_value, merchant_location_level,
merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level != 'All' AND
merchant_location_level != 'POSTAL_SECTOR' AND
cardholder_issuing_level != 'All' AND
time_period_value >= '202201'
GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"
all_spend <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

In [None]:
conflicts_prefer(dplyr::first)

# UK-issued card spend at postal-district level for each area, with spend
# rescaled to the cardholder base of the first month:
#   idx_cards = cardholders / first(cardholders)
#   idx_spend = spend / idx_cards
eh12_df <- all_spend %>%
  filter(
    merchant_location == "EH12" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(idx_cards = cardholders / first(cardholders)) %>%
  ungroup() %>%
  mutate(idx_spend = spend / idx_cards)

cf10_df <- all_spend %>%
  filter(
    merchant_location == "CF10" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(idx_cards = cardholders / first(cardholders)) %>%
  ungroup() %>%
  mutate(idx_spend = spend / idx_cards)

# for post-covid - change to 2021,05
# for full series - change to 2019,01

# The *_spend_tsi series must read idx_spend. Previously they read "spend",
# which made the "indexed" series identical to the raw one.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
eh12_spend_tsi <- eh12_tsi[, "idx_spend"]
dygraph(eh12_spend_tsi, main = "EH12 spend indexed to earliest cardholder")
eh12_spend_ts <- eh12_tsi[, "spend"]
dygraph(eh12_spend_ts, main = "EH12 raw spend")

cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
cf10_spend_tsi <- cf10_tsi[, "idx_spend"]
dygraph(cf10_spend_tsi, main = "CF10 spend indexed to earliest cardholder")
cf10_spend_ts <- cf10_tsi[, "spend"]
dygraph(cf10_spend_ts, main = "CF10 raw spend")

In [None]:
# Seasonally adjust the *_spend_tsi series for each area.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi, spec = spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi, spec = spec)

In [None]:
# Columns plotted: y = raw data, sa = seasonally adjusted, t = trend
dygraph(eh12_x13_model$final$series[, 1:3], main = "SA EH12 - cardholder adjusted")
dygraph(cf10_x13_model$final$series[, 1:3], main = "SA CF10 - cardholder adjusted")

----------------------

## Figure 1 seasonally adjusted

In [None]:
# Areas needed for Figure 1: CF10, EH12, HA9 and L4. (The original note also
# lists a UK-wide series; it is not built in this cell.)
conflicts_prefer(dplyr::first)

# UK-issued card spend at postal-district level for each area, plus
# index_spend = spend / first(spend). ungroup() is applied so that plain
# (ungrouped) tibbles leave this cell.
eh12_df <- all_spend %>%
  filter(
    merchant_location == "EH12" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(index_spend = spend / first(spend)) %>%
  ungroup()

cf10_df <- all_spend %>%
  filter(
    merchant_location == "CF10" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(index_spend = spend / first(spend)) %>%
  ungroup()

ha9_df <- all_spend %>%
  filter(
    merchant_location == "HA9" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(index_spend = spend / first(spend)) %>%
  ungroup()

l4_df <- all_spend %>%
  filter(
    merchant_location == "L4" &
      merchant_location_level == "POSTAL_DISTRICT" &
      cardholder_issuing_country == "UNITED KINGDOM"
  ) %>%
  group_by(merchant_location) %>%
  mutate(index_spend = spend / first(spend)) %>%
  ungroup()

# NOTE(review): index_spend is computed above but the series below read the
# raw "spend" column - confirm that modelling raw spend is intended here.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
eh12_spend_tsi <- eh12_tsi[, "spend"]
cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
cf10_spend_tsi <- cf10_tsi[, "spend"]
ha9_tsi <- ts(ha9_df, start = c(2022, 1), frequency = 12)
ha9_spend_tsi <- ha9_tsi[, "spend"]
l4_tsi <- ts(l4_df, start = c(2022, 1), frequency = 12)
l4_spend_tsi <- l4_tsi[, "spend"]


In [None]:
# One X-13 model per area, all fitted with the same specification.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi, spec = spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi, spec = spec)
ha9_x13_model <- RJDemetra::x13(ha9_spend_tsi, spec = spec)
l4_x13_model <- RJDemetra::x13(l4_spend_tsi, spec = spec)

In [None]:
# t = trend, s = what seasonal effect was removed, i = irregular component
eh12_x13_model$final$series

In [None]:

# Seasonally adjusted ("sa") column of each area's final series.
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]

# Bind the four series column-wise and label each column with its area.
combined_sa <- cbind(sa_eh12, sa_cf10, sa_ha9, sa_l4)
colnames(combined_sa) <- c("EH12", "CF10", "HA9", "L4")

# Interactive plot of all four areas.
dygraph(combined_sa, main = "Seasonally Adjusted Figure 1")


In [None]:

# Raw input ("y") column of each area's final series. These are stored under
# raw_* names so the seasonally adjusted sa_* objects are not overwritten
# with unadjusted data.
raw_eh12 <- eh12_x13_model$final$series[, "y"]
raw_cf10 <- cf10_x13_model$final$series[, "y"]
raw_ha9 <- ha9_x13_model$final$series[, "y"]
raw_l4 <- l4_x13_model$final$series[, "y"]

# Bind the four series column-wise and label each column with its area.
combined_raw <- cbind(raw_eh12, raw_cf10, raw_ha9, raw_l4)
colnames(combined_raw) <- c("EH12", "CF10", "HA9", "L4")

# Interactive plot of all four areas.
dygraph(combined_raw, main = "Raw Figure 1")


---------------------------------------------

### Article seasonal adjustment

### Process:
##### Adjust cardholders for raw data
##### Seasonal adjustment
##### Index

In [None]:
# sql_all_spend <- paste("SELECT time_period_value, merchant_location_level,
# merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
# FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
# WHERE time_period = 'Month' AND
# mcg = 'All' AND
# mcc = 'All' AND
# merchant_location_level != 'All' AND
# merchant_location_level != 'POSTAL_SECTOR' AND
# cardholder_issuing_level != 'All' AND
# time_period_value >= '202201'
# GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
# ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders", sep = "")
# all_spend <- as.data.table(dbGetQuery(con, sql_all_spend))

In [None]:
# Monthly postal-district rows from 202201 onwards for the four article areas.
# cardholder_issuing_level = 'All' gives the high-level overview.
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
time_period_value >= '202201' AND 
cardholder_issuing_level = 'All' AND
merchant_location IN ('EH12', 'CF10', 'HA9', 'L4')
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"
all_spend <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

Descriptors for settings:
https://jdemetradocumentation.github.io/JDemetra-documentation/pages/reference-manual/sa-spec-X13.html

In [None]:
# Specification used by RTI: the RSA4c base model with explicit outlier and
# benchmarking settings.
rti_spec <- RJDemetra::x13_spec(
  spec = "RSA4c",
  # Outlier detection is always enabled, starting from the first month in
  # the series, with a critical value of 4.
  outlier.enabled = TRUE,
  outlier.from = "2022-01-01",
  outlier.cv = 4,
  # Outlier types: additive, level shift and transitory change are enabled;
  # seasonal outliers are not.
  outlier.ao = TRUE,
  outlier.ls = TRUE,
  outlier.tc = TRUE,
  outlier.so = FALSE,
  # Base spec value for the rate of decay of TC outliers.
  outlier.tcrate = 0.7,
  # Benchmarking constrains the total aggregate of the SA series to a target;
  # the target here is the aggregate of the raw data over each year (Jan - Dec).
  benchmarking.enabled = TRUE,
  benchmarking.target = "Original",
  # Base spec values for benchmarking.
  benchmarking.rho = 1,
  benchmarking.lambda = 1
)

In [None]:
# Plain RSA4c specification, also exposed as base_spec.
spec_x13 <- RJDemetra::x13_spec(spec = "RSA4c")
base_spec <- spec_x13

# TS

## Figure 1

In [None]:
conflicts_prefer(dplyr::first)

# Rescale spend to the cardholder base of the first month in `df`:
#   index_cards = cardholders / first(cardholders)
#   card_spend  = spend / index_cards
adjust_for_cardholders <- function(df) {
  df %>%
    group_by(merchant_location) %>%
    mutate(index_cards = cardholders / dplyr::first(cardholders)) %>%
    ungroup() %>%
    mutate(card_spend = spend / index_cards)
}

# Areas needed for Figure 1: CF10, EH12, HA9 and L4. (The original note also
# lists a UK-wide series; it is not built in this cell.)
eh12_df <- all_spend %>%
  filter(merchant_location == "EH12") %>%
  adjust_for_cardholders()
cf10_df <- all_spend %>%
  filter(merchant_location == "CF10") %>%
  adjust_for_cardholders()
ha9_df <- all_spend %>%
  filter(merchant_location == "HA9") %>%
  adjust_for_cardholders()
l4_df <- all_spend %>%
  filter(merchant_location == "L4") %>%
  adjust_for_cardholders()

# Model the cardholder-adjusted column. Previously the series read raw
# "spend", which left card_spend unused.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
eh12_spend_tsi <- eh12_tsi[, "card_spend"]
cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
cf10_spend_tsi <- cf10_tsi[, "card_spend"]
ha9_tsi <- ts(ha9_df, start = c(2022, 1), frequency = 12)
ha9_spend_tsi <- ha9_tsi[, "card_spend"]
l4_tsi <- ts(l4_df, start = c(2022, 1), frequency = 12)
l4_spend_tsi <- l4_tsi[, "card_spend"]


In [None]:
# One X-13 model per area, all fitted with the base specification.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi, spec = base_spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi, spec = base_spec)
ha9_x13_model <- RJDemetra::x13(ha9_spend_tsi, spec = base_spec)
l4_x13_model <- RJDemetra::x13(l4_spend_tsi, spec = base_spec)

In [None]:
# Display the fitted EH12 model object.
eh12_x13_model

In [None]:
# Component plots of the EH12 model, drawn with RJDemetra.
# A 3 x 2 grid holds the first six regarima plots.
layout(matrix(seq_len(6), nrow = 3, ncol = 2))
plot(eh12_x13_model$regarima)

In [None]:
# residuals: differences between the observed and modelled values. They should appear random and centred around zero.
# ACF: autocorrelation of the residuals. Large spikes outside the confidence bounds would suggest the residuals are correlated over time, so the model may have missed some time-dependent structure.
# histogram: checks for normality. If it is skewed or heavy-tailed, the residuals are non-normal, which could affect reliability.
# PACF: partial autocorrelation. Bars should fall within the confidence lines; if not, this suggests the residuals may be directly correlated with their lagged values.
# QQ: residuals should be normally distributed. Deviations at the ends can suggest skewness or kurtosis, which may indicate outliers or non-normality.
# decomp: (no interpretation notes recorded yet)

In [None]:
# Seasonally adjusted ("sa") column of each area's final series.
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12 = sa_eh12,
  sa_cf10 = sa_cf10,
  sa_ha9 = sa_ha9,
  sa_l4 = sa_l4
)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- lapply(
  sa_data_list,
  function(series) (series / series[1]) * 100
)
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# Expose each rebased series as its own variable.
sa_eh12_index <- sa_indexed_list[["sa_eh12_index"]]
sa_cf10_index <- sa_indexed_list[["sa_cf10_index"]]
sa_ha9_index <- sa_indexed_list[["sa_ha9_index"]]
sa_l4_index <- sa_indexed_list[["sa_l4_index"]]

# Bind the four series column-wise and label each column with its area.
combined_sa <- cbind(sa_eh12_index, sa_cf10_index, sa_ha9_index, sa_l4_index)
colnames(combined_sa) <- c("EH12", "CF10", "HA9", "L4")

# Interactive plot of all four areas.
dygraph(combined_sa, main = "Seasonally Adjusted Figure 1")


In [None]:
# Export the Figure 1 values, one column per area.
# NOTE(review): only the four value columns are written; the time index of
# combined_sa is not part of the file - confirm downstream users do not need it.
utils::write.csv(x = combined_sa, file = "figure1.csv", row.names = FALSE)

--------------

## Figure 2 - International 

In [None]:
# Monthly postal-district rows from 202201 onwards for the four areas,
# restricted to the 'International' issuing level.
sql_int_ts <- "SELECT time_period_value, merchant_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'POSTAL_DISTRICT' AND
  cardholder_issuing_level = 'International' AND
  cardholder_issuing_country = 'All' AND
  mcg = 'All' AND
  merchant_location IN  ('EH12', 'L4', 'CF10', 'HA9') AND
  time_period_value >= '202201'
  ORDER BY time_period_value, merchant_location"
ts_int <- dbGetQuery(con, sql_int_ts) %>%
  as.data.table()

In [None]:
conflicts_prefer(dplyr::first)

# Rescale spend to the cardholder base of the first month in `df`:
#   index_cards = cardholders / first(cardholders)
#   card_spend  = spend / index_cards
adjust_for_cardholders <- function(df) {
  df %>%
    group_by(merchant_location) %>%
    mutate(index_cards = cardholders / dplyr::first(cardholders)) %>%
    ungroup() %>%
    mutate(card_spend = spend / index_cards)
}

# International card spend for each of the four areas.
eh12_df <- ts_int %>%
  filter(merchant_location == "EH12") %>%
  adjust_for_cardholders()
cf10_df <- ts_int %>%
  filter(merchant_location == "CF10") %>%
  adjust_for_cardholders()
ha9_df <- ts_int %>%
  filter(merchant_location == "HA9") %>%
  adjust_for_cardholders()
l4_df <- ts_int %>%
  filter(merchant_location == "L4") %>%
  adjust_for_cardholders()

# Model the cardholder-adjusted column. Previously the series read raw
# "spend", which left card_spend unused.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
eh12_spend_tsi <- eh12_tsi[, "card_spend"]
cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
cf10_spend_tsi <- cf10_tsi[, "card_spend"]
ha9_tsi <- ts(ha9_df, start = c(2022, 1), frequency = 12)
ha9_spend_tsi <- ha9_tsi[, "card_spend"]
l4_tsi <- ts(l4_df, start = c(2022, 1), frequency = 12)
l4_spend_tsi <- l4_tsi[, "card_spend"]

In [None]:
# One X-13 model per area, all fitted with the RTI specification.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi, spec = rti_spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi, spec = rti_spec)
ha9_x13_model <- RJDemetra::x13(ha9_spend_tsi, spec = rti_spec)
l4_x13_model <- RJDemetra::x13(l4_spend_tsi, spec = rti_spec)

In [None]:
# Seasonally adjusted ("sa") column of each area's final series.
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12 = sa_eh12,
  sa_cf10 = sa_cf10,
  sa_ha9 = sa_ha9,
  sa_l4 = sa_l4
)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- lapply(
  sa_data_list,
  function(series) (series / series[1]) * 100
)
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# Expose each rebased series as its own variable.
sa_eh12_index <- sa_indexed_list[["sa_eh12_index"]]
sa_cf10_index <- sa_indexed_list[["sa_cf10_index"]]
sa_ha9_index <- sa_indexed_list[["sa_ha9_index"]]
sa_l4_index <- sa_indexed_list[["sa_l4_index"]]

# Bind the four series column-wise and label each column with its area.
combined_sa <- cbind(sa_eh12_index, sa_cf10_index, sa_ha9_index, sa_l4_index)
colnames(combined_sa) <- c("EH12", "CF10", "HA9", "L4")

# Interactive plot of all four areas.
dygraph(combined_sa, main = "Seasonally Adjusted TS international spend")

-----------------

## SN

In [None]:
# Monthly rows from 202201 onwards for EH12, CF10 and TW2 at the 'All'
# issuing level, excluding the 'All' and 'POSTAL_SECTOR' location levels.
sql_full_sn <- "SELECT time_period_value, merchant_location_level,
merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level != 'All' AND
cardholder_issuing_level = 'All' AND
merchant_location_level != 'POSTAL_SECTOR' AND
merchant_location IN ('EH12', 'CF10', 'TW2') AND
time_period_value >= '202201'
GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"
sn_full <- dbGetQuery(con, sql_full_sn) %>%
  as.data.table()

In [None]:
conflicts_prefer(dplyr::first)

# Rescale spend to the cardholder base of the first month in `df`:
#   index_cards = cardholders / first(cardholders)
#   card_spend  = spend / index_cards
adjust_for_cardholders <- function(df) {
  df %>%
    group_by(merchant_location) %>%
    mutate(index_cards = cardholders / dplyr::first(cardholders)) %>%
    ungroup() %>%
    mutate(card_spend = spend / index_cards)
}

# Spend for each of the three SN areas.
eh12_df <- sn_full %>%
  filter(merchant_location == "EH12") %>%
  adjust_for_cardholders()
cf10_df <- sn_full %>%
  filter(merchant_location == "CF10") %>%
  adjust_for_cardholders()
tw2_df <- sn_full %>%
  filter(merchant_location == "TW2") %>%
  adjust_for_cardholders()

# Model the cardholder-adjusted column. Previously the series read raw
# "spend", which left card_spend unused.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
eh12_spend_tsi <- eh12_tsi[, "card_spend"]
cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
cf10_spend_tsi <- cf10_tsi[, "card_spend"]
tw2_tsi <- ts(tw2_df, start = c(2022, 1), frequency = 12)
tw2_spend_tsi <- tw2_tsi[, "card_spend"]


In [None]:
# One X-13 model per SN area, all fitted with `spec`.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi, spec = spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi, spec = spec)
tw2_x13_model <- RJDemetra::x13(tw2_spend_tsi, spec = spec)

In [None]:
# Seasonally adjusted ("sa") column of each area's final series.
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_tw2 <- tw2_x13_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12 = sa_eh12,
  sa_cf10 = sa_cf10,
  sa_tw2 = sa_tw2
)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- lapply(
  sa_data_list,
  function(series) (series / series[1]) * 100
)
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# Expose each rebased series as its own variable.
sa_eh12_index <- sa_indexed_list[["sa_eh12_index"]]
sa_cf10_index <- sa_indexed_list[["sa_cf10_index"]]
sa_tw2_index <- sa_indexed_list[["sa_tw2_index"]]

# Bind the three series column-wise and label each column with its area.
combined_sa <- cbind(sa_eh12_index, sa_cf10_index, sa_tw2_index)
colnames(combined_sa) <- c("EH12", "CF10", "TW2")

# Interactive plot of all three areas.
dygraph(combined_sa, main = "Seasonally Adjusted SN sum spend")