This notebook will provide an overview of the seasonal adjustment process and validation.

In [None]:
library(DBI)
library(tidyverse)
library(data.table)
library(bigrquery)
library(dplyr)
library(lubridate)
library(RJDemetra)
library(dygraphs)
library(readr)
library(ggdemetra)
library(ggplot2)
library(rjdqa)
library(conflicted)
library(lubridate)
library(xts)

conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::lag)
conflicts_prefer(ggdemetra::raw)

In [None]:
# Open the BigQuery connection that every query in this notebook goes through.
con <- DBI::dbConnect(bigrquery::bigquery(), project = 'ons-fintrans-analysis-prod')

## High-level overview: <i>seasonally adjusting UK-wide data (2019-2025)</i>

In [None]:
# Monthly spend, transactions and cardholders where merchant location,
# cardholder location and mcg are all 'All'.
sql_all_spend <- "SELECT time_period_value, cardholder_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
  WHERE time_period = 'Month' AND 
  merchant_location = 'All' AND
  cardholder_location = 'All' AND
  mcg = 'All'  ORDER BY time_period_value, cardholder_location"

# Run the query and hold the result as a data.table.
all_spend <- con %>%
  dbGetQuery(sql_all_spend) %>%
  as.data.table()

In [None]:
# Treat the query result as a monthly series starting January 2019, then pull
# out the spend column and chart it.
all_ts <- ts(all_spend, start = c(2019, 1), frequency = 12)
all_spend_ts <- all_ts[, "spend"]
dygraph(all_spend_ts, main = "Raw UK spend")

In [None]:
# Cardholder adjustment of spend.
#
# idx_cards = cardholders / cardholders in the first row of the group
# idx_spend = spend / idx_cards
#
# i.e. spend is divided by the cardholder index, so it is expressed at the
# first month's cardholder level (the rows are ordered by time_period_value
# in the query, and the series is treated as starting in January 2019).
conflicts_prefer(dplyr::first)

all_spend <- all_spend %>%
  group_by(cardholder_location) %>%
  mutate(
    idx_cards = cardholders / first(cardholders),
    idx_spend = spend / idx_cards
  ) %>%
  ungroup()

# Monthly series of the adjusted spend, charted for inspection.
all_tsi <- ts(all_spend, start = c(2019, 1), frequency = 12)
all_spend_tsi <- all_tsi[, "idx_spend"]
dygraph(all_spend_tsi, main = "UK spend indexed to Jan 2019 cardholders")

In [None]:
# X-13 specification used by RTI, built on top of the "RSA4c" base spec.
rti_spec <- RJDemetra::x13_spec(
  spec = "RSA4c",
  # Outlier detection: always enabled, searching from the first month in the
  # series with a critical value of 4.
  outlier.enabled = TRUE,
  outlier.from = "2019-01-01",
  outlier.cv = 4,
  # Outlier types: additive (ao), level shift (ls) and transitory change (tc)
  # are enabled; seasonal outliers (so) are not.
  outlier.ao = TRUE,
  outlier.ls = TRUE,
  outlier.tc = TRUE,
  outlier.so = FALSE,
  # Base-spec rate of decay for transitory change outliers.
  outlier.tcrate = 0.7,
  # Benchmarking constrains the total aggregate of the SA series to a target;
  # here the target is the aggregate of the raw data over each year (Jan-Dec).
  benchmarking.enabled = TRUE,
  benchmarking.target = "Original",
  # Base-spec benchmarking parameters.
  benchmarking.rho = 1,
  benchmarking.lambda = 1
)

In [None]:
# Alternative spec: treat every month that falls within a covid-19 lockdown as
# a user-defined additive outlier (AO).
covid_lockdown_months <- c(
  "2020-03-01", "2020-04-01", "2020-05-01", "2020-06-01",
  "2020-11-01", "2020-12-01",
  "2021-01-01", "2021-02-01", "2021-03-01"
)

additive_spec <- RJDemetra::x13_spec(
  spec = "RSA4c",
  usrdef.outliersEnabled = TRUE,
  # One "AO" per date, derived from the date vector so the two can never get
  # out of step when a month is added or removed.
  usrdef.outliersType = rep("AO", length(covid_lockdown_months)),
  usrdef.outliersDate = covid_lockdown_months
)

In [None]:
# Fit X-13 to the cardholder-adjusted UK series once per specification.
all_rti <- RJDemetra::x13(all_spend_tsi, spec = rti_spec)
all_additive <- RJDemetra::x13(all_spend_tsi, spec = additive_spec)

In [None]:
# RJDemetra plots of the components of the RTI-spec model.

# 3 x 2 grid for the first six regarima plots.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(all_rti$regarima)

# Back to one plot per page so the charts below do not reuse the grid.
par(mfrow = c(1, 1))
# Linearised series, calendar effects and outliers.
plot(all_rti$regarima, which = 7)
# S-I ratio.
plot(all_rti$decomposition)
plot(all_rti, caption = "Results", type_chart = "sa-trend")
plot(all_rti, type_chart = "cal-seas-irr")

In [None]:
# RJDemetra plots of the components of the additive-outlier-spec model.

# 3 x 2 grid for the first six regarima plots.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(all_additive$regarima)

# Back to one plot per page so the charts below do not reuse the grid.
par(mfrow = c(1, 1))
# Linearised series, calendar effects and outliers.
plot(all_additive$regarima, which = 7)
# S-I ratio.
plot(all_additive$decomposition)
plot(all_additive, caption = "Results", type_chart = "sa-trend")
plot(all_additive, type_chart = "cal-seas-irr")

In [None]:
# Chart the seasonally adjusted ("sa") output of both specs against the
# unadjusted input series ("y") taken from the additive model.
sa_rti_full <- all_rti$final$series[, "sa"]
sa_additive_full <- all_additive$final$series[, "sa"]
y_vals_full <- all_additive$final$series[, "y"]

spec_sa <- cbind(sa_rti_full, sa_additive_full, y_vals_full)
colnames(spec_sa) <- c("RTI", "Additive", "Non-SA")

dygraph(
  spec_sa,
  main = "Raw UK spend - RTI and Additive spec and non-SA cardholder adjusted"
)

In [None]:


# Indexed comparison of the two specs against the unadjusted series.
#
# Seasonally adjusted ("sa") output of each model, plus the model input ("y").
sa_rti <- all_rti$final$series[, "sa"]
sa_additive <- all_additive$final$series[, "sa"]
# y_vals was referenced below without ever being created (only y_vals_full
# exists from the previous cell), which stopped this cell with
# "object 'y_vals' not found". Define it here so the cell is self-contained.
y_vals <- all_additive$final$series[, "y"]

sa_data_list <- list(
  sa_rti = sa_rti,
  sa_additive = sa_additive,
  y_vals = y_vals
)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- setNames(
  lapply(sa_data_list, function(series) (series / series[1]) * 100),
  paste0(names(sa_data_list), "_index")
)

# Expose each indexed series as its own variable (<name>_index).
for (name in names(sa_indexed_list)) {
  assign(name, sa_indexed_list[[name]])
}

# Combine into one multi-column series and label the columns for the chart.
spec_indexed_sa <- cbind(sa_rti_index, sa_additive_index, y_vals_index)
colnames(spec_indexed_sa) <- c("RTI", "Additive", "Non-SA")

dygraph(
  spec_indexed_sa,
  main = "Indexed UK spend - RTI and Additive spec and non-SA cardholder adjusted"
)

------------------------------------------

## High-level overview: <i>seasonally adjusting UK-wide data (2022-2025)</i>


In [None]:
# Same UK-wide monthly query as for the full series, restricted to
# time_period_value >= '202201'.
sql_postcov_spend <- "SELECT time_period_value, cardholder_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
  WHERE time_period = 'Month' AND 
  merchant_location = 'All' AND
  cardholder_location = 'All' AND
  time_period_value >= '202201' AND
  mcg = 'All'  ORDER BY time_period_value, cardholder_location"

# Run the query and hold the result as a data.table.
postcov_spend <- con %>%
  dbGetQuery(sql_postcov_spend) %>%
  as.data.table()

In [None]:
# Treat the query result as a monthly series starting January 2022, then pull
# out the spend column and chart it.
postcov_ts <- ts(postcov_spend, start = c(2022, 1), frequency = 12)
postcov_spend_ts <- postcov_ts[, "spend"]
dygraph(postcov_spend_ts, main = "Raw UK spend")

In [None]:
# Cardholder adjustment of spend.
#
# idx_cards = cardholders / cardholders in the first row of the group
# idx_spend = spend / idx_cards
#
# The query behind postcov_spend keeps only time_period_value >= '202201' and
# orders by time_period_value, so the base month here is January 2022, not
# January 2019 as in the full-series section.
conflicts_prefer(dplyr::first)

postcov_spend <- postcov_spend %>%
  group_by(cardholder_location) %>%
  mutate(idx_cards = cardholders / first(cardholders)) %>%
  ungroup() %>%
  mutate(idx_spend = spend / idx_cards)

# Monthly series of the adjusted spend. The chart title previously said
# "Jan 2019", copied from the full-series cell; the base is Jan 2022.
postcov_tsi <- ts(postcov_spend, start = c(2022, 1), frequency = 12)
postcov_spend_tsi <- postcov_tsi[, "idx_spend"]
dygraph(postcov_spend_tsi, main = "UK spend indexed to Jan 2022 cardholders")

In [None]:
# X-13 specification used by RTI, built on top of the "RSA4c" base spec.
# The settings are the same as where rti_spec is first defined for the full
# series; later x13() calls in this notebook also read rti_spec.
# NOTE(review): outlier.from stays at 2019-01-01 although this section's
# series starts in 2022 - confirm that is intended.
rti_spec <- RJDemetra::x13_spec(
  spec = "RSA4c",
  # Outlier detection: always enabled, critical value 4.
  outlier.enabled = TRUE,
  outlier.from = "2019-01-01",
  outlier.cv = 4,
  # Outlier types: additive (ao), level shift (ls) and transitory change (tc)
  # are enabled; seasonal outliers (so) are not.
  outlier.ao = TRUE,
  outlier.ls = TRUE,
  outlier.tc = TRUE,
  outlier.so = FALSE,
  # Base-spec rate of decay for transitory change outliers.
  outlier.tcrate = 0.7,
  # Benchmarking constrains the total aggregate of the SA series to a target;
  # here the target is the aggregate of the raw data over each year (Jan-Dec).
  benchmarking.enabled = TRUE,
  benchmarking.target = "Original",
  # Base-spec benchmarking parameters.
  benchmarking.rho = 1,
  benchmarking.lambda = 1
)

# Running the base RTI spec on the post-2022 cardholder-adjusted series.
postcov_rti <- RJDemetra::x13(postcov_spend_tsi, spec = rti_spec)


In [None]:
# RJDemetra plots of the components of the post-2022 model.

# 3 x 2 grid for the first six regarima plots.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(postcov_rti$regarima)

# Back to one plot per page so the charts below do not reuse the grid.
par(mfrow = c(1, 1))
# Linearised series, calendar effects and outliers.
plot(postcov_rti$regarima, which = 7)
# S-I ratio.
plot(postcov_rti$decomposition)
plot(postcov_rti, caption = "Results", type_chart = "sa-trend")
plot(postcov_rti, type_chart = "cal-seas-irr")

In [None]:
# Chart the seasonally adjusted ("sa") output of the post-2022 model next to
# its unadjusted input ("y").
sa_postcov <- postcov_rti$final$series[, "sa"]
raw_postcov <- postcov_rti$final$series[, "y"]

postcov_sa_raw <- cbind(sa_postcov, raw_postcov)
colnames(postcov_sa_raw) <- c("SA", "Non-SA")

dygraph(
  postcov_sa_raw,
  main = "UK spend - SA and Non-SA cardholder adjusted data"
)

In [None]:
# Indexed view of the post-2022 SA and non-SA series.
sa_data_list <- list(sa_postcov = sa_postcov, raw_postcov = raw_postcov)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- setNames(
  lapply(sa_data_list, function(series) (series / series[1]) * 100),
  paste0(names(sa_data_list), "_index")
)

# Expose each indexed series as its own variable (<name>_index).
for (name in names(sa_indexed_list)) {
  assign(name, sa_indexed_list[[name]])
}

# Combine into one multi-column series and label the columns for the chart.
spec_indexed_sa_postcov <- cbind(sa_postcov_index, raw_postcov_index)
colnames(spec_indexed_sa_postcov) <- c("SA", "Non-SA")

dygraph(
  spec_indexed_sa_postcov,
  main = "Indexed UK spend - RTI spec and Non-SA cardholder adjusted data"
)

In [None]:
## Full time series model vs cropped (post-2022) time series model

# Put the two seasonally adjusted series side by side; the first column is
# the cropped model, the second the full one.
ts_sa_comparison <- cbind(sa_postcov, sa_rti_full)
colnames(ts_sa_comparison) <- c("Post-lockdown", "Full")

dygraph(
  ts_sa_comparison,
  main = "Comparison of RSA4c on full and cropped time series"
)

In [None]:
## Full time series model vs cropped (post-2022) time series model
# NOTE(review): this cell rebuilds the same object and chart as the cell
# directly before it.

# Put the two seasonally adjusted series side by side; the first column is
# the cropped model, the second the full one.
ts_sa_comparison <- cbind(sa_postcov, sa_rti_full)
colnames(ts_sa_comparison) <- c("Post-lockdown", "Full")

dygraph(
  ts_sa_comparison,
  main = "Comparison of RSA4c on full and cropped time series"
)

In [None]:
## Indexing comparison to Jan 2022

# The two seasonally adjusted series to compare: cropped (post-2022) model
# and full-series model.
sa_data_list <- list(
  sa_postcov = sa_postcov,
  sa_rti_full = sa_rti_full
)

# Rebase each series so that January 2022 equals 100.
# The series are univariate ts objects, so they cannot be subset with
# ["2022", "Jan"] (two subscripts on a vector raise "incorrect number of
# dimensions"); window() selects the observation by date instead.
sa_indexed_list <- setNames(
  lapply(sa_data_list, function(series) {
    jan_val <- as.numeric(
      window(series, start = c(2022, 1), end = c(2022, 1))
    )
    # Fail loudly if January 2022 is not a single observation of the series.
    stopifnot(length(jan_val) == 1L)
    (series / jan_val) * 100
  }),
  paste0(names(sa_data_list), "_index")
)

# Expose each indexed series as its own variable (<name>_index).
for (name in names(sa_indexed_list)) {
  assign(name, sa_indexed_list[[name]])
}

# Combine into one multi-column series and label the columns for the chart.
compare_sa_indexed <- cbind(sa_postcov_index, sa_rti_full_index)
colnames(compare_sa_indexed) <- c("Post-lockdown", "Full")

dygraph(compare_sa_indexed, main = "Indexed RSA4c on full and cropped time series")


# ts_sa_comparison <- cbind(sa_postcov, sa_rti_full)
# colnames(ts_sa_comparison) <- c("Post-lockdown", "Full")

# dygraph(ts_sa_comparison, main = "Comparison of RSA4c on full and cropped time series")

In [None]:

# Indexed comparison of the cropped and full-series models, base Jan 2022.
sa_data_list <- list(sa_postcov = sa_postcov, sa_rti_full = sa_rti_full)

sa_indexed_list <- list()

for (series_name in names(sa_data_list)) {
  series <- sa_data_list[[series_name]]

  # Locate January 2022: calendar year from the numeric time index, month
  # from the position within the 12-month cycle.
  is_jan_2022 <- floor(time(series)) == 2022 & cycle(series) == 1
  base_val <- series[which(is_jan_2022)]

  # Rebase so that January 2022 equals 100.
  sa_indexed_list[[paste0(series_name, "_index")]] <- (series / base_val) * 100
}

# Expose each indexed series as its own variable (<name>_index).
for (name in names(sa_indexed_list)) {
  assign(name, sa_indexed_list[[name]])
}

# Combine into one multi-column series and label the columns for the chart.
compare_sa_indexed <- cbind(sa_postcov_index, sa_rti_full_index)
colnames(compare_sa_indexed) <- c("Post-lockdown", "Full")

dygraph(compare_sa_indexed, main = "Indexed RSA4c on full and cropped time series")


------------------------------------

--------------------

## TS venue high-level spend: <i>(2019-2025)</i>

In [None]:
# Monthly rows for the four venue postal districts (EH12, CF10, HA9, L4) up to
# and including 202503. Note this reuses the names sql_all_spend and all_ts
# from the UK-wide section above.
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
cardholder_issuing_level = 'All' AND
merchant_location IN ('EH12', 'CF10', 'HA9', 'L4') AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

# Run the query and hold the result as a data.table.
all_ts <- con %>%
  dbGetQuery(sql_all_spend) %>%
  as.data.table()


In [None]:
# One table per venue postal district (the UK-wide figures are queried
# separately).
eh12_df <- all_ts %>% filter(merchant_location == "EH12")
cf10_df <- all_ts %>% filter(merchant_location == "CF10")
ha9_df <- all_ts %>% filter(merchant_location == "HA9")
l4_df <- all_ts %>% filter(merchant_location == "L4")

# L4 has no row for 202303.

In [None]:
# Placeholder row for the month L4 lacks (202303): identifying columns are
# filled in, every measure is NA.
l4_missing <- data.frame(
  # identifying columns
  time_period = "Month",
  time_period_value = "202303",
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "L4",
  cardholder_issuing_level = "All",
  cardholder_issuing_country = "All",
  mcg = "All",
  mcc = "All",
  # measures
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)

# Append the placeholder, then restore chronological order.
df3 <- rbind(l4_df, l4_missing)
l4_df <- df3[order(time_period_value), ]

#### Cardholder adjustment

##### Option 1: Adjust the cardholders on a local district level

In [None]:
# Option 1: adjust each district's spend by its own cardholder index.
conflicts_prefer(dplyr::first)

# Adds index_cards (cardholders relative to the first row of the district)
# and card_spend (spend divided by that index) to a district table.
index_to_local_cards <- function(district_df) {
  district_df %>%
    group_by(merchant_location) %>%
    mutate(
      index_cards = cardholders / dplyr::first(cardholders),
      card_spend = spend / index_cards
    ) %>%
    ungroup()
}

# Monthly series starting January 2019.
as_monthly_from_2019 <- function(district_df) {
  ts(district_df, start = c(2019, 1), frequency = 12)
}

eh12_df <- index_to_local_cards(eh12_df)
cf10_df <- index_to_local_cards(cf10_df)
ha9_df <- index_to_local_cards(ha9_df)
l4_df <- index_to_local_cards(l4_df)

# Raw spend series per district.
eh12_ts <- as_monthly_from_2019(eh12_df)
eh12_spend_ts <- eh12_ts[, "spend"]
cf10_ts <- as_monthly_from_2019(cf10_df)
cf10_spend_ts <- cf10_ts[, "spend"]
ha9_ts <- as_monthly_from_2019(ha9_df)
ha9_spend_ts <- ha9_ts[, "spend"]
l4_ts <- as_monthly_from_2019(l4_df)
l4_spend_ts <- l4_ts[, "spend"]

# Locally cardholder-adjusted spend series per district.
eh12_tsi <- as_monthly_from_2019(eh12_df)
eh12_spend_tsi <- eh12_tsi[, "card_spend"]
cf10_tsi <- as_monthly_from_2019(cf10_df)
cf10_spend_tsi <- cf10_tsi[, "card_spend"]
ha9_tsi <- as_monthly_from_2019(ha9_df)
ha9_spend_tsi <- ha9_tsi[, "card_spend"]
l4_tsi <- as_monthly_from_2019(l4_df)
l4_spend_tsi <- l4_tsi[, "card_spend"]

##### Option 2: Adjust the spend to UK cardholders

In [None]:
# Monthly rows with merchant_location_level = 'All' up to and including
# 202503; their cardholder counts feed the UK-wide adjustment.
sql_full <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'All' AND
cardholder_issuing_level = 'All' AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

# Run the query and hold the result as a data.table.
highlvl_sml <- con %>%
  dbGetQuery(sql_full) %>%
  as.data.table()

In [None]:
# Option 2: adjust each district's spend by the UK-wide cardholder index.
#
# For one district table:
#   1. join the UK-wide cardholder count for each month (uk_sum_cards),
#   2. index it to its first value (index_cards_uk),
#   3. divide local spend by that index (adj_spend).
# An alternative that was considered and left out: first adjust the local
# cardholders by the UK index, then adjust spend by the adjusted local
# cardholders:
#   mutate(adj_cards = cardholders / index_cards_uk) %>%
#     mutate(adj_spend = spend / adj_cards)
adjust_to_uk_cards <- function(district_df) {
  uk_cards <- highlvl_sml %>%
    select(time_period_value, uk_sum_cards = cardholders)

  district_df %>%
    left_join(uk_cards, by = "time_period_value") %>%
    mutate(
      index_cards_uk = uk_sum_cards / dplyr::first(uk_sum_cards),
      adj_spend = spend / index_cards_uk
    )
}

eh12_df <- adjust_to_uk_cards(eh12_df)
cf10_df <- adjust_to_uk_cards(cf10_df)
ha9_df <- adjust_to_uk_cards(ha9_df)
l4_df <- adjust_to_uk_cards(l4_df)

# Monthly series (from January 2019) of the UK-adjusted spend per district.
eh12_tsi_uk <- ts(eh12_df, start = c(2019, 1), frequency = 12)
eh12_spend_tsi_uk <- eh12_tsi_uk[, "adj_spend"]
cf10_tsi_uk <- ts(cf10_df, start = c(2019, 1), frequency = 12)
cf10_spend_tsi_uk <- cf10_tsi_uk[, "adj_spend"]
ha9_tsi_uk <- ts(ha9_df, start = c(2019, 1), frequency = 12)
ha9_spend_tsi_uk <- ha9_tsi_uk[, "adj_spend"]
l4_tsi_uk <- ts(l4_df, start = c(2019, 1), frequency = 12)
l4_spend_tsi_uk <- l4_tsi_uk[, "adj_spend"]

In [None]:
# Unadjusted spend for the four districts on one chart (despite its name,
# adj_spend holds the raw spend series).
adj_spend <- cbind(eh12_spend_ts, cf10_spend_ts, ha9_spend_ts, l4_spend_ts)
colnames(adj_spend) <- c("EH12", "CF10", "HA9", "L4")
dygraph(adj_spend, main = "Raw spend at venues")

In [None]:
# Option 1 view: spend adjusted by each district's own cardholder index.
# This cell used to be an exact copy of the UK-wide chart in the next cell,
# which left the locally adjusted series (*_spend_tsi) computed but never
# plotted. The 2022-2025 section plots the local adjustment at this point, so
# this cell now does the same.
adj_spendi <- cbind(eh12_spend_tsi, cf10_spend_tsi, ha9_spend_tsi, l4_spend_tsi)
colnames(adj_spendi) <- c("EH12", "CF10", "HA9", "L4")
dygraph(adj_spendi, main = "Local cardholder adjusted raw spend at venues")

In [None]:
# Option 2 view: spend adjusted by the UK-wide cardholder index, all four
# districts on one chart.
adj_spendi_uk <- cbind(
  eh12_spend_tsi_uk,
  cf10_spend_tsi_uk,
  ha9_spend_tsi_uk,
  l4_spend_tsi_uk
)
colnames(adj_spendi_uk) <- c("EH12", "CF10", "HA9", "L4")
dygraph(adj_spendi_uk, main = " UK wide cardholder adjusted raw spend at venues")

In [None]:
# Fit X-13 with the RTI spec to each district's UK-adjusted spend series.
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi_uk, spec = rti_spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi_uk, spec = rti_spec)
ha9_x13_model <- RJDemetra::x13(ha9_spend_tsi_uk, spec = rti_spec)
l4_x13_model <- RJDemetra::x13(l4_spend_tsi_uk, spec = rti_spec)

In [None]:
# RJDemetra plots of the components of the HA9 model.

# 3 x 2 grid for the first six regarima plots.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(ha9_x13_model$regarima)

# Back to one plot per page so the charts below do not reuse the grid.
par(mfrow = c(1, 1))
# Linearised series, calendar effects and outliers.
plot(ha9_x13_model$regarima, which = 7)
# S-I ratio.
plot(ha9_x13_model$decomposition)
plot(ha9_x13_model, caption = "Results", type_chart = "sa-trend")
plot(ha9_x13_model, type_chart = "cal-seas-irr")

In [None]:
# Seasonally adjusted ("sa") series from each district model, charted
# together in their original units.
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]

# One multi-column series, one column per district.
ts_sas <- cbind(sa_eh12, sa_cf10, sa_ha9, sa_l4)
colnames(ts_sas) <- c("EH12", "CF10", "HA9", "L4")

dygraph(ts_sas, main = "Seasonally Adjusted Figure 1 raw values")


In [None]:
# Indexed view of the seasonally adjusted district series.
sa_data_list <- list(
  sa_eh12 = sa_eh12,
  sa_cf10 = sa_cf10,
  sa_ha9 = sa_ha9,
  sa_l4 = sa_l4
)

# Rebase every series so that its first observation equals 100.
sa_indexed_list <- setNames(
  lapply(sa_data_list, function(series) (series / series[1]) * 100),
  paste0(names(sa_data_list), "_index")
)

# Expose each indexed series as its own variable (<name>_index).
for (name in names(sa_indexed_list)) {
  assign(name, sa_indexed_list[[name]])
}

# One multi-column series, one column per district.
combined_sa <- cbind(sa_eh12_index, sa_cf10_index, sa_ha9_index, sa_l4_index)
colnames(combined_sa) <- c("EH12", "CF10", "HA9", "L4")

dygraph(combined_sa, main = "Indexed seasonally adjusted spend")


In [None]:
# Indexed view of the unadjusted ("y") input series of each district model.
y_eh12 <- eh12_x13_model$final$series[, "y"]
y_cf10 <- cf10_x13_model$final$series[, "y"]
y_ha9 <- ha9_x13_model$final$series[, "y"]
y_l4 <- l4_x13_model$final$series[, "y"]

data_list <- list(y_eh12 = y_eh12, y_cf10 = y_cf10, y_ha9 = y_ha9, y_l4 = y_l4)

# Rebase every series so that its first observation equals 100.
y_indexed_list <- setNames(
  lapply(data_list, function(series) (series / series[1]) * 100),
  paste0(names(data_list), "_index")
)

# Expose each indexed series as its own variable (<name>_index).
for (name in names(y_indexed_list)) {
  assign(name, y_indexed_list[[name]])
}

# One multi-column series, one column per district.
combined_y <- cbind(y_eh12_index, y_cf10_index, y_ha9_index, y_l4_index)
colnames(combined_y) <- c("EH12", "CF10", "HA9", "L4")

dygraph(combined_y, main = "Non-SA indexed spend")

------------------

## TS venue high-level spend: <i>(2022-2025)</i>

In [None]:
# Monthly rows for the four venue postal districts, restricted to 202201
# through 202503. Note this reuses the name sql_all_spend from earlier cells.
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
cardholder_issuing_level = 'All' AND
merchant_location IN ('EH12', 'CF10', 'HA9', 'L4') AND
time_period_value >= '202201' AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

# Run the query and hold the result as a data.table.
cut_ts <- con %>%
  dbGetQuery(sql_all_spend) %>%
  as.data.table()

In [None]:
# One table per venue postal district (the UK-wide figures are queried
# separately).
eh12_df2 <- cut_ts %>% filter(merchant_location == "EH12")
cf10_df2 <- cut_ts %>% filter(merchant_location == "CF10")
ha9_df2 <- cut_ts %>% filter(merchant_location == "HA9")
l4_df2 <- cut_ts %>% filter(merchant_location == "L4")

# L4 has no row for 202303: append the placeholder row (l4_missing, built in
# the 2019-2025 section) and restore chronological order.
df4 <- rbind(l4_df2, l4_missing)
l4_df2 <- df4[order(time_period_value), ]

In [None]:
# Option 1 for the 2022-2025 window: adjust each district's spend by its own
# cardholder index.
conflicts_prefer(dplyr::first)

# Adds index_cards (cardholders relative to the first row of the district)
# and card_spend (spend divided by that index) to a district table.
index_to_local_cards <- function(district_df) {
  district_df %>%
    group_by(merchant_location) %>%
    mutate(
      index_cards = cardholders / dplyr::first(cardholders),
      card_spend = spend / index_cards
    ) %>%
    ungroup()
}

eh12_df2 <- index_to_local_cards(eh12_df2)
cf10_df2 <- index_to_local_cards(cf10_df2)
ha9_df2 <- index_to_local_cards(ha9_df2)
l4_df2 <- index_to_local_cards(l4_df2)

# Monthly series from January 2022; per district, the raw spend column and
# the locally adjusted spend column.
eh12_ts2 <- ts(eh12_df2, start = c(2022, 1), frequency = 12)
eh12_spend_ts2 <- eh12_ts2[, "spend"]
eh12_spend_tsi2 <- eh12_ts2[, "card_spend"]

cf10_ts2 <- ts(cf10_df2, start = c(2022, 1), frequency = 12)
cf10_spend_ts2 <- cf10_ts2[, "spend"]
cf10_spend_tsi2 <- cf10_ts2[, "card_spend"]

ha9_ts2 <- ts(ha9_df2, start = c(2022, 1), frequency = 12)
ha9_spend_ts2 <- ha9_ts2[, "spend"]
ha9_spend_tsi2 <- ha9_ts2[, "card_spend"]

l4_ts2 <- ts(l4_df2, start = c(2022, 1), frequency = 12)
l4_spend_ts2 <- l4_ts2[, "spend"]
l4_spend_tsi2 <- l4_ts2[, "card_spend"]


In [None]:
# Monthly rows with merchant_location_level = 'All' for 202201 through
# 202503; their cardholder counts feed the UK-wide adjustment.
sql_full2 <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'All' AND
cardholder_issuing_level = 'All' AND
time_period_value <= '202503' AND
time_period_value >= '202201'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

# Run the query and hold the result as a data.table.
highlvl_sml2 <- con %>%
  dbGetQuery(sql_full2) %>%
  as.data.table()

In [None]:
# Option 2 for the 2022-2025 window: adjust each district's spend by the
# UK-wide cardholder index.
#
# For one district table:
#   1. join the UK-wide cardholder count for each month (uk_sum_cards),
#   2. index it to its first value (index_cards_uk),
#   3. divide local spend by that index (adj_spend).
# An alternative that was considered and left out: first adjust the local
# cardholders by the UK index, then adjust spend by the adjusted local
# cardholders:
#   mutate(adj_cards = cardholders / index_cards_uk) %>%
#     mutate(adj_spend = spend / adj_cards)
adjust_to_uk_cards2 <- function(district_df) {
  uk_cards <- highlvl_sml2 %>%
    select(time_period_value, uk_sum_cards = cardholders)

  district_df %>%
    left_join(uk_cards, by = "time_period_value") %>%
    mutate(
      index_cards_uk = uk_sum_cards / dplyr::first(uk_sum_cards),
      adj_spend = spend / index_cards_uk
    )
}

eh12_df2 <- adjust_to_uk_cards2(eh12_df2)
cf10_df2 <- adjust_to_uk_cards2(cf10_df2)
ha9_df2 <- adjust_to_uk_cards2(ha9_df2)
l4_df2 <- adjust_to_uk_cards2(l4_df2)

# Monthly series (from January 2022) of the UK-adjusted spend per district.
eh12_tsi_uk2 <- ts(eh12_df2, start = c(2022, 1), frequency = 12)
eh12_spend_tsi_uk2 <- eh12_tsi_uk2[, "adj_spend"]
cf10_tsi_uk2 <- ts(cf10_df2, start = c(2022, 1), frequency = 12)
cf10_spend_tsi_uk2 <- cf10_tsi_uk2[, "adj_spend"]
ha9_tsi_uk2 <- ts(ha9_df2, start = c(2022, 1), frequency = 12)
ha9_spend_tsi_uk2 <- ha9_tsi_uk2[, "adj_spend"]
l4_tsi_uk2 <- ts(l4_df2, start = c(2022, 1), frequency = 12)
l4_spend_tsi_uk2 <- l4_tsi_uk2[, "adj_spend"]

In [None]:
# One column per district of raw monthly spend, labelled by postal district.
adj_spend2 <- cbind(
  EH12 = eh12_spend_ts2,
  CF10 = cf10_spend_ts2,
  HA9 = ha9_spend_ts2,
  L4 = l4_spend_ts2
)
adj_spend2 %>% dygraph(main = "Raw spend at venues")

In [None]:
# One column per district of spend divided by the district's own cardholder
# index.
adj_spendi2 <- cbind(
  EH12 = eh12_spend_tsi2,
  CF10 = cf10_spend_tsi2,
  HA9 = ha9_spend_tsi2,
  L4 = l4_spend_tsi2
)
adj_spendi2 %>% dygraph(main = "Local cardholder adjusted raw spend at venues")

In [None]:
# One column per district of spend divided by the UK-wide cardholder index.
# NOTE: this reuses the name `adj_spendi2`, replacing the locally adjusted
# matrix built in the previous cell.
adj_spendi2 <- cbind(
  EH12 = eh12_spend_tsi_uk2,
  CF10 = cf10_spend_tsi_uk2,
  HA9 = ha9_spend_tsi_uk2,
  L4 = l4_spend_tsi_uk2
)
adj_spendi2 %>% dygraph(main = "UK-wide cardholder adjusted raw spend at venues")

In [None]:
# Fit an X-13 seasonal adjustment model, using the shared `rti_spec`
# specification, to each district's UK-cardholder-adjusted spend series.
eh12_x13_model2 <- RJDemetra::x13(eh12_spend_tsi_uk2, spec = rti_spec)
cf10_x13_model2 <- RJDemetra::x13(cf10_spend_tsi_uk2, spec = rti_spec)
ha9_x13_model2 <- RJDemetra::x13(ha9_spend_tsi_uk2, spec = rti_spec)
l4_x13_model2 <- RJDemetra::x13(l4_spend_tsi_uk2, spec = rti_spec)

In [None]:
# Diagnostic plots for the L4 seasonal adjustment model.

# Lay the first six regarima plots out on a 3 x 2 grid.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(l4_x13_model2$regarima)

# Reset to a single panel so the remaining plots are drawn one per figure.
par(mfrow = c(1, 1))

# Plot 7: linearised series, calendar effects and outliers.
plot(l4_x13_model2$regarima, which = 7)

# S-I ratio.
plot(l4_x13_model2$decomposition)

# Final-series charts.
plot(l4_x13_model2, type_chart = "sa-trend", caption = "Results")
plot(l4_x13_model2, type_chart = "cal-seas-irr")

In [None]:
# Seasonally adjusted ("sa") column of each district model's final series.
sa_eh12c <- eh12_x13_model2$final$series[, "sa"]
sa_cf10c <- cf10_x13_model2$final$series[, "sa"]
sa_ha9c <- ha9_x13_model2$final$series[, "sa"]
sa_l4c <- l4_x13_model2$final$series[, "sa"]

# Bind the four series into one multi-column time series, labelled by district.
ts_sas2 <- cbind(
  EH12 = sa_eh12c,
  CF10 = sa_cf10c,
  HA9 = sa_ha9c,
  L4 = sa_l4c
)

# Interactive chart of the seasonally adjusted levels.
ts_sas2 %>% dygraph(main = "Seasonally Adjusted Figure 1 raw values")

In [None]:
# Seasonally adjusted series to be re-based, keyed by variable name.
sa_data_list <- list(
  sa_eh12c = sa_eh12c,
  sa_cf10c = sa_cf10c,
  sa_ha9c = sa_ha9c,
  sa_l4c = sa_l4c
)

# Re-base every series so its first observation equals 100, storing each
# result under "<original name>_index".
sa_indexed_list <- lapply(
  sa_data_list,
  function(series) (series / series[1]) * 100
)
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# Expose each indexed series as its own variable (sa_eh12c_index, ...).
invisible(list2env(sa_indexed_list, envir = environment()))

# Bind the indexed series into one multi-column time series and chart it.
combined_sa <- cbind(
  EH12 = sa_eh12c_index,
  CF10 = sa_cf10c_index,
  HA9 = sa_ha9c_index,
  L4 = sa_l4c_index
)
combined_sa %>% dygraph(main = "Indexed seasonally adjusted spend")

In [None]:
# "y" column of each district model's final series (charted below as the
# non-seasonally-adjusted spend).
y_eh12c <- eh12_x13_model2$final$series[, "y"]
y_cf10c <- cf10_x13_model2$final$series[, "y"]
y_ha9c <- ha9_x13_model2$final$series[, "y"]
y_l4c <- l4_x13_model2$final$series[, "y"]

data_list <- list(
  y_eh12c = y_eh12c,
  y_cf10c = y_cf10c,
  y_ha9c = y_ha9c,
  y_l4c = y_l4c
)

# Re-base every series so its first observation equals 100, storing each
# result under "<original name>_index".
y_indexed_list <- lapply(
  data_list,
  function(series) (series / series[1]) * 100
)
names(y_indexed_list) <- paste0(names(data_list), "_index")

# Expose each indexed series as its own variable (y_eh12c_index, ...).
invisible(list2env(y_indexed_list, envir = environment()))

# Bind the indexed series into one multi-column time series and chart it.
combined_y <- cbind(
  EH12 = y_eh12c_index,
  CF10 = y_cf10c_index,
  HA9 = y_ha9c_index,
  L4 = y_l4c_index
)
combined_y %>% dygraph(main = "Non-SA indexed spend")

In [None]:
### Full time series vs cropped time series

In [None]:

# Put the seasonally adjusted series from the cropped (2022 onwards) models
# side by side with the sa_eh12 / sa_cf10 / sa_ha9 / sa_l4 series defined
# elsewhere in the notebook (presumably the full-period fits - confirm).
ts_comp <- cbind(sa_eh12c, sa_cf10c, sa_ha9c, sa_l4c, sa_eh12, sa_cf10, sa_ha9, sa_l4)

# The first four columns are the cropped ("c") series and the last four are
# the non-cropped ones, so each column needs its own label. Repeating the "c"
# labels for both halves makes the two versions indistinguishable in the
# chart legend.
colnames(ts_comp) <- c("EH12c", "CF10c", "HA9c", "L4c", "EH12", "CF10", "HA9", "L4")

# Plot with dygraph
dygraph(ts_comp, main = "Seasonally Adjusted Figure 1 raw values")

In [None]:

# L4 only: the cropped-model seasonally adjusted series next to the sa_l4
# series defined elsewhere in the notebook.
ts_comp <- cbind(
  L4c = sa_l4c,
  L4 = sa_l4
)
ts_comp %>% dygraph(main = "L4")

------------------

------------------

## SN venue high level spend: <i>(2022-2025)</i>

In [None]:
# Monthly postal-district rows for EH12, CF10 and TW2, from 202201 up to (but
# not including) 202504. The GROUP BY lists every selected column and there
# are no aggregates, so it only collapses exact duplicate rows. The
# `merchant_location_level != 'All'` predicate is already implied by the
# `= 'POSTAL_DISTRICT'` predicate.
sql_full_sn <- "SELECT time_period_value, merchant_location_level,
merchant_location, cardholder_issuing_country, mcg, mcc, spend, transactions, cardholders
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level != 'All' AND
cardholder_issuing_level = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
merchant_location IN ('EH12', 'CF10', 'TW2') AND
time_period_value >= '202201' AND
time_period_value < '202504'
GROUP BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

# Run the query on the open BigQuery connection and keep it as a data.table.
sn_full <- dbGetQuery(con, sql_full_sn) %>%
  as.data.table()

In [None]:
# Split the combined query result into one table per postal district.
eh12_df <- sn_full %>% filter(merchant_location == "EH12")
cf10_df <- sn_full %>% filter(merchant_location == "CF10")
tw2_df <- sn_full %>% filter(merchant_location == "TW2")



In [None]:
# Make the unqualified first() below resolve to dplyr's implementation.
conflicts_prefer(dplyr::first)

# Index a district's cardholder count to the value in its first row (per
# merchant_location), then divide spend by that index.
index_spend_to_first_cards <- function(district_df) {
  district_df %>%
    group_by(merchant_location) %>%
    mutate(index_cards = cardholders / first(cardholders)) %>%
    ungroup() %>%
    mutate(card_spend = spend / index_cards)
}

eh12_df <- index_spend_to_first_cards(eh12_df)
cf10_df <- index_spend_to_first_cards(cf10_df)
tw2_df <- index_spend_to_first_cards(tw2_df)

# Monthly time-series matrices starting in January 2022.
eh12_tsi <- ts(eh12_df, start = c(2022, 1), frequency = 12)
cf10_tsi <- ts(cf10_df, start = c(2022, 1), frequency = 12)
tw2_tsi <- ts(tw2_df, start = c(2022, 1), frequency = 12)

# Raw monthly spend per district.
eh12_spend_ts <- eh12_tsi[, "spend"]
cf10_spend_ts <- cf10_tsi[, "spend"]
tw2_spend_ts <- tw2_tsi[, "spend"]

# Cardholder-adjusted monthly spend per district.
eh12_spend_tsi <- eh12_tsi[, "card_spend"]
cf10_spend_tsi <- cf10_tsi[, "card_spend"]
tw2_spend_tsi <- tw2_tsi[, "card_spend"]

In [None]:
# One column per district of raw monthly spend, labelled by postal district.
raw_spend <- cbind(
  EH12 = eh12_spend_ts,
  CF10 = cf10_spend_ts,
  TW2 = tw2_spend_ts
)
raw_spend %>% dygraph(main = "Raw spend at venues")

In [None]:
# One column per district of cardholder-adjusted monthly spend.
ca_spend <- cbind(
  EH12 = eh12_spend_tsi,
  CF10 = cf10_spend_tsi,
  TW2 = tw2_spend_tsi
)
ca_spend %>% dygraph(main = "Cardholder adjusted spend at venues")

In [None]:
# Fit an X-13 seasonal adjustment model, using the shared `rti_spec`
# specification, to each district's cardholder-adjusted spend series.
eh12_sn <- RJDemetra::x13(eh12_spend_tsi, spec = rti_spec)
cf10_sn <- RJDemetra::x13(cf10_spend_tsi, spec = rti_spec)
tw2_sn <- RJDemetra::x13(tw2_spend_tsi, spec = rti_spec)


In [None]:
# Diagnostic plots for the TW2 seasonal adjustment model.

# Lay the first six regarima plots out on a 3 x 2 grid.
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(tw2_sn$regarima)

# Reset to a single panel so the remaining plots are drawn one per figure.
par(mfrow = c(1, 1))

# Plot 7: linearised series, calendar effects and outliers.
plot(tw2_sn$regarima, which = 7)

# S-I ratio.
plot(tw2_sn$decomposition)

# Final-series charts.
plot(tw2_sn, type_chart = "sa-trend", caption = "Results")
plot(tw2_sn, type_chart = "cal-seas-irr")

In [None]:
# Seasonally adjusted ("sa") column of each district model's final series.
sa_eh12_sn <- eh12_sn$final$series[, "sa"]
sa_cf10_sn <- cf10_sn$final$series[, "sa"]
sa_tw2_sn <- tw2_sn$final$series[, "sa"]

# Bind the three series into one multi-column time series, labelled by
# district.
sn_sa <- cbind(
  EH12 = sa_eh12_sn,
  CF10 = sa_cf10_sn,
  TW2 = sa_tw2_sn
)

# Interactive chart of the seasonally adjusted levels.
sn_sa %>% dygraph(main = "Seasonally Adjusted SN raw spend")

In [None]:
# Seasonally adjusted series to be re-based, keyed by variable name.
sa_data_list <- list(
  sa_eh12_sn = sa_eh12_sn,
  sa_cf10_sn = sa_cf10_sn,
  sa_tw2_sn = sa_tw2_sn
)

# Re-base every series so its first observation equals 100, storing each
# result under "<original name>_index".
sa_indexed_list <- lapply(
  sa_data_list,
  function(series) (series / series[1]) * 100
)
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# Expose each indexed series as its own variable (sa_eh12_sn_index, ...).
invisible(list2env(sa_indexed_list, envir = environment()))

# Bind the indexed series into one multi-column time series and chart it.
combined_sa_sn <- cbind(
  EH12 = sa_eh12_sn_index,
  CF10 = sa_cf10_sn_index,
  TW2 = sa_tw2_sn_index
)
combined_sa_sn %>% dygraph(main = "Indexed seasonally adjusted spend")

In [None]:
# "y" column of each district model's final series (charted below as the
# non-seasonally-adjusted spend).
y_eh12_sn <- eh12_sn$final$series[, "y"]
y_cf10_sn <- cf10_sn$final$series[, "y"]
y_tw2_sn <- tw2_sn$final$series[, "y"]

data_list <- list(
  y_eh12_sn = y_eh12_sn,
  y_cf10_sn = y_cf10_sn,
  y_tw2_sn = y_tw2_sn
)

# Re-base every series so its first observation equals 100, storing each
# result under "<original name>_index".
y_indexed_list <- lapply(
  data_list,
  function(series) (series / series[1]) * 100
)
names(y_indexed_list) <- paste0(names(data_list), "_index")

# Expose each indexed series as its own variable (y_eh12_sn_index, ...).
invisible(list2env(y_indexed_list, envir = environment()))

# Bind the indexed series into one multi-column time series and chart it.
combined_y_sn <- cbind(
  EH12 = y_eh12_sn_index,
  CF10 = y_cf10_sn_index,
  TW2 = y_tw2_sn_index
)
combined_y_sn %>% dygraph(main = "Non-SA indexed spend")