This script performs the seasonal adjustment for the time of spend article in R. It writes the seasonally adjusted data to CSV files (and leaves it in the R environment) so that it can be used in Python.

In [None]:
library(DBI)
library(tidyverse)
library(data.table)
library(bigrquery)
library(dplyr)
library(lubridate)
library(RJDemetra)
library(dygraphs)
library(readr)
library(ggdemetra)
library(ggplot2)
library(rjdqa)
library(conflicted)
library(lubridate)
library(xts)
library(rjdqa)

# Resolve name clashes between the loaded packages up front
conflicts_prefer(
  dplyr::filter,
  dplyr::lag,
  ggdemetra::raw
)

# Open the BigQuery connection used by every query in this script
con <- dbConnect(bigquery(), project = "ons-fintrans-analysis-prod")

In [None]:
# Base X-13 specification shared by every seasonal adjustment in this script
base_spec <- RJDemetra::x13_spec(
  spec = "RSA4c",                      # base model spec
  # --- outlier detection ---
  outlier.enabled = TRUE,              # outlier detection is always on
  outlier.from = "2019-01-01",         # first month in series
  outlier.cv = 4,                      # outlier critical value
  outlier.ao = TRUE,                   # additive outliers enabled
  outlier.ls = TRUE,                   # level shift outliers enabled
  outlier.tc = TRUE,                   # transitory change outliers enabled
  outlier.so = FALSE,                  # seasonal outliers disabled
  outlier.tcrate = 0.7,                # rate of decay for tc outliers
  # --- benchmarking ---
  benchmarking.enabled = TRUE,         # constrain the SA series to a target
  benchmarking.target = "Original",    # target is the raw data
  benchmarking.rho = 1,                # base spec value
  benchmarking.lambda = 1              # base spec value
)

In [None]:
# to test other spec:
#base_spec <- RJDemetra::x13_spec(spec = "RSA5") 

#RSA5 same

In [None]:
# Monthly all-location, all-mcg totals from the retail performance table,
# for periods up to and including 202503
sql_all_spend_ret <- "SELECT time_period_value, cardholder_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.retail_performance_high_streets_towns
  WHERE time_period = 'Month' AND 
  merchant_location = 'All' AND
  cardholder_location = 'All' AND
  mcg = 'All' AND
  time_period_value <= '202503'
  ORDER BY time_period_value, cardholder_location"

# Run the query and hold the result as a data.table
all_spend_ret <- dbGetQuery(con, sql_all_spend_ret) %>%
  as.data.table()

In [None]:
# Monthly totals from spend_merchant_location at the 'All' merchant-location
# and cardholder-issuing levels, for periods up to and including 202503
sql_all_spend_sml <- "SELECT time_period_value, merchant_location, spend, transactions, cardholders
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'All' AND
  cardholder_issuing_level = 'All' AND
  mcg = 'All'  AND 
  time_period_value <= '202503'
  ORDER BY time_period_value, merchant_location"

# Run the query and hold the result as a data.table
all_spend_sml <- dbGetQuery(con, sql_all_spend_sml) %>%
  as.data.table()

In [None]:
# Cardholder index: each month's cardholder count relative to the first row
# of the table (rows are ordered by time_period_value in the queries).
conflicts_prefer(dplyr::first)

all_spend_ret <- all_spend_ret %>%
  mutate(idx_cards = cardholders / first(cardholders))

# Build the index for the spend_merchant_location totals from that table's
# own rows. Piping all_spend_ret here would overwrite all_spend_sml with the
# retail data, and every later UK series and uk_index_cards join uses it.
all_spend_sml <- all_spend_sml %>%
  mutate(idx_cards = cardholders / first(cardholders))

# TS

### High-level

In [None]:
# TS venue districts (EH12, CF10, HA9, L4): monthly postal-district rows for
# all mcg/mcc and all cardholder issuing levels, up to and including 202503
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
cardholder_issuing_level = 'All' AND
merchant_location IN ('EH12', 'CF10', 'HA9', 'L4') AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

all_ts <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

In [None]:
# Split the district data into one table per postal district
# (CF10, EH12, HA9 and L4); the UK series comes from the national totals
eh12_df <- all_ts %>% filter(merchant_location == "EH12")
cf10_df <- all_ts %>% filter(merchant_location == "CF10")
ha9_df <- all_ts %>% filter(merchant_location == "HA9")
l4_df <- all_ts %>% filter(merchant_location == "L4")

uk_df <- all_spend_sml

In [None]:
# L4 has no row for 202303: add a placeholder row with NA measures so the
# monthly series has no gap, then restore chronological order
l4_missing <- data.frame(
  time_period = "Month",
  time_period_value = "202303",
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "L4",
  cardholder_issuing_level = "All",
  cardholder_issuing_country = "All",
  mcg = "All",
  mcc = "All",
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)
df3 <- rbind(l4_df, l4_missing)
l4_df <- df3[order(time_period_value), ]

In [None]:
# Cardholder adjustment: divide spend by the UK-wide cardholder index
conflicts_prefer(dplyr::first)

# UK-wide cardholder index by month, ready to attach to each district table
uk_card_index <- all_spend_sml %>%
  select(time_period_value, uk_index_cards = idx_cards)

# attach the index to each district, then compute the adjusted spend
eh12_df <- eh12_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
cf10_df <- cf10_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
ha9_df <- ha9_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
l4_df <- l4_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)

# the UK table already carries its own index column
uk_df <- uk_df %>%
  mutate(adj_spend = spend / idx_cards)

# monthly time series starting January 2019; keep the adjusted spend column
eh12_tsi_uk <- ts(eh12_df, start = c(2019, 1), frequency = 12)
eh12_spend_tsi_uk <- eh12_tsi_uk[, "adj_spend"]
cf10_tsi_uk <- ts(cf10_df, start = c(2019, 1), frequency = 12)
cf10_spend_tsi_uk <- cf10_tsi_uk[, "adj_spend"]
ha9_tsi_uk <- ts(ha9_df, start = c(2019, 1), frequency = 12)
ha9_spend_tsi_uk <- ha9_tsi_uk[, "adj_spend"]
l4_tsi_uk <- ts(l4_df, start = c(2019, 1), frequency = 12)
l4_spend_tsi_uk <- l4_tsi_uk[, "adj_spend"]
uk_tsi_uk <- ts(uk_df, start = c(2019, 1), frequency = 12)
uk_spend_tsi_uk <- uk_tsi_uk[, "adj_spend"]

In [None]:
# Write each adjusted table to CSV with a monthly Date column that starts at
# January 2019 and has one entry per row of the table
csv_targets <- list(
  "eh12_uk.csv" = eh12_df,
  "cf10_uk.csv" = cf10_df,
  "ha9_uk.csv" = ha9_df,
  "l4_uk.csv" = l4_df,
  "uk_uk.csv" = uk_df
)

for (csv_path in names(csv_targets)) {
  dates <- seq(
    from = as.Date("2019-01-01"),
    by = "month",
    length.out = nrow(csv_targets[[csv_path]])
  )
  df <- data.frame(Date = dates, csv_targets[[csv_path]])
  write_csv(df, csv_path)
}

In [None]:
# Fit an X-13 model with the shared base spec to each adjusted-spend series
eh12_x13_model <- RJDemetra::x13(eh12_spend_tsi_uk, spec = base_spec)
cf10_x13_model <- RJDemetra::x13(cf10_spend_tsi_uk, spec = base_spec)
ha9_x13_model <- RJDemetra::x13(ha9_spend_tsi_uk, spec = base_spec)
l4_x13_model <- RJDemetra::x13(l4_spend_tsi_uk, spec = base_spec)
uk_x13_model <- RJDemetra::x13(uk_spend_tsi_uk, spec = base_spec)

In [None]:
# # Here we use RJDemetra to plot the components of the seasonal adjustment model.
# layout(matrix(1:6, 3, 2)); # create a matrix to plot the first six regarima components 
# plot(uk_x13_model$regarima)
# title("UK", outer = TRUE, line = -2, cex.main = 2)


In [None]:
# Quality dashboard for the UK model
dashboard_data <- simple_dashboard2(uk_x13_model)
plot(
  dashboard_data,
  main = "UK dashboard",
  subtitle = "SA with X13 RSA4c"
)

In [None]:
# Index each seasonally adjusted series to its first observation (= 100)

# seasonally adjusted component of each fitted model
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]
sa_uk <- uk_x13_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12 = sa_eh12,
  sa_cf10 = sa_cf10,
  sa_ha9 = sa_ha9,
  sa_l4 = sa_l4,
  sa_uk = sa_uk
)

# divide every series by its own first value and scale to 100
sa_indexed_list <- lapply(sa_data_list, function(sa_series) {
  (sa_series / sa_series[1]) * 100
})
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# expose each indexed series as its own variable (e.g. sa_eh12_index)
for (series_name in names(sa_indexed_list)) {
  assign(series_name, sa_indexed_list[[series_name]])
}

# bind the indexed series column-wise and label the columns by area
combined_sa <- cbind(
  sa_eh12_index,
  sa_cf10_index,
  sa_ha9_index,
  sa_l4_index,
  sa_uk_index
)
colnames(combined_sa) <- c("EH12", "CF10", "HA9", "L4", "UK")

# interactive plot of the indexed series
dygraph(combined_sa, main = "Indexed seasonally adjusted spend")


In [None]:
# Diagnostics for the EH12 model

# 3 x 2 grid so the first six regarima plots share one page
layout(matrix(1:6, nrow = 3, ncol = 2))
plot(eh12_x13_model$regarima)

# back to a single panel so later plots do not reuse the grid
par(mfrow = c(1, 1))

# linearised series, calendar effects and outliers
plot(eh12_x13_model$regarima, which = 7)

# S-I ratio
plot(eh12_x13_model$decomposition)

# final results charts
plot(eh12_x13_model, caption = "Results", type_chart = "sa-trend")
plot(eh12_x13_model, type_chart = "cal-seas-irr")

In [None]:
# Pull the seasonally adjusted ("sa") column out of each model's final series
sa_eh12 <- eh12_x13_model$final$series[, "sa"]
sa_cf10 <- cf10_x13_model$final$series[, "sa"]
sa_ha9 <- ha9_x13_model$final$series[, "sa"]
sa_l4 <- l4_x13_model$final$series[, "sa"]
sa_uk <- uk_x13_model$final$series[, "sa"]

In [None]:
# Seasonal-irregular ratios for EH12, with the district name as page title
plot(eh12_x13_model$decomposition)
title(
  "EH12",
  outer = TRUE,
  line = -2,
  cex.main = 1.5
)


In [None]:
# Plot the first two columns of the UK model's final series. The title is
# "UK" because the series plotted comes from uk_x13_model, not the HA9 model.
dygraph(uk_x13_model$final$series[, 1:2], main = "UK : SA and Non-SA")

In [None]:
# Quarterly S-I graphs: start from the S-I ratios of the EH12 decomposition
eh_decomp <- eh12_x13_model$decomposition
si <- eh_decomp$si_ratio

In [None]:
library(lubridate)
library(tidyr)

# One row per observation: the observation date and its S-I value
si_df <- data.frame(
  date = as.Date(time(si)),
  si = as.numeric(si)
)

In [None]:
# Use the lubridate versions of year() and quarter()
conflicts_prefer(
  lubridate::year,
  lubridate::quarter
)

# Tag every observation with its calendar year and quarter
si_df <- si_df %>%
  mutate(
    year = year(date),
    quarter = quarter(date, with_year = FALSE, fiscal_start = 1)
  )

In [None]:
# Average S-I value per quarter, ignoring missing values
quarterly_summary <- si_df %>%
  group_by(quarter) %>%
  summarise(mean_si = mean(si, na.rm = TRUE))

In [None]:
library(ggplot2)

# Distribution of S-I values within each quarter
ggplot(si_df, aes(x = factor(quarter), y = si)) +
  geom_boxplot(fill = "lightblue")

### International spend

In [None]:
# International-cardholder spend in the TS districts: monthly postal-district
# rows across all issuing countries, up to and including 202503
sql_ts_int <- "SELECT *
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'POSTAL_DISTRICT' AND
  cardholder_issuing_level = 'International' AND
  cardholder_issuing_country = 'All' AND
  mcg = 'All' AND
  merchant_location IN  ('EH12', 'CF10', 'HA9', 'L4') AND
  time_period_value <= '202503'
  ORDER BY time_period_value, merchant_location"

int_ts <- dbGetQuery(con, sql_ts_int) %>%
  as.data.table()

In [None]:
# International-cardholder spend at the 'All' merchant-location level,
# monthly, up to and including 202503
sql_ts_int_uk <- "SELECT *
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'All' AND
  cardholder_issuing_level = 'International' AND
  cardholder_issuing_country = 'All' AND
  mcg = 'All' AND
  time_period_value <= '202503'
  ORDER BY time_period_value, merchant_location"

int_ts_uk <- dbGetQuery(con, sql_ts_int_uk) %>%
  as.data.table()

# cardholder index: each month's cardholders relative to the first row
int_ts_uk <- int_ts_uk %>%
  mutate(idx_cards = cardholders / first(cardholders))

In [None]:
# L4 missing 1 month: 202006

In [None]:
# One international-spend table per postal district, plus the UK-level table
eh12_int <- int_ts %>% filter(merchant_location == "EH12")
cf10_int <- int_ts %>% filter(merchant_location == "CF10")
ha9_int <- int_ts %>% filter(merchant_location == "HA9")
l4_int <- int_ts %>% filter(merchant_location == "L4")

uk_int <- int_ts_uk

In [None]:
# L4 is missing one month of international data: add a placeholder row with
# NA measures, then restore chronological order.
# NOTE(review): the original note on this cell names 202006 as the missing
# month, but the row inserted here is for 202306 - confirm which is correct.
l4_missing_int <- data.frame(
  time_period = "Month",
  time_period_value = "202306",
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "L4",
  cardholder_issuing_level = "International",
  cardholder_issuing_country = "All",
  mcg = "All",
  mcc = "All",
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)
df3 <- rbind(l4_int, l4_missing_int)
l4_int <- df3[order(time_period_value), ]

In [None]:
# Cardholder adjustment: divide spend by the index of total international
# cardholders
conflicts_prefer(dplyr::first)

# international cardholder index by month, ready to attach to each district
int_card_index <- int_ts_uk %>%
  select(time_period_value, uk_index_cards = idx_cards)

# attach the index to each district, then compute the adjusted spend
eh12_int <- eh12_int %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
cf10_int <- cf10_int %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
ha9_int <- ha9_int %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
l4_int <- l4_int %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)

# UK level: build the index from its own cardholders, then adjust
uk_int <- uk_int %>%
  mutate(
    index_cards_uk = cardholders / first(cardholders),
    adj_spend = spend / index_cards_uk
  )

# monthly time series starting January 2019; keep the adjusted spend column
eh12_tsi_uk <- ts(eh12_int, start = c(2019, 1), frequency = 12)
eh12_spend_tsi_uk <- eh12_tsi_uk[, "adj_spend"]
cf10_tsi_uk <- ts(cf10_int, start = c(2019, 1), frequency = 12)
cf10_spend_tsi_uk <- cf10_tsi_uk[, "adj_spend"]
ha9_tsi_uk <- ts(ha9_int, start = c(2019, 1), frequency = 12)
ha9_spend_tsi_uk <- ha9_tsi_uk[, "adj_spend"]
l4_tsi_uk <- ts(l4_int, start = c(2019, 1), frequency = 12)
l4_spend_tsi_uk <- l4_tsi_uk[, "adj_spend"]
uk_tsi_uk <- ts(uk_int, start = c(2019, 1), frequency = 12)
uk_spend_tsi_uk <- uk_tsi_uk[, "adj_spend"]

In [None]:
# Fit an X-13 model with the shared base spec to each international series
eh12_int_model <- RJDemetra::x13(eh12_spend_tsi_uk, spec = base_spec)
cf10_int_model <- RJDemetra::x13(cf10_spend_tsi_uk, spec = base_spec)
ha9_int_model <- RJDemetra::x13(ha9_spend_tsi_uk, spec = base_spec)
l4_int_model <- RJDemetra::x13(l4_spend_tsi_uk, spec = base_spec)
uk_int_model <- RJDemetra::x13(uk_spend_tsi_uk, spec = base_spec)

In [None]:
# # Here we use RJDemetra to plot the components of the seasonal adjustment model.
# layout(matrix(1:6, 3, 2)); # create a matrix to plot the first six regarima components 
# plot(eh12_snint_model$regarima)
# title("UK", outer = TRUE, line = -2, cex.main = 2)

In [None]:
# Index each seasonally adjusted international series to its first
# observation (= 100)

# seasonally adjusted component of each fitted model
sa_eh12_int <- eh12_int_model$final$series[, "sa"]
sa_cf10_int <- cf10_int_model$final$series[, "sa"]
sa_ha9_int <- ha9_int_model$final$series[, "sa"]
sa_l4_int <- l4_int_model$final$series[, "sa"]
sa_uk_int <- uk_int_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12_int = sa_eh12_int,
  sa_cf10_int = sa_cf10_int,
  sa_ha9_int = sa_ha9_int,
  sa_l4_int = sa_l4_int,
  sa_uk_int = sa_uk_int
)

# divide every series by its own first value and scale to 100
sa_indexed_list <- lapply(sa_data_list, function(sa_series) {
  (sa_series / sa_series[1]) * 100
})
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# expose each indexed series as its own variable (e.g. sa_eh12_int_index)
for (series_name in names(sa_indexed_list)) {
  assign(series_name, sa_indexed_list[[series_name]])
}

# bind the indexed series column-wise and label the columns by area
combined_sa_int <- cbind(
  sa_eh12_int_index,
  sa_cf10_int_index,
  sa_ha9_int_index,
  sa_l4_int_index,
  sa_uk_int_index
)
colnames(combined_sa_int) <- c("EH12", "CF10", "HA9", "L4", "UK")

# interactive plot of the indexed series
dygraph(combined_sa_int, main = "Indexed seasonally adjusted international spend")


In [None]:

# Export the indexed international series with a monthly Date column that
# starts at January 2019 and has one entry per row
dates <- seq(
  from = as.Date("2019-01-01"),
  by = "month",
  length.out = nrow(combined_sa_int)
)
df <- data.frame(Date = dates, combined_sa_int)

write_csv(df, "ts_international_spend.csv")

----------------------

----------------------

# SN

### High level

In [None]:
# SN districts (EH12, CF10, TW2): monthly postal-district rows for all
# mcg/mcc and all cardholder issuing levels, for periods before 202504
sql_full_sn <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level != 'All' AND
cardholder_issuing_level = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
merchant_location IN ('EH12', 'CF10', 'TW2') AND
time_period_value < '202504'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

sn_full <- dbGetQuery(con, sql_full_sn) %>%
  as.data.table()

In [None]:
# One table per SN postal district; the UK series comes from the national
# totals
eh12_df <- sn_full %>% filter(merchant_location == "EH12")
cf10_df <- sn_full %>% filter(merchant_location == "CF10")
tw2_df <- sn_full %>% filter(merchant_location == "TW2")

uk_df <- all_spend_sml

In [None]:
# TW2 has months with no data: add placeholder rows with NA measures for
# them, then restore chronological order
tw2_missing <- data.frame(
  time_period = "Month",
  time_period_value = c(
    "202004", "202005", "202011", "202012", "202101", "202102"
  ),
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "TW2",
  cardholder_issuing_level = "All",
  cardholder_issuing_country = "All",
  mcg = "All",
  mcc = "All",
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)
df3 <- rbind(tw2_df, tw2_missing)
tw2_df <- df3[order(time_period_value), ]

In [None]:
# Cardholder adjustment for the SN districts
conflicts_prefer(dplyr::first)

# UK-wide cardholder index by month, ready to attach to each district table
uk_card_index <- all_spend_sml %>%
  select(time_period_value, uk_index_cards = idx_cards)

# attach the index to each district, then compute the adjusted spend
eh12_df <- eh12_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
cf10_df <- cf10_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
tw2_df <- tw2_df %>%
  left_join(uk_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)

# the UK table already carries its own index column
uk_df <- uk_df %>%
  mutate(adj_spend = spend / idx_cards)

# monthly time series starting January 2019; keep the adjusted spend column
eh12_tsi_uk <- ts(eh12_df, start = c(2019, 1), frequency = 12)
eh12_spend_tsi_uk <- eh12_tsi_uk[, "adj_spend"]
cf10_tsi_uk <- ts(cf10_df, start = c(2019, 1), frequency = 12)
cf10_spend_tsi_uk <- cf10_tsi_uk[, "adj_spend"]
tw2_tsi_uk <- ts(tw2_df, start = c(2019, 1), frequency = 12)
tw2_spend_tsi_uk <- tw2_tsi_uk[, "adj_spend"]
uk_tsi_uk <- ts(uk_df, start = c(2019, 1), frequency = 12)
uk_spend_tsi_uk <- uk_tsi_uk[, "adj_spend"]

In [None]:
# Fit an X-13 model with the shared base spec to each SN series
eh12_sn_model <- RJDemetra::x13(eh12_spend_tsi_uk, spec = base_spec)
cf10_sn_model <- RJDemetra::x13(cf10_spend_tsi_uk, spec = base_spec)
tw2_sn_model <- RJDemetra::x13(tw2_spend_tsi_uk, spec = base_spec)
uk_sn_model <- RJDemetra::x13(uk_spend_tsi_uk, spec = base_spec)

In [None]:
# # Here we use RJDemetra to plot the components of the seasonal adjustment model.
# layout(matrix(1:6, 3, 2)); # create a matrix to plot the first six regarima components 
# plot(cf10_sn_model$regarima)
# title("UK", outer = TRUE, line = -2, cex.main = 2)

In [None]:
# Index each seasonally adjusted SN series to its first observation (= 100)

# seasonally adjusted component of each fitted model
sa_eh12_sn <- eh12_sn_model$final$series[, "sa"]
sa_cf10_sn <- cf10_sn_model$final$series[, "sa"]
sa_tw2_sn <- tw2_sn_model$final$series[, "sa"]
sa_uk_sn <- uk_sn_model$final$series[, "sa"]

sa_data_list <- list(
  sa_eh12_sn = sa_eh12_sn,
  sa_cf10_sn = sa_cf10_sn,
  sa_tw2_sn = sa_tw2_sn,
  sa_uk_sn = sa_uk_sn
)

# divide every series by its own first value and scale to 100
sa_indexed_list <- lapply(sa_data_list, function(sa_series) {
  (sa_series / sa_series[1]) * 100
})
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# expose each indexed series as its own variable (e.g. sa_eh12_sn_index)
for (series_name in names(sa_indexed_list)) {
  assign(series_name, sa_indexed_list[[series_name]])
}

# bind the indexed series column-wise and label the columns by area
combined_sa_sn <- cbind(
  sa_eh12_sn_index,
  sa_cf10_sn_index,
  sa_tw2_sn_index,
  sa_uk_sn_index
)
colnames(combined_sa_sn) <- c("EH12", "CF10", "TW2", "UK")

# interactive plot of the indexed series
dygraph(combined_sa_sn, main = "Indexed seasonally adjusted spend")


In [None]:

# Export the indexed SN series with a monthly Date column that starts at
# January 2019 and has one entry per row
dates <- seq(
  from = as.Date("2019-01-01"),
  by = "month",
  length.out = nrow(combined_sa_sn)
)
df <- data.frame(Date = dates, combined_sa_sn)

write_csv(df, "sn_high_level.csv")

### International spend

In [None]:
# Spend in CF10 by cards issued in France, Italy and the Republic of Ireland:
# monthly postal-district rows, up to and including 202503
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
cardholder_issuing_level != 'All' AND
cardholder_issuing_country IN ('FRANCE', 'ITALY', 'REPUBLIC OF IRELAND') AND
merchant_location IN ('CF10') AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

sn_countries <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

# one table per issuing country
france_df <- sn_countries %>% filter(cardholder_issuing_country == "FRANCE")
italy_df <- sn_countries %>% filter(cardholder_issuing_country == "ITALY")
ireland_df <- sn_countries %>%
  filter(cardholder_issuing_country == "REPUBLIC OF IRELAND")

In [None]:
# International-cardholder spend at the 'All' merchant-location level,
# monthly, up to and including 202503
sql_sn_int_uk <- "SELECT *
  FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
  WHERE time_period = 'Month' AND 
  merchant_location_level = 'All' AND
  cardholder_issuing_level = 'International' AND
  cardholder_issuing_country = 'All' AND
  mcg = 'All' AND
  time_period_value <= '202503'
  ORDER BY time_period_value, merchant_location"

sn_highlevel <- dbGetQuery(con, sql_sn_int_uk) %>%
  as.data.table()

# cardholder index: each month's cardholders relative to the first row
sn_highlevel <- sn_highlevel %>%
  mutate(idx_cards = cardholders / first(cardholders))

In [None]:
# Pad the Italy and Ireland tables to the full series up to Q1 2025
# (75 months): add placeholder rows with NA measures for months with no
# data, then restore chronological order

italy_missing <- data.frame(
  time_period = "Month",
  time_period_value = c(
    "202004", "202005", "202006", "202011", "202012", "202104"
  ),
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "CF10",
  cardholder_issuing_level = "International",
  cardholder_issuing_country = "ITALY",
  mcg = "All",
  mcc = "All",
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)
df3 <- rbind(italy_df, italy_missing)
italy_df <- df3[order(time_period_value), ]

ireland_missing <- data.frame(
  time_period = "Month",
  time_period_value = c("202004", "202103"),
  merchant_location_level = "POSTAL_DISTRICT",
  merchant_location = "CF10",
  cardholder_issuing_level = "International",
  cardholder_issuing_country = "REPUBLIC OF IRELAND",
  mcg = "All",
  mcc = "All",
  spend = NA,
  transactions = NA,
  cardholders = NA,
  dist_merchants = NA,
  pct_repeat_pan_cnt = NA
)
df3 <- rbind(ireland_df, ireland_missing)
ireland_df <- df3[order(time_period_value), ]


In [None]:
# Cardholder adjustment for the country-level series
conflicts_prefer(dplyr::first)

# international cardholder index by month, ready to attach to each country
int_card_index <- sn_highlevel %>%
  select(time_period_value, uk_index_cards = idx_cards)

# attach the index to each country table, then compute the adjusted spend
france_df <- france_df %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
italy_df <- italy_df %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)
ireland_df <- ireland_df %>%
  left_join(int_card_index, by = "time_period_value") %>%
  mutate(adj_spend = spend / uk_index_cards)

# monthly time series starting January 2019; keep the adjusted spend column
france_tsi_uk <- ts(france_df, start = c(2019, 1), frequency = 12)
france_spend_tsi_uk <- france_tsi_uk[, "adj_spend"]
italy_tsi_uk <- ts(italy_df, start = c(2019, 1), frequency = 12)
italy_spend_tsi_uk <- italy_tsi_uk[, "adj_spend"]
ireland_tsi_uk <- ts(ireland_df, start = c(2019, 1), frequency = 12)
ireland_spend_tsi_uk <- ireland_tsi_uk[, "adj_spend"]

In [None]:
# Fit an X-13 model with the shared base spec to each country series
france_model <- RJDemetra::x13(france_spend_tsi_uk, spec = base_spec)
ireland_model <- RJDemetra::x13(ireland_spend_tsi_uk, spec = base_spec)
italy_model <- RJDemetra::x13(italy_spend_tsi_uk, spec = base_spec)

In [None]:
# # Here we use RJDemetra to plot the components of the seasonal adjustment model.
# layout(matrix(1:6, 3, 2)); # create a matrix to plot the first six regarima components 
# plot(ireland_model$regarima)
# title("Ireland", outer = TRUE, line = -2, cex.main = 2)

In [None]:
# Quality dashboard for the Italy model. The title names Italy because the
# dashboard is built from italy_model, not the France model.
dashboard_data <- simple_dashboard2(italy_model)
plot(dashboard_data, main = "Italy dashboard",
     subtitle = "SA with X13 RSA4c")

In [None]:
# Index each seasonally adjusted country series to its first observation
# (= 100)

# seasonally adjusted component of each fitted model
sa_france <- france_model$final$series[, "sa"]
sa_italy <- italy_model$final$series[, "sa"]
sa_ireland <- ireland_model$final$series[, "sa"]

sa_data_list <- list(
  sa_france = sa_france,
  sa_italy = sa_italy,
  sa_ireland = sa_ireland
)

# divide every series by its own first value and scale to 100
sa_indexed_list <- lapply(sa_data_list, function(sa_series) {
  (sa_series / sa_series[1]) * 100
})
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# expose each indexed series as its own variable (e.g. sa_france_index)
for (series_name in names(sa_indexed_list)) {
  assign(series_name, sa_indexed_list[[series_name]])
}

# bind the indexed series column-wise and label the columns by country
combined_sa_int_sn <- cbind(
  sa_france_index,
  sa_italy_index,
  sa_ireland_index
)
colnames(combined_sa_int_sn) <- c("France", "Italy", "Ireland")

# interactive plot of the indexed series
dygraph(combined_sa_int_sn, main = "Indexed seasonally adjusted spend")

In [None]:

# Export the indexed country series with a monthly Date column that starts
# at January 2019 and has one entry per row
dates <- seq(
  from = as.Date("2019-01-01"),
  by = "month",
  length.out = nrow(combined_sa_int_sn)
)
df <- data.frame(Date = dates, combined_sa_int_sn)

write_csv(df, "sn_international_spend.csv")

In [None]:
# Plot the first two columns of the Italy model's final series
dygraph(
  italy_model$final$series[, 1:2],
  main = "Italy spend : SA and Non-SA"
)

In [None]:
# Seasonal-irregular ratios for the Ireland model. The title names Ireland
# because the decomposition plotted comes from ireland_model.
# NOTE(review): if the Italy plot was intended, change the model to
# italy_model and the title back to "Italy".

plot(ireland_model$decomposition)
title("Ireland", outer = TRUE, line = -2, cex.main = 1.5)

-------------------------------

-------------------------------

# HH

### High-level

In [None]:
# HH venue district (LL65): monthly postal-district rows for all mcg/mcc and
# all cardholder issuing levels, up to and including 202503
sql_all_spend <- "SELECT *
FROM ons-fintrans-data-prod.fintrans_visa.spend_merchant_location
WHERE time_period = 'Month' AND
mcg = 'All' AND
mcc = 'All' AND
merchant_location_level = 'POSTAL_DISTRICT' AND
cardholder_issuing_level = 'All' AND
merchant_location = 'LL65' AND
time_period_value <= '202503'
ORDER BY time_period_value, merchant_location_level, cardholder_issuing_country, merchant_location, mcg, mcc, spend, transactions, cardholders"

ll65_df <- dbGetQuery(con, sql_all_spend) %>%
  as.data.table()

In [None]:
# Start the UK comparison series from the national spend_merchant_location
# totals, which already carry the idx_cards column
uk_df <- all_spend_sml

In [None]:
# Cardholder adjustment: attach the UK-wide cardholder index to LL65 by
# month and divide spend by it
ll65_df <- ll65_df %>%
  left_join(
    all_spend_sml %>% select(time_period_value, uk_index_cards = idx_cards),
    by = "time_period_value"
  ) %>%
  mutate(adj_spend = spend / uk_index_cards)

# the UK table already carries its own index column
uk_df <- uk_df %>%
  mutate(adj_spend = spend / idx_cards)


In [None]:
# Monthly time series starting January 2019; keep the adjusted spend column
ll65_tsi_uk <- ts(ll65_df, start = c(2019, 1), frequency = 12)
ll65_spend_tsi_uk <- ll65_tsi_uk[, "adj_spend"]

uk_tsi_uk <- ts(uk_df, start = c(2019, 1), frequency = 12)
uk_spend_tsi_uk <- uk_tsi_uk[, "adj_spend"]

In [None]:
# Fit an X-13 model with the shared base spec to the LL65 and UK series
ll65_x13_model <- RJDemetra::x13(ll65_spend_tsi_uk, spec = base_spec)
uk_x13_model <- RJDemetra::x13(uk_spend_tsi_uk, spec = base_spec)

In [None]:
# Index the seasonally adjusted LL65 and UK series to their first
# observation (= 100)

# seasonally adjusted component of each fitted model
sa_ll65 <- ll65_x13_model$final$series[, "sa"]
sa_uk <- uk_x13_model$final$series[, "sa"]

sa_data_list <- list(
  sa_ll65 = sa_ll65,
  sa_uk = sa_uk
)

# divide every series by its own first value and scale to 100
sa_indexed_list <- lapply(sa_data_list, function(sa_series) {
  (sa_series / sa_series[1]) * 100
})
names(sa_indexed_list) <- paste0(names(sa_data_list), "_index")

# expose each indexed series as its own variable (e.g. sa_ll65_index)
for (series_name in names(sa_indexed_list)) {
  assign(series_name, sa_indexed_list[[series_name]])
}

# bind the indexed series column-wise and label the columns by area
combined_sa <- cbind(sa_ll65_index, sa_uk_index)
colnames(combined_sa) <- c("LL65", "UK")

# interactive plot of the indexed series
dygraph(combined_sa, main = "Indexed seasonally adjusted spend")


In [None]:
# Export the indexed HH series with a monthly Date column starting at
# January 2019. The number of dates is taken from combined_sa itself, the
# object being written, so the Date column always matches its row count
# rather than depending on the country-level table from another section.
dates <- seq(from = as.Date("2019-01-01"), by = "month", length.out = nrow(combined_sa))
df <- data.frame(Date = dates, combined_sa)

write_csv(df, "hh_high_level.csv")