In [1]:
library(dlnm)
library(mgcv)
library(data.table)
library(plotly)
library(ggplot2)
library(splines)
library(parallel)
library(doParallel)
library(foreach)
library(coda)
library(lubridate)
library(xts)
library(timetk)
library(forecast)
library(gridExtra)
library(tidyr)
library(dplyr)
library(Hmisc)
library(xtable)
library(MuMIn)
library(dlnm) ; library(splines) ; library(MASS) ; library(tsModel)

# READ DATA
# Load the monthly modelling table and keep only the columns used below.
# This is the "mix" split: max temperature and min humidity combined with
# the mean pollutant concentrations.
data <- read.csv("modelling_data/monthly_data.csv")

# select() is exported by dplyr, so `::` is sufficient; `:::` is reserved for
# reaching into a package's internals.
data <- data %>%
  dplyr::select(
    at_code,
    events,
    temp = max_temp,
    hum = min_hum,
    is_holiday,
    month,
    year,
    pm10 = mean_PM10,
    o3 = mean_O3,
    no2 = mean_NO2,
    so2 = mean_SO2,
    co = mean_CO
  )

# Filter data for AT01
# %in% never yields NA, so rows with a missing at_code are dropped instead of
# coming back as all-NA rows (which `==` inside `[` would produce).
data <- data[data$at_code %in% "AT01", ]

# Initialize an empty data frame to store results.
# The columns mirror the rows appended by the model loop (which carry
# has_holiday); data_split is only filled in after the grid search.
results_df <- data.frame(
  model = character(),       # label built from the grid settings
  aic = numeric(),           # AIC of the fitted model
  deviance = numeric(),      # residual deviance of the fitted model
  year_df = numeric(),       # degrees of freedom of the year spline
  has_holiday = logical(),   # TRUE when is_holiday is in the model
  spline_fun = character(),  # spline function name recorded for the row
  df = character(),          # exposure/lag df setting recorded for the row
  data_split = character()   # data split type, assigned after the loop
)

# Define combinations of degrees of freedom and spline functions
# Each element of dfs is a pair; create_crossbasis() reads element 1 as the
# exposure df and element 2 as the lag df.
dfs <- list(c(2, 2), c(3, 3), c(4, 4), c(5, 5), c(6, 6), c(7, 7))
spline_funs <- c("ns", "bs")           # spline functions
year_dfs <- c(1, 2, 3, 4, 5, 6, 7, 8)  # candidate df values for ns(year)

# Define variable combinations for modeling
# NOTE(review): one set lists is_holiday explicitly although the model loop
# also handles the holiday term on its own -- confirm this set is intended.
variable_sets <- list(
  c("hum"),
  c("hum", "temp"),
  c("hum", "temp", "pm10"),
  c("hum", "temp", "pm10", "o3"),
  c("hum", "temp", "pm10", "o3", "no2"),
  c("hum", "temp", "pm10", "no2"),
  c("hum", "temp", "pm10", "is_holiday", "no2"),
  c("hum", "temp", "pm10", "so2", "no2"),
  c("hum", "temp", "so2", "no2"),
  c("hum", "temp", "no2"),
  c("hum", "temp", "so2"),
  c("hum", "temp", "o3"),
  c("hum", "temp", "co"),
  c("temp"),
  c("pm10"),
  c("hum", "temp", "pm10", "o3", "no2", "so2"),
  c("hum", "temp", "pm10", "o3", "no2", "so2", "co")
)

# Build a dlnm cross-basis for one column of the global `data` frame.
#
# var        name of the column in `data` used as the exposure series
# spline_fun basis function name, applied to both the exposure and the lag
#            dimension
# dfs        vector whose first element is the df of the exposure basis and
#            whose second element is the df of the lag basis
#
# Returns the object produced by crossbasis() with a maximum lag of 12.
# `data` is looked up when the function is called, not when it is defined.
create_crossbasis <- function(var, spline_fun, dfs) {
  exposure <- data[[var]]
  var_spec <- list(fun = spline_fun, df = dfs[1])
  lag_spec <- list(fun = spline_fun, df = dfs[2])
  crossbasis(exposure, lag = 12, argvar = var_spec, arglag = lag_spec)
}

# Iterate over all combinations of spline functions, dfs, variable sets, and
# year dfs. For each combination a Poisson GLM of `events` is fitted twice,
# once with and once without `is_holiday`, and its AIC and residual deviance
# are recorded as one row per fit.
#
# NOTE(review): spline_fun and df_other only label the rows. They never enter
# the model formula and create_crossbasis() is not called here, so the same
# (df_year, var_set) model is refitted unchanged for every spline_fun/df_other
# combination. Confirm whether cross-basis terms were meant to replace the
# linear covariates.
n_fits <- 2L * length(spline_funs) * length(year_dfs) * length(dfs) *
  length(variable_sets)
result_rows <- vector("list", n_fits)
row_idx <- 0L

for (spline_fun in spline_funs) {
  for (df_year in year_dfs) {
    for (df_other in dfs) {
      # Collapse the df pair into one label. Passing the length-2 vector
      # straight to data.frame() recycled the other columns and stored every
      # model twice.
      df_label <- paste(df_other, collapse = "_")

      for (var_set in variable_sets) {
        # The holiday term is controlled by the with/without switch below, so
        # it is removed from the covariate list; otherwise the
        # "without_holiday" fit of a set listing is_holiday would still
        # contain it while being flagged has_holiday = FALSE.
        covariates <- setdiff(var_set, "is_holiday")
        var_label <- paste(var_set, collapse = "_")

        for (with_holiday in c(TRUE, FALSE)) {
          rhs_terms <- c(
            paste0("ns(year, df = ", df_year, ")"),
            if (with_holiday) "is_holiday",
            covariates
          )
          model_formula <- as.formula(
            paste("events ~", paste(rhs_terms, collapse = " + "))
          )
          model_fit <- glm(model_formula, data = data,
                           na.action = na.exclude, family = poisson)

          holiday_label <- if (with_holiday) "with_holiday" else "without_holiday"
          row_idx <- row_idx + 1L
          result_rows[[row_idx]] <- data.frame(
            model = paste(spline_fun, df_year, df_label, var_label,
                          holiday_label, sep = "_"),
            aic = AIC(model_fit),
            deviance = model_fit$deviance,
            year_df = df_year,
            has_holiday = with_holiday,
            spline_fun = spline_fun,
            df = df_label
          )
        }
      }
    }
  }
}

# Bind all rows once instead of growing results_df with rbind() on every
# iteration.
results_df <- rbind(results_df, do.call(rbind, result_rows))

# Rank the candidates by AIC (ties broken by deviance) and tag the data split.
results_df_AT01_mix <- results_df %>% arrange(aic, deviance)
results_df_AT01_mix['data_split'] <- 'mix'
# READ DATA
# Load the monthly modelling table and keep only the columns used below.
# This is the "mean" split: temperature and humidity are the midpoints of
# their max and min columns, combined with the mean pollutant concentrations.
data <- read.csv("modelling_data/monthly_data.csv")

# mutate() and select() are exported by dplyr, so `::` is sufficient; `:::`
# is reserved for reaching into a package's internals.
data <- data %>%
  dplyr::mutate(
    mean_temp = (max_temp + min_temp) / 2,
    mean_hum = (max_hum + min_hum) / 2
  ) %>%
  dplyr::select(
    at_code,
    events,
    temp = mean_temp,
    hum = mean_hum,
    is_holiday,
    month,
    year,
    pm10 = mean_PM10,
    o3 = mean_O3,
    no2 = mean_NO2,
    so2 = mean_SO2,
    co = mean_CO
  )

# Filter data for AT01
# %in% never yields NA, so rows with a missing at_code are dropped instead of
# coming back as all-NA rows (which `==` inside `[` would produce).
data <- data[data$at_code %in% "AT01", ]

# Initialize an empty data frame to store results.
# The columns mirror the rows appended by the model loop (which carry
# has_holiday); data_split is only filled in after the grid search.
results_df <- data.frame(
  model = character(),       # label built from the grid settings
  aic = numeric(),           # AIC of the fitted model
  deviance = numeric(),      # residual deviance of the fitted model
  year_df = numeric(),       # degrees of freedom of the year spline
  has_holiday = logical(),   # TRUE when is_holiday is in the model
  spline_fun = character(),  # spline function name recorded for the row
  df = character(),          # exposure/lag df setting recorded for the row
  data_split = character()   # data split type, assigned after the loop
)

# Define combinations of degrees of freedom and spline functions
# Each element of dfs is a pair; create_crossbasis() reads element 1 as the
# exposure df and element 2 as the lag df.
dfs <- list(c(2, 2), c(3, 3), c(4, 4), c(5, 5), c(6, 6), c(7, 7))
spline_funs <- c("ns", "bs")           # spline functions
year_dfs <- c(1, 2, 3, 4, 5, 6, 7, 8)  # candidate df values for ns(year)

# Define variable combinations for modeling
# NOTE(review): one set lists is_holiday explicitly although the model loop
# also handles the holiday term on its own -- confirm this set is intended.
variable_sets <- list(
  c("hum"),
  c("hum", "temp"),
  c("hum", "temp", "pm10"),
  c("hum", "temp", "pm10", "o3"),
  c("hum", "temp", "pm10", "o3", "no2"),
  c("hum", "temp", "pm10", "no2"),
  c("hum", "temp", "pm10", "is_holiday", "no2"),
  c("hum", "temp", "pm10", "so2", "no2"),
  c("hum", "temp", "so2", "no2"),
  c("hum", "temp", "no2"),
  c("hum", "temp", "so2"),
  c("hum", "temp", "o3"),
  c("hum", "temp", "co"),
  c("temp"),
  c("pm10"),
  c("hum", "temp", "pm10", "o3", "no2", "so2"),
  c("hum", "temp", "pm10", "o3", "no2", "so2", "co")
)

# Build a dlnm cross-basis for one column of the global `data` frame.
#
# var        name of the column in `data` used as the exposure series
# spline_fun basis function name, applied to both the exposure and the lag
#            dimension
# dfs        vector whose first element is the df of the exposure basis and
#            whose second element is the df of the lag basis
#
# Returns the object produced by crossbasis() with a maximum lag of 12.
# `data` is looked up when the function is called, not when it is defined.
create_crossbasis <- function(var, spline_fun, dfs) {
  exposure <- data[[var]]
  var_spec <- list(fun = spline_fun, df = dfs[1])
  lag_spec <- list(fun = spline_fun, df = dfs[2])
  crossbasis(exposure, lag = 12, argvar = var_spec, arglag = lag_spec)
}

# Iterate over all combinations of spline functions, dfs, variable sets, and
# year dfs. For each combination a Poisson GLM of `events` is fitted twice,
# once with and once without `is_holiday`, and its AIC and residual deviance
# are recorded as one row per fit.
#
# NOTE(review): spline_fun and df_other only label the rows. They never enter
# the model formula and create_crossbasis() is not called here, so the same
# (df_year, var_set) model is refitted unchanged for every spline_fun/df_other
# combination. Confirm whether cross-basis terms were meant to replace the
# linear covariates.
n_fits <- 2L * length(spline_funs) * length(year_dfs) * length(dfs) *
  length(variable_sets)
result_rows <- vector("list", n_fits)
row_idx <- 0L

for (spline_fun in spline_funs) {
  for (df_year in year_dfs) {
    for (df_other in dfs) {
      # Collapse the df pair into one label. Passing the length-2 vector
      # straight to data.frame() recycled the other columns and stored every
      # model twice.
      df_label <- paste(df_other, collapse = "_")

      for (var_set in variable_sets) {
        # The holiday term is controlled by the with/without switch below, so
        # it is removed from the covariate list; otherwise the
        # "without_holiday" fit of a set listing is_holiday would still
        # contain it while being flagged has_holiday = FALSE.
        covariates <- setdiff(var_set, "is_holiday")
        var_label <- paste(var_set, collapse = "_")

        for (with_holiday in c(TRUE, FALSE)) {
          rhs_terms <- c(
            paste0("ns(year, df = ", df_year, ")"),
            if (with_holiday) "is_holiday",
            covariates
          )
          model_formula <- as.formula(
            paste("events ~", paste(rhs_terms, collapse = " + "))
          )
          model_fit <- glm(model_formula, data = data,
                           na.action = na.exclude, family = poisson)

          holiday_label <- if (with_holiday) "with_holiday" else "without_holiday"
          row_idx <- row_idx + 1L
          result_rows[[row_idx]] <- data.frame(
            model = paste(spline_fun, df_year, df_label, var_label,
                          holiday_label, sep = "_"),
            aic = AIC(model_fit),
            deviance = model_fit$deviance,
            year_df = df_year,
            has_holiday = with_holiday,
            spline_fun = spline_fun,
            df = df_label
          )
        }
      }
    }
  }
}

# Bind all rows once instead of growing results_df with rbind() on every
# iteration.
results_df <- rbind(results_df, do.call(rbind, result_rows))

# Rank the candidates by AIC (ties broken by deviance) and tag the data split.
results_df_AT01_mean <- results_df %>% arrange(aic, deviance)
results_df_AT01_mean['data_split'] <- 'mean'
# READ DATA
# Load the monthly modelling table and keep only the columns used below.
# This is the "min" split: the min columns are used for temperature,
# humidity and every pollutant.
data <- read.csv("modelling_data/monthly_data.csv")

# select() is exported by dplyr, so `::` is sufficient; `:::` is reserved for
# reaching into a package's internals.
data <- data %>%
  dplyr::select(
    at_code,
    events,
    temp = min_temp,
    hum = min_hum,
    is_holiday,
    month,
    year,
    pm10 = min_PM10,
    o3 = min_O3,
    no2 = min_NO2,
    so2 = min_SO2,
    co = min_CO
  )

# Filter data for AT01
# %in% never yields NA, so rows with a missing at_code are dropped instead of
# coming back as all-NA rows (which `==` inside `[` would produce).
data <- data[data$at_code %in% "AT01", ]

# Initialize an empty data frame to store results.
# The columns mirror the rows appended by the model loop (which carry
# has_holiday); data_split is only filled in after the grid search.
results_df <- data.frame(
  model = character(),       # label built from the grid settings
  aic = numeric(),           # AIC of the fitted model
  deviance = numeric(),      # residual deviance of the fitted model
  year_df = numeric(),       # degrees of freedom of the year spline
  has_holiday = logical(),   # TRUE when is_holiday is in the model
  spline_fun = character(),  # spline function name recorded for the row
  df = character(),          # exposure/lag df setting recorded for the row
  data_split = character()   # data split type, assigned after the loop
)

# Define combinations of degrees of freedom and spline functions
# Each element of dfs is a pair; create_crossbasis() reads element 1 as the
# exposure df and element 2 as the lag df.
dfs <- list(c(2, 2), c(3, 3), c(4, 4), c(5, 5), c(6, 6), c(7, 7))
spline_funs <- c("ns", "bs")           # spline functions
year_dfs <- c(1, 2, 3, 4, 5, 6, 7, 8)  # candidate df values for ns(year)

# Define variable combinations for modeling
# NOTE(review): one set lists is_holiday explicitly although the model loop
# also handles the holiday term on its own -- confirm this set is intended.
variable_sets <- list(
  c("hum"),
  c("hum", "temp"),
  c("hum", "temp", "pm10"),
  c("hum", "temp", "pm10", "o3"),
  c("hum", "temp", "pm10", "o3", "no2"),
  c("hum", "temp", "pm10", "no2"),
  c("hum", "temp", "pm10", "is_holiday", "no2"),
  c("hum", "temp", "pm10", "so2", "no2"),
  c("hum", "temp", "so2", "no2"),
  c("hum", "temp", "no2"),
  c("hum", "temp", "so2"),
  c("hum", "temp", "o3"),
  c("hum", "temp", "co"),
  c("temp"),
  c("pm10"),
  c("hum", "temp", "pm10", "o3", "no2", "so2"),
  c("hum", "temp", "pm10", "o3", "no2", "so2", "co")
)

# Build a dlnm cross-basis for one column of the global `data` frame.
#
# var        name of the column in `data` used as the exposure series
# spline_fun basis function name, applied to both the exposure and the lag
#            dimension
# dfs        vector whose first element is the df of the exposure basis and
#            whose second element is the df of the lag basis
#
# Returns the object produced by crossbasis() with a maximum lag of 12.
# `data` is looked up when the function is called, not when it is defined.
create_crossbasis <- function(var, spline_fun, dfs) {
  exposure <- data[[var]]
  var_spec <- list(fun = spline_fun, df = dfs[1])
  lag_spec <- list(fun = spline_fun, df = dfs[2])
  crossbasis(exposure, lag = 12, argvar = var_spec, arglag = lag_spec)
}

# Iterate over all combinations of spline functions, dfs, variable sets, and
# year dfs. For each combination a Poisson GLM of `events` is fitted twice,
# once with and once without `is_holiday`, and its AIC and residual deviance
# are recorded as one row per fit.
#
# NOTE(review): spline_fun and df_other only label the rows. They never enter
# the model formula and create_crossbasis() is not called here, so the same
# (df_year, var_set) model is refitted unchanged for every spline_fun/df_other
# combination. Confirm whether cross-basis terms were meant to replace the
# linear covariates.
n_fits <- 2L * length(spline_funs) * length(year_dfs) * length(dfs) *
  length(variable_sets)
result_rows <- vector("list", n_fits)
row_idx <- 0L

for (spline_fun in spline_funs) {
  for (df_year in year_dfs) {
    for (df_other in dfs) {
      # Collapse the df pair into one label. Passing the length-2 vector
      # straight to data.frame() recycled the other columns and stored every
      # model twice.
      df_label <- paste(df_other, collapse = "_")

      for (var_set in variable_sets) {
        # The holiday term is controlled by the with/without switch below, so
        # it is removed from the covariate list; otherwise the
        # "without_holiday" fit of a set listing is_holiday would still
        # contain it while being flagged has_holiday = FALSE.
        covariates <- setdiff(var_set, "is_holiday")
        var_label <- paste(var_set, collapse = "_")

        for (with_holiday in c(TRUE, FALSE)) {
          rhs_terms <- c(
            paste0("ns(year, df = ", df_year, ")"),
            if (with_holiday) "is_holiday",
            covariates
          )
          model_formula <- as.formula(
            paste("events ~", paste(rhs_terms, collapse = " + "))
          )
          model_fit <- glm(model_formula, data = data,
                           na.action = na.exclude, family = poisson)

          holiday_label <- if (with_holiday) "with_holiday" else "without_holiday"
          row_idx <- row_idx + 1L
          result_rows[[row_idx]] <- data.frame(
            model = paste(spline_fun, df_year, df_label, var_label,
                          holiday_label, sep = "_"),
            aic = AIC(model_fit),
            deviance = model_fit$deviance,
            year_df = df_year,
            has_holiday = with_holiday,
            spline_fun = spline_fun,
            df = df_label
          )
        }
      }
    }
  }
}

# Bind all rows once instead of growing results_df with rbind() on every
# iteration.
results_df <- rbind(results_df, do.call(rbind, result_rows))

# Rank the candidates by AIC (ties broken by deviance) and tag the data split.
results_df_AT01_min <- results_df %>% arrange(aic, deviance)
results_df_AT01_min['data_split'] <- 'min'
# READ DATA
# Load the monthly modelling table and keep only the columns used below.
# This is the "max" split: the max columns are used for temperature,
# humidity and every pollutant.
data <- read.csv("modelling_data/monthly_data.csv")

# select() is exported by dplyr, so `::` is sufficient; `:::` is reserved for
# reaching into a package's internals.
data <- data %>%
  dplyr::select(
    at_code,
    events,
    temp = max_temp,
    hum = max_hum,
    is_holiday,
    month,
    year,
    pm10 = max_PM10,
    o3 = max_O3,
    no2 = max_NO2,
    so2 = max_SO2,
    co = max_CO
  )

# Filter data for AT01
# %in% never yields NA, so rows with a missing at_code are dropped instead of
# coming back as all-NA rows (which `==` inside `[` would produce).
data <- data[data$at_code %in% "AT01", ]

# Initialize an empty data frame to store results.
# The columns mirror the rows appended by the model loop (which carry
# has_holiday); data_split is only filled in after the grid search.
results_df <- data.frame(
  model = character(),       # label built from the grid settings
  aic = numeric(),           # AIC of the fitted model
  deviance = numeric(),      # residual deviance of the fitted model
  year_df = numeric(),       # degrees of freedom of the year spline
  has_holiday = logical(),   # TRUE when is_holiday is in the model
  spline_fun = character(),  # spline function name recorded for the row
  df = character(),          # exposure/lag df setting recorded for the row
  data_split = character()   # data split type, assigned after the loop
)

# Define combinations of degrees of freedom and spline functions
# Each element of dfs is a pair; create_crossbasis() reads element 1 as the
# exposure df and element 2 as the lag df.
dfs <- list(c(2, 2), c(3, 3), c(4, 4), c(5, 5), c(6, 6), c(7, 7))
spline_funs <- c("ns", "bs")           # spline functions
year_dfs <- c(1, 2, 3, 4, 5, 6, 7, 8)  # candidate df values for ns(year)

# Define variable combinations for modeling
# NOTE(review): one set lists is_holiday explicitly although the model loop
# also handles the holiday term on its own -- confirm this set is intended.
variable_sets <- list(
  c("hum"),
  c("hum", "temp"),
  c("hum", "temp", "pm10"),
  c("hum", "temp", "pm10", "o3"),
  c("hum", "temp", "pm10", "o3", "no2"),
  c("hum", "temp", "pm10", "no2"),
  c("hum", "temp", "pm10", "is_holiday", "no2"),
  c("hum", "temp", "pm10", "so2", "no2"),
  c("hum", "temp", "so2", "no2"),
  c("hum", "temp", "no2"),
  c("hum", "temp", "so2"),
  c("hum", "temp", "o3"),
  c("hum", "temp", "co"),
  c("temp"),
  c("pm10"),
  c("hum", "temp", "pm10", "o3", "no2", "so2"),
  c("hum", "temp", "pm10", "o3", "no2", "so2", "co")
)

# Build a dlnm cross-basis for one column of the global `data` frame.
#
# var        name of the column in `data` used as the exposure series
# spline_fun basis function name, applied to both the exposure and the lag
#            dimension
# dfs        vector whose first element is the df of the exposure basis and
#            whose second element is the df of the lag basis
#
# Returns the object produced by crossbasis() with a maximum lag of 12.
# `data` is looked up when the function is called, not when it is defined.
create_crossbasis <- function(var, spline_fun, dfs) {
  exposure <- data[[var]]
  var_spec <- list(fun = spline_fun, df = dfs[1])
  lag_spec <- list(fun = spline_fun, df = dfs[2])
  crossbasis(exposure, lag = 12, argvar = var_spec, arglag = lag_spec)
}

# Iterate over all combinations of spline functions, dfs, variable sets, and
# year dfs. For each combination a Poisson GLM of `events` is fitted twice,
# once with and once without `is_holiday`, and its AIC and residual deviance
# are recorded as one row per fit.
#
# NOTE(review): spline_fun and df_other only label the rows. They never enter
# the model formula and create_crossbasis() is not called here, so the same
# (df_year, var_set) model is refitted unchanged for every spline_fun/df_other
# combination. Confirm whether cross-basis terms were meant to replace the
# linear covariates.
n_fits <- 2L * length(spline_funs) * length(year_dfs) * length(dfs) *
  length(variable_sets)
result_rows <- vector("list", n_fits)
row_idx <- 0L

for (spline_fun in spline_funs) {
  for (df_year in year_dfs) {
    for (df_other in dfs) {
      # Collapse the df pair into one label. Passing the length-2 vector
      # straight to data.frame() recycled the other columns and stored every
      # model twice.
      df_label <- paste(df_other, collapse = "_")

      for (var_set in variable_sets) {
        # The holiday term is controlled by the with/without switch below, so
        # it is removed from the covariate list; otherwise the
        # "without_holiday" fit of a set listing is_holiday would still
        # contain it while being flagged has_holiday = FALSE.
        covariates <- setdiff(var_set, "is_holiday")
        var_label <- paste(var_set, collapse = "_")

        for (with_holiday in c(TRUE, FALSE)) {
          rhs_terms <- c(
            paste0("ns(year, df = ", df_year, ")"),
            if (with_holiday) "is_holiday",
            covariates
          )
          model_formula <- as.formula(
            paste("events ~", paste(rhs_terms, collapse = " + "))
          )
          model_fit <- glm(model_formula, data = data,
                           na.action = na.exclude, family = poisson)

          holiday_label <- if (with_holiday) "with_holiday" else "without_holiday"
          row_idx <- row_idx + 1L
          result_rows[[row_idx]] <- data.frame(
            model = paste(spline_fun, df_year, df_label, var_label,
                          holiday_label, sep = "_"),
            aic = AIC(model_fit),
            deviance = model_fit$deviance,
            year_df = df_year,
            has_holiday = with_holiday,
            spline_fun = spline_fun,
            df = df_label
          )
        }
      }
    }
  }
}

# Bind all rows once instead of growing results_df with rbind() on every
# iteration.
results_df <- rbind(results_df, do.call(rbind, result_rows))

# Rank the candidates by AIC (ties broken by deviance) and tag the data split.
results_df_AT01_max <- results_df %>% arrange(aic, deviance)
results_df_AT01_max['data_split'] <- 'max'


# Stack the ranked tables of the four data splits and order all candidates
# together by AIC, breaking ties by residual deviance.
combined_df <- rbind(results_df_AT01_max, results_df_AT01_min, results_df_AT01_mean, results_df_AT01_mix)
combined_df <- combined_df %>% arrange(aic, deviance)

# Save the data frame to a CSV file
# Create the output directory first so that a missing folder does not throw
# away the whole grid search with a write error.
output_file <- "dlnm_tuning/AT01_dlnm_model_selection.csv"
dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
write.csv(combined_df, output_file, row.names = FALSE)

This is dlnm 2.4.7. For details: help(dlnm) and vignette('dlnmOverview').

Loading required package: nlme

This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.

Loading required package: ggplot2


Attaching package: 'plotly'


The following object is masked from 'package:ggplot2':

    last_plot


The following object is masked from 'package:stats':

    filter


The following object is masked from 'package:graphics':

    layout


Loading required package: foreach

Loading required package: iterators


Attaching package: 'lubridate'


The following objects are masked from 'package:data.table':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year


The following objects are masked from 'package:base':

    date, intersect, setdiff, union


Loading required package: zoo


Attaching package: 'zoo'


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric



Attaching package: 'xts'


The following objects are masked fro