# LEADER data analysis

In [1]:
## Import packages
library(glmnet)
library(zeallot) #enable %<-%
library(mvtnorm)
library(causl)
library(survivl)
library(npcausal)
library(dplyr)
library(haven)
library(survival)

Loading required package: Matrix

Loaded glmnet 4.1-8

Loading required package: rje

Loading required package: VineCopula


Attaching package: 'dplyr'


The following object is masked from 'package:rje':

    last


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




In [2]:

## Column / parameter names pulled from the analysis datasets ####

# Baseline covariates selected from adsl
bsl_vars <- c(
  "COUNTRY", "SEX", "AGE", "RACE", "SMOKER", "DIABDUR", "BMIBL", "HBA1CBL",
  "HDL1BL", "LDL1BL", "CHOL1BL", "TRIG1BL", "CREATBL"
)

# Medical-history flag columns, also selected from adsl
mh_vars <- c("MIFL", "STROKEFL", "STENFL", "NEPSCRFL", "KIDFL")

# Longitudinal parameters: PARAMCD values kept from adlb and advs
lb_vars <- c("HBA1C", "EGFRCKD")
vs_vars <- c("BMI")

# Time-to-event parameters: PARAMCD values kept from adtte
tte_vars <- c("ALDTHTM", "MACEEVTM", "MACEMITM")


# Dataset locations (two-folder loading approach credited to Jens):
#   start - directory holding the original .sas7bdat files
#   fin   - directory holding the .rds copies
# Both entries end with "/" because file names are appended with paste0().
path <- "../../../../../Project/LEADER/Box/"
folder <- list(
  start = paste0(path, "Analysis Ready Datasets/SAS_analysis/"),
  fin = paste0(path, "Analysis Ready Datasets/R_analysis/")
)

#' Load an analysis dataset, caching it as .rds after the first read.
#'
#' If `<folder$fin><ds>.rds` exists it is read with readRDS(). Otherwise
#' `<folder$start><ds>.sas7bdat` is read with haven::read_sas() and the result
#' is saved to the .rds path so later calls take the fast branch.
#'
#' @param ds Dataset name without extension (e.g. "adsl").
#' @param folder List with elements `start` (directory of the .sas7bdat files)
#'   and `fin` (directory of the .rds cache). Paths are built with paste0(),
#'   so both must end with a path separator.
#' @return The loaded dataset.
load_func <- function(ds, folder) {
  rds_path <- paste0(folder$fin, ds, ".rds")
  if (file.exists(rds_path)) {
    return(readRDS(rds_path))
  }

  out <- haven::read_sas(paste0(folder$start, ds, ".sas7bdat"))

  # Create the cache directory if needed so the write below does not fail on a
  # missing directory after the SAS file has already been read.
  dir.create(dirname(rds_path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(out, rds_path)
  out
}


In [3]:
# Read the four source datasets through load_func() (.rds cache when present)
adsl <- load_func(ds = "adsl", folder = folder)   # demographics
advs <- load_func(ds = "advs", folder = folder)   # vitals
adtte <- load_func(ds = "adtte", folder = folder) # primary outcomes
adlb <- load_func(ds = "adlb", folder = folder)   # labs; can be slow

In [4]:

# Baseline table: rows with FASFL == "Y" (presumably the full-analysis-set
# flag - confirm), keeping subject id, arm, baseline covariates and
# medical-history flags, in that order.
df_bsl <- select(
  filter(adsl, FASFL == "Y"),
  all_of(c("USUBJID", "ARM", bsl_vars, mh_vars))
)

# Longitudinal measurements in long format: the selected lab parameters from
# adlb stacked on top of the selected vital-sign parameters from advs. `DS`
# (first column) records which dataset each row came from.
df_long <- rbind(
  adlb %>%
    filter(FASFL == "Y", PARAMCD %in% lb_vars) %>%
    select(USUBJID, ABLFL, AVISIT, AVISITN, ADY,
           PARAM, PARAMCD, AVALU, AVAL, CHG, PCHG, DTYPE) %>%
    mutate(DS = "adlb", .before = 1),
  advs %>%
    filter(FASFL == "Y", PARAMCD %in% vs_vars) %>%
    select(USUBJID, ABLFL, AVISIT, AVISITN, ADY,
           PARAM, PARAMCD, AVALU, AVAL, CHG, PCHG, DTYPE) %>%
    mutate(DS = "advs", .before = 1)
)

# Per-subject all-cause death indicator taken from the ALDTHTM rows:
# death = 0 when CNSR == 1 (censored), 1 otherwise.
death <- adtte %>%
  filter(FASFL == "Y", PARAMCD == "ALDTHTM") %>%
  mutate(death = ifelse(CNSR == 1, 0, 1))

# Time-to-event table, one row per subject and outcome in `tte_vars`:
#   event - 1 if the row's own outcome occurred (CNSR != 1), else 0
#   death - the subject's death indicator, reset to 0 on rows whose own
#           (non-death) outcome occurred.
# The reset is restricted to non-ALDTHTM rows. On the ALDTHTM row `event` IS
# the death indicator, so an unconditional `event == 1 -> death = 0` would
# zero `death` for every subject who died.
# NOTE(review): assumes the reset is meant to flag death only where the row's
# outcome was not observed - confirm.
df_out <- adtte %>%
  filter(FASFL == "Y", PARAMCD %in% tte_vars) %>%
  mutate(event = ifelse(CNSR == 1, 0, 1)) %>%
  select(USUBJID, PARAM, PARAMCD, AVAL, event) %>%
  left_join(select(death, USUBJID, death), by = "USUBJID") %>%
  mutate(death = ifelse(PARAMCD != "ALDTHTM" & event == 1, 0, death))

# Bundle the three derived tables into one named list
out <- list(df_bsl = df_bsl, df_long = df_long, df_out = df_out)

[1m[22mJoining with `by = join_by(USUBJID)`


In [5]:
nrow(df_bsl)

In [6]:
df_bsl

USUBJID,ARM,COUNTRY,SEX,AGE,RACE,SMOKER,DIABDUR,BMIBL,HBA1CBL,HDL1BL,LDL1BL,CHOL1BL,TRIG1BL,CREATBL,MIFL,STROKEFL,STENFL,NEPSCRFL,KIDFL
<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
EX2211-3748/5,Liraglutide,,F,62,WHITE,NEVER SMOKED,3.5,32.7,9.5,1.17,2.80,4.84,1.90,55,N,N,N,N,N
EX2211-3748/9,Placebo,,M,56,WHITE,CURRENT SMOKER,3.2,26.9,6.8,0.87,1.82,3.27,1.28,77,N,N,N,N,N
EX2211-3748/16,Liraglutide,,M,60,WHITE,PREVIOUS SMOKER,13.9,31.5,11.9,0.85,2.42,4.17,1.97,70,Y,N,N,N,N
EX2211-3748/20,Placebo,,F,59,WHITE,NEVER SMOKED,2.1,43.9,9.6,1.14,2.49,4.84,2.64,55,N,N,Y,N,N
EX2211-3748/62,Liraglutide,,M,71,BLACK OR AFRICAN AMERICAN,PREVIOUS SMOKER,11.5,28.4,8.5,1.06,2.02,4.53,3.14,81,N,N,N,N,N
EX2211-3748/65,Placebo,,M,56,OTHER,NEVER SMOKED,8.0,28.2,7.6,0.88,1.63,3.08,1.27,114,N,N,N,Y,N
EX2211-3748/71,Placebo,,M,60,BLACK OR AFRICAN AMERICAN,CURRENT SMOKER,19.3,25.1,8.0,1.04,2.03,4.49,3.13,80,N,N,N,Y,N
EX2211-3748/73,Placebo,,M,67,WHITE,PREVIOUS SMOKER,11.2,35.1,8.8,0.98,1.74,3.76,2.25,76,N,N,N,N,N
EX2211-3748/89,Placebo,,M,64,ASIAN,NEVER SMOKED,17.1,23.3,7.0,0.96,1.89,3.60,1.65,116,N,N,N,N,N
EX2211-3748/116,Liraglutide,,F,69,BLACK OR AFRICAN AMERICAN,PREVIOUS SMOKER,6.8,36.9,8.3,1.37,3.42,5.44,1.44,157,N,Y,N,Y,Y


In [7]:
df_long

DS,USUBJID,ABLFL,AVISIT,AVISITN,ADY,PARAM,PARAMCD,AVALU,AVAL,CHG,PCHG,DTYPE
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>
adlb,EX2211-3748/5,,"VISIT 1, V10",10,-35,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,97.7,,,
adlb,EX2211-3748/5,Y,"VISIT 3 (DAY 0), V30",30,2,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,96.6,0.0,0.00,
adlb,EX2211-3748/5,,"VISIT 6 (MONTH 6), V60",60,181,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,94.9,-1.7,-1.76,
adlb,EX2211-3748/5,,"VISIT 7 (MONTH 12), V70",70,371,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,94.4,-2.2,-2.28,
adlb,EX2211-3748/5,,"VISIT 9 (MONTH 24), V90",90,727,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,93.8,-2.8,-2.90,
adlb,EX2211-3748/5,,"VISIT 11 (MONTH 36), V110",110,1098,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,94.4,-2.2,-2.28,
adlb,EX2211-3748/5,,"VISIT 13 (MONTH 48), V130",130,1098,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,94.4,-2.2,-2.28,LOCF
adlb,EX2211-3748/5,,"VISIT 15 (MONTH 60), V150",150,1358,EGFR using CKD-EPI Method (ml/min/1.73m2),EGFRCKD,ml/min/1.73m2,92.8,-3.8,-3.93,
adlb,EX2211-3748/5,,"VISIT 1, V10",10,-35,Hemoglobin A1C (%),HBA1C,%,13.3,,,
adlb,EX2211-3748/5,Y,"VISIT 3 (DAY 0), V30",30,2,Hemoglobin A1C (%),HBA1C,%,9.5,0.0,0.00,


In [7]:
df_out

USUBJID,PARAM,PARAMCD,AVAL,event,death
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
EX2211-3748/5,Time (months) to All Death Event,ALDTHTM,48.32854,0,0
EX2211-3748/5,Time (Months) to MACE Event,MACEEVTM,48.32854,0,0
EX2211-3748/5,Time (Months) to Non-Fatal MI MACE Event,MACEMITM,48.32854,0,0
EX2211-3748/9,Time (months) to All Death Event,ALDTHTM,43.53183,0,0
EX2211-3748/9,Time (Months) to MACE Event,MACEEVTM,43.53183,0,0
EX2211-3748/9,Time (Months) to Non-Fatal MI MACE Event,MACEMITM,43.53183,0,0
EX2211-3748/16,Time (months) to All Death Event,ALDTHTM,43.26899,0,0
EX2211-3748/16,Time (Months) to MACE Event,MACEEVTM,28.25462,1,0
EX2211-3748/16,Time (Months) to Non-Fatal MI MACE Event,MACEMITM,28.25462,1,0
EX2211-3748/20,Time (months) to All Death Event,ALDTHTM,44.84600,0,0


In [9]:
library(dplyr)
library(tidyr)
library(purrr)
# Longest follow-up time in df_out and the number of 4-unit intervals needed
# to cover it (AVAL is in months according to the PARAM labels).
# na.rm = TRUE: a single missing AVAL would otherwise make max_time, and hence
# num_intervals, NA and break every downstream rep()/indexing call.
max_time <- max(df_out$AVAL, na.rm = TRUE)
stopifnot(is.finite(max_time))
num_intervals <- ceiling(max_time / 4)

# Split df_out into one (USUBJID, time, event) table per outcome.

# MACE outcome (PARAMCD "MACEEVTM")
mace <- df_out %>%
  filter(PARAMCD == "MACEEVTM") %>%
  select(USUBJID, time = AVAL, event)

# Death outcome (PARAMCD "ALDTHTM").
# Use the row's own `event` (derived from CNSR) as the death indicator.
# df_out$death is a secondary flag that upstream code resets to 0 on rows whose
# event occurred, so selecting it here can yield an all-zero death outcome.
death <- df_out %>%
  filter(PARAMCD == "ALDTHTM") %>%
  select(USUBJID, time = AVAL, event)

# Non-fatal MI outcome (PARAMCD "MACEMITM")
mi <- df_out %>%
  filter(PARAMCD == "MACEMITM") %>%
  select(USUBJID, time = AVAL, event)

# One row per subject: start from the distinct subject ids and attach each
# outcome's time/event pair, renamed with an outcome suffix before joining.
surv_table <- df_out %>%
  distinct(USUBJID) %>%
  left_join(
    rename(mace, time_mace = time, event_mace = event),
    by = "USUBJID"
  ) %>%
  left_join(
    rename(death, time_death = time, event_death = event),
    by = "USUBJID"
  ) %>%
  left_join(
    rename(mi, time_mi = time, event_mi = event),
    by = "USUBJID"
  )

#' Encode one subject's time-to-event as a per-interval indicator vector.
#'
#' Interval k covers (interval_length * (k - 1), interval_length * k]. With an
#' event, intervals before the event interval are 0, the event interval is 1,
#' and all later intervals are NA. Without an event the vector is all 0.
#' NOTE(review): censored subjects are coded 0 in every interval, including
#' those after their censoring time - confirm this is intended.
#'
#' @param time Time to event or censoring, in the same unit as
#'   `interval_length` (months in this analysis).
#' @param event 1 if the event occurred, 0 if censored / no event.
#' @param num_intervals Total number of intervals (length of the result).
#' @param interval_length Length of each interval (default 4).
#' @return Numeric vector of length `num_intervals` with values 0, 1 or NA.
create_interval_vector <- function(time, event, num_intervals, interval_length = 4) {
  if (is.na(event)) {
    stop("`event` must be 0 or 1, not NA", call. = FALSE)
  }

  res <- rep(0, num_intervals)
  if (event != 1) {
    return(res)
  }
  if (is.na(time)) {
    stop("`time` must not be NA when `event` is 1", call. = FALSE)
  }

  # Clamp the event interval to [1, num_intervals]. Without the lower bound a
  # time of 0 gives index 0: `res[0] <- 1` is a silent no-op and the NA fill
  # below then wipes the whole vector, losing the event.
  event_interval <- min(max(ceiling(time / interval_length), 1), num_intervals)
  res[event_interval] <- 1

  # Intervals after the event are NA
  if (event_interval < num_intervals) {
    res[(event_interval + 1):num_intervals] <- NA
  }
  res
}

# Build one interval vector per subject and outcome, stored as list columns:
#   Y - MACE, D - death, I - non-fatal MI
surv_table <- surv_table %>%
  mutate(
    Y = map2(
      time_mace, event_mace,
      function(tm, ev) create_interval_vector(tm, ev, num_intervals)
    ),
    D = map2(
      time_death, event_death,
      function(tm, ev) create_interval_vector(tm, ev, num_intervals)
    ),
    I = map2(
      time_mi, event_mi,
      function(tm, ev) create_interval_vector(tm, ev, num_intervals)
    )
  )

# Expand each list column into one numeric column per interval
# (Y_1, D_1, I_1, Y_2, ...). seq_len() keeps the loop empty when
# num_intervals is 0, where 1:num_intervals would iterate over c(1, 0).
for (i in seq_len(num_intervals)) {
  for (prefix in c("Y", "D", "I")) {
    surv_table[[paste0(prefix, "_", i)]] <-
      map_dbl(surv_table[[prefix]], function(v) v[i])
  }
}

# Final table: one row per subject, interval columns grouped by outcome
final_table <- surv_table %>%
  select(USUBJID, starts_with("Y_"), starts_with("D_"), starts_with("I_"))

# View the final table
print(final_table)


Attaching package: 'tidyr'


The following objects are masked from 'package:Matrix':

    expand, pack, unpack




[90m# A tibble: 9,340 x 49[39m
   USUBJID       Y_1   Y_2   Y_3   Y_4   Y_5   Y_6   Y_7   Y_8   Y_9  Y_10  Y_11
   [3m[90m<chr>[39m[23m       [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m
[90m 1[39m EX2211-374~     0     0     0     0     0     0     0     0     0     0     0
[90m 2[39m EX2211-374~     0     0     0     0     0     0     0     0     0     0     0
[90m 3[39m EX2211-374~     0     0     0     0     0     0     0     1    [31mNA[39m    [31mNA[39m    [31mNA[39m
[90m 4[39m EX2211-374~     0     0     0     0     0     0     0     0     0     0     0
[90m 5[39m EX2211-374~     0     0     0     0     0     0     0     0     0     0     0
[90m 6[39m EX2211-374~     0     0     0     0     0     0     0     0     0     0     0


In [10]:
final_table

USUBJID,Y_1,Y_2,Y_3,Y_4,Y_5,Y_6,Y_7,Y_8,Y_9,...,I_7,I_8,I_9,I_10,I_11,I_12,I_13,I_14,I_15,I_16
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,...,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
EX2211-3748/5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/16,0,0,0,0,0,0,0,1,,...,0,1,,,,,,,,
EX2211-3748/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/62,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/65,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/71,0,1,,,,,,,,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/73,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/89,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EX2211-3748/116,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
