## Log-rank test

In [11]:
library(dplyr)
library(survival)

In [12]:
# Load the analysis data set and convert its categorical columns to factors.
df1 <- read.csv("./data/df1.csv")

# Level orderings for the ordered factors, listed from lowest to highest
# category. Any value in the data that is not listed here becomes NA when the
# column is converted with factor(levels = ...).
level_age_gp <- c("<55 years", "55-64 years", "65-74 years", ">=75 years")
level_ef_gp <- c("<40%", "40-49%", "50-74%", ">=75%")
level_ef_paper <- c("<=30%", "31-45%", ">45%")
level_sodium_gp <- c("<135mmol/L", "135-145mmol/L", ">145mmol/L")
level_platelets_gp <- c("Thrombocytopenia", "Normal", "Thrombocytosis")
level_platelets_paper <- c("<=Q1", "Q1-Q3", ">=Q3")

df1 <- df1 %>%
  mutate(
    # Unordered categorical columns: plain factors with default levels.
    across(
      c(
        gender, smoking, diabetes, bp, anaemia,
        creatinine_excess, creatinine_paper, cpk_excess
      ),
      factor
    ),
    # Ordered categorical columns: explicit level order from the vectors above.
    age_gp = factor(age_gp, levels = level_age_gp, ordered = TRUE),
    ef_gp = factor(ef_gp, levels = level_ef_gp, ordered = TRUE),
    ef_paper = factor(ef_paper, levels = level_ef_paper, ordered = TRUE),
    sodium_gp = factor(sodium_gp, levels = level_sodium_gp, ordered = TRUE),
    platelets_gp = factor(
      platelets_gp,
      levels = level_platelets_gp,
      ordered = TRUE
    ),
    platelets_paper = factor(
      platelets_paper,
      levels = level_platelets_paper,
      ordered = TRUE
    )
  )

In [9]:
# Names of the df1 columns to run through the log-rank test.
variables <- c(
  "gender", "smoking", "diabetes", "bp", "anaemia",
  "age_gp", "ef_gp", "sodium_gp",
  "creatinine_excess", "platelets_gp", "cpk_excess"
)

In [10]:
# Log-rank test of survival against each column named in `variables`.

# Drop rows with a missing value in any column once, up front, so that every
# test runs on the same complete-case sample. Because the data passed to
# survdiff() is already complete, no subset/na.action arguments are needed.
complete_df <- na.omit(df1)

# One survdiff() fit per variable. vapply() enforces exactly one numeric
# p-value per variable and avoids growing the result vector inside a loop.
p_value <- vapply(
  variables,
  function(var) {
    lrt <- survdiff(
      as.formula(paste("Surv(time, event) ~", var)),
      data = complete_df
    )
    lrt$pvalue
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Decide significance on the unrounded numeric p-values, and only then format
# them for display. Comparing the sprintf() strings against 0.05 would be a
# string comparison on rounded text (e.g. 0.04996 prints as "0.0500" and
# would be reported as not significant).
data.frame(variable = variables, p_value = p_value) %>%
  mutate(
    significant = p_value < 0.05,
    p_value = sprintf("%.4f", p_value)
  )

variable,p_value,significant
<chr>,<chr>,<lgl>
gender,0.8525,False
smoking,0.9741,False
diabetes,0.715,False
bp,0.0255,True
anaemia,0.0943,False
age_gp,0.0,True
ef_gp,0.0052,True
sodium_gp,0.0001,True
creatinine_excess,0.0,True
platelets_gp,0.7131,False


* Excess creatinine (p < 0.0001), sodium (p = 0.0001), ejection fraction (p = 0.0052), and blood pressure (p = 0.0255) show statistically significant differences in survival between groups. These findings align with the trends observed in the Kaplan–Meier curves.
* Anaemia (p = 0.0943) and excess CPK levels (p = 0.3284) are not statistically significant in the log-rank test, although the Kaplan–Meier curves suggest they may still be important predictors.
* Age group is statistically significant (p < 0.0001), likely due to the notably lower survival probability among individuals aged 75 and above.