In [184]:
# VDJ chord plot

library(tidyverse)
library(circlize)

chord_plot <- function(df, title, col1=NA, col2=NA, default_col=NA, min_anno = 2) {
    df_summary <- df %>% 
      select(c(id, v_gene_H, v_gene_L)) %>%
      count(v_gene_H, v_gene_L) %>%
      rename(from = v_gene_L, to = v_gene_H, value = n)

    df_summary$from <- str_replace_all(df_summary$from, "IGLV", "VL")
    df_summary$from <- str_replace_all(df_summary$from, "IGKV", "VK")
    
    df_summary$to <- str_replace_all(df_summary$to, "IGHV", "VH")
    
    names(col1) <- str_replace_all(names(col1), "IGHV", "VH")
    names(col2) <- str_replace_all(names(col2), "IGKV", "VK")
    names(col2) <- str_replace_all(names(col2), "IGLV", "VL")
    
    v_gene_L_order <- df_summary %>%
      group_by(from) %>%
      summarize(total = sum(value)) %>%
      arrange(total) %>%
      pull(from)

    v_gene_H_order <- df_summary %>%
      group_by(to) %>%
      summarize(total = sum(value)) %>%
      arrange(desc(total)) %>%
      pull(to)

    order <- c(v_gene_L_order, v_gene_H_order)

    colors <- c(col1[v_gene_H_order], col2[v_gene_L_order])
    names(colors) <- c(v_gene_H_order,v_gene_L_order)
    colors[is.na(colors)] <- default_col
    
    circos.clear()
    
    chordDiagram(
      df_summary,
      order = order,  # Sort the sectors
      directional = TRUE, # If you want to show directionality
      annotationTrack = "grid", # Add grid annotation
      transparency = 0.5, # Set transparency of the chords
      preAllocateTracks = list(track.height = 0.1), # Allocate space for labels
      grid.col = colors %>% na.omit()
      # grid.col = rep(c("red", "blue"), c(length(v_gene_H_order), length(v_gene_L_order))) # Different colors for H and L
    )

    filtered_labels_L <- df_summary %>%
      group_by(from) %>%
      summarize(total = sum(value)) %>%
      filter(total >= min_anno) %>%
      pull(from)
    
    filtered_labels_H <- df_summary %>%
      group_by(to) %>%
      summarize(total = sum(value)) %>%
      filter(total >= min_anno) %>%
      pull(to)

    # Add labels conditionally
    circos.trackPlotRegion(
      track.index = 1,
      panel.fun = function(x, y) {
        sector.name <- get.cell.meta.data("sector.index")
        if (sector.name %in% c(filtered_labels_H, filtered_labels_L)) {
          circos.text(CELL_META$xcenter, CELL_META$ylim[1] + mm_y(5), adj=0.5,
                      sector.name, facing = "clockwise", niceFacing = TRUE)
        }
      },
      bg.border = NA
    )

    title(main = title)
    circos.clear()
}

set.seed(42)

In [185]:
# specific F3

sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 + JN.1 infection", "BA.5 BTI + JN.1 infection")
# sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H != "" & v_gene_L != "" & source %in% sources)

dev.off()
pdf(paste0("../plots/Figure5_related/F3_VDJ.pdf"), width=5, height=5)

colors <- c("VH2-5"='#AA2055',"VH5-51"="#7193E2","VH4-59"="#FFCC90", "VH3-30"="#cbaf70",
           "VK3-15"='#FF5733', "VK1-39"="#2E86C1", "VK1-33"="#A3E4D7", "VL1-40"="#F1C40F")

df_m <- df %>% filter(source %in% c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection") & new_group == "F3")
if (nrow(df_m) == 0) next
chord_plot(df_m, title=paste0("BA.5 BTI + XBB/HK.3 F3 (n = ", nrow(df_m), ")"), col1=colors, col2=colors, default_col="#cccccc",min_anno = 3)

for (src in sources) {
    df_m <- df %>% filter(source == src & new_group == "F3")
    if (nrow(df_m) == 0) next
    chord_plot(df_m, title=paste0(src, ' F3 (n = ', nrow(df_m), ')'), col1=colors, col2=colors, default_col="#cccccc",min_anno = 3)
}
dev.off()


In [186]:

df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H != "" & v_gene_L != "")

sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
groups <- unique(df$new_group)

dev.off()

for (group in groups) {
    pdf(paste0("../plots/Figure4_related/VDJ_by_group/VDJ_chord_plots","_group_",str_replace(group,"/","_"),".pdf"))
    # all mAbs
    
    df_m <- df %>% filter(new_group == group)
    if (nrow(df_m) == 0) next
    chord_plot(df_m, title=paste0("All ",group, " (n = ", nrow(df_m), ")"))
    
    for (src in sources) {
        df_m <- df %>% filter(source == src & new_group == group)
        if (nrow(df_m) == 0) next
        chord_plot(df_m, title=paste0(src, ' ', group, ' (n = ', nrow(df_m), ')'))
    }
    dev.off()
}


In [187]:
# cross vs specific

library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

colors <- c("cross"="#BC3C29", "specific"="#0072B5")
sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/_mAb_info_clean.csv") %>% filter(source %in% sources & paper_reactivity != "none" & paper_reactivity != '')
df$source <- factor(df$source, levels=sources)
df_summary <- df %>%
  group_by(source, paper_reactivity) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100)

df_summary$paper_reactivity <- factor(df_summary$paper_reactivity, levels=c("specific","cross"))

dev.off()
pdf("../plots/Figure3_related/cross_specific.pdf", width=4.5, height=3.3)

ggplot(df_summary, aes(fill = paper_reactivity, color = paper_reactivity, y = percentage, x = source)) + 
  geom_bar(position = "fill", stat = "identity", alpha=0.5, width=.8) + 
  scale_y_continuous(labels = scales::percent, expand = c(0,0)) + 
  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+
  labs(y = "Percentage", x = "Source") +
  geom_text(data = df_summary%>%filter(paper_reactivity == 'cross'), aes(label = paste0(round(percentage), "%"), y = 0.2), color = "black")+
  geom_text(data = df_summary%>%filter(paper_reactivity == 'specific'), aes(label = paste0(round(percentage), "%"), y = 0.8), color = "black")+
  theme_classic()+theme(axis.text.x = element_text(angle=45, hjust=1),
      axis.title = element_blank())

dev.off()

df_summary$paper_reactivity <- factor(df_summary$paper_reactivity, levels=c("cross","specific"))
pdf("../plots/Figure3_related/SHM_HL.pdf", width=8, height=3.2)

dodge = 0.75

ggplot(df, aes(x = source, y = v_domain_shm_ratio_H, fill = paper_reactivity, color = paper_reactivity)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", position = position_dodge(dodge), width=0.8) +
  geom_boxplot(width = 0.2, position = position_dodge(dodge), fill='white') +
  # scale_fill_manual(values = c("red", "blue")) +
  labs(y = "Heavy chain V-domain SHM rate", x = "Source") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(limits=c(-0.02,0.22), expand=c(0,0,0,0.06))+
  stat_compare_means(method = "wilcox.test", aes(group = paper_reactivity), label = "p.format") +
  geom_text(data = df_summary, aes(label = count, y = 0.2), 
            position = position_dodge(dodge), vjust = 1.5, color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
ggplot(df, aes(x = source, y = v_domain_shm_ratio_L, fill = paper_reactivity, color = paper_reactivity)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", position = position_dodge(dodge), width=0.8) +
  geom_boxplot(width = 0.2, position = position_dodge(dodge), fill='white') +
  # scale_fill_manual(values = c("red", "blue")) +
  labs(y = "Light chain V-domain SHM rate", x = "Source") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(limits=c(-0.02,0.22), expand=c(0,0,0,0.06))+
  stat_compare_means(method = "wilcox.test", aes(group = paper_reactivity), label = "p.format") +
  geom_text(data = df_summary, aes(label = count, y = 0.2), 
            position = position_dodge(dodge), vjust = 1.5, color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )

dev.off()

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


“[1m[22mRemoved 9 rows containing missing values or values outside the scale range (`geom_violin()`).”


In [188]:
wilcox.test(
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_H
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_H
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value


wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value


wilcox.test(
    (df%>%filter(source=="XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value

wilcox.test(
    (df%>%filter(source=="XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H,
    (df%>%filter(source=="BA.5 + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_H
)$p.value

In [189]:
wilcox.test(
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_L
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="cross"))$v_domain_shm_ratio_L
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 BTI + HK.3 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value


wilcox.test(
    (df%>%filter(source=="BA.5 BTI + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 BTI + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value


wilcox.test(
    (df%>%filter(source=="XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value

wilcox.test(
    (df%>%filter(source=="XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value

wilcox.test(
    (df%>%filter(source=="BA.5 + JN.1 infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L,
    (df%>%filter(source=="BA.5 + XBB infection"&paper_reactivity=="specific"))$v_domain_shm_ratio_L
)$p.value

In [190]:
# cross-specific donuts

colors <- c("cross"="#BC3C29", "specific"="#0072B5")
sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
data <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources & paper_reactivity != "none" & paper_reactivity != '')

dev.off()
pdf("../plots/Figure3_related/cross_specific_by_group_donuts.pdf", width=4, height=6)
# Summarize the data
summarized_data <- data %>%
  group_by(new_group, paper_reactivity) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  group_by(new_group) %>%
  mutate(percentage = count / sum(count) * 100)

# Function to create a donut plot for each group
create_donut_plot <- function(group_data) {
  ggplot(group_data, aes(x = 2, y = percentage, fill = paper_reactivity)) +
    geom_bar(stat = "identity", width = 1, color = "black") +
    coord_polar(theta = "y", start = 0) + scale_fill_manual(values=colors)+
    geom_text(aes(label = paste0(round(percentage, 1), "%")), 
              position = position_stack(vjust = 0.5)) +
    xlim(0.5, 2.5) +
    theme_void() +
    theme(legend.position = "none") +
    ggtitle(group_data$new_group[1]) +
    theme(plot.title = element_text(hjust = 0.5, vjust = -10, size = 16))
}

# Create donut plots for each group
donut_plots <- summarized_data %>%
  group_split(new_group) %>%
  lapply(create_donut_plot)

# Annotate the center of each donut with the new_group name
for (i in seq_along(donut_plots)) {
  donut_plots[[i]] <- donut_plots[[i]] +
    annotate("text", x = 0, y = 0, label = levels(summarized_data$new_group)[i], size = 5, hjust = 0.5)
}

# Combine all donut plots into one figure
combined_plot <- ggarrange(plotlist = donut_plots, ncol = 3, nrow = 4)

# Display the combined plot
print(combined_plot)
dev.off()

[1m[22m`summarise()` has grouped output by 'new_group'. You can override using the `.groups` argument.
“[1m[22m... is ignored in group_split(<grouped_df>), please use group_by(..., .add = TRUE) %>% group_split()”


In [191]:
# group SHM for KP3 IC50 < 0.05
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

use_groups <- c("A1","F3")
colors <- c("neut"="#BC3C29", "escape"="#0072B5")
sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")

v_genes <- list("A1"=c("IGHV3-53","IGHV3-66"), "F3"=c("IGHV2-5"))

comparisons <- list(c("neut", "escape"))

dev.off()

for (use_group in use_groups) {
    df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources & (v_gene_H %in% v_genes[[use_group]]))
    df <- rbind(
            df %>% filter(KP3_IC50 < 0.05 & new_group == use_group) %>% mutate(paper_reactivity = "neut"), 
            df %>% filter(KP3_IC50 > 0.05 & new_group == use_group) %>% mutate(paper_reactivity = "escape"))

    df$source <- factor(df$source, levels=sources)
    df$paper_reactivity <- factor(df$paper_reactivity, levels=names(colors))
    df_summary <- df %>%
      group_by(paper_reactivity) %>%
      summarise(count = n(), max_h=max(v_domain_shm_ratio_H), max_l=max(v_domain_shm_ratio_L)) 

    pdf(paste0("../plots/Figure4_related/SHM_KP3_",use_group,".pdf"), width=3.5, height=3)
    print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_H, fill = paper_reactivity, color = paper_reactivity)) +
      geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
      geom_boxplot(width = 0.2, fill='white') +
      labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
      theme_classic() +  scale_fill_manual(values=colors)+
      scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
      stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", label.y = max(df_summary$max_h)+0.05, tip.length = 0) +
      geom_text(data = df_summary, aes(label = count), y = max(df_summary$max_h)+0.03, color = "black")+
      theme(
          axis.text.x = element_text(angle=45, hjust=1),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
      ))
    print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_L, fill = paper_reactivity, color = paper_reactivity)) +
      geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
      geom_boxplot(width = 0.2, fill='white') +
      labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
      theme_classic() +  scale_fill_manual(values=colors)+
      scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
      stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", label.y = max(df_summary$max_h)+0.05, tip.length = 0) +
      geom_text(data = df_summary, aes(label = count), y = max(df_summary$max_l)+0.03, color = "black")+
      theme(
          axis.text.x = element_text(angle=45, hjust=1),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
      ))
}
dev.off()

In [192]:
# group SHM for A1 sources
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources & paper_reactivity == "cross" & new_group == 'A1')
colors <- c("#ABABCC", "#C06679", "#2789A1","#BE502D", "#4D8947", "#CCA645")

my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

dev.off()

df$source <- factor(df$source, levels=sources)
df_summary <- df %>%
  group_by(source) %>%
  summarise(count = n(), medianH=median(v_domain_shm_ratio_H), medianL=median(v_domain_shm_ratio_L)) 

pdf(paste0("../plots/Figure4_related/SHM_HL_A1_all_src.pdf"), width=5.5, height=3)
ggplot(df, aes(x = source, y = v_domain_shm_ratio_H, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.signif", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianH, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
ggplot(df, aes(x = source, y = v_domain_shm_ratio_L, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.signif", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianL, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
dev.off()

“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”


In [193]:
# group SHM for A1 all sources
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources & paper_reactivity == "cross" & new_group == 'A1')
colors <- c("#BE502D", "#4D8947", "#CCA645")

my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

dev.off()

df$source <- factor(df$source, levels=sources)
df_summary <- df %>%
  group_by(source) %>%
  summarise(count = n(), medianH=median(v_domain_shm_ratio_H), medianL=median(v_domain_shm_ratio_L)) 

pdf(paste0("../plots/Figure4_related/SHM_HL_A1.pdf"), width=4, height=3)
ggplot(df, aes(x = source, y = v_domain_shm_ratio_H, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.signif", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianH, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
ggplot(df, aes(x = source, y = v_domain_shm_ratio_L, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.signif", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianL, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
dev.off()

“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”


In [194]:
# B/D3 SHM
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

use_groups <- c("B","D3")
colors <- c("cross"="#BC3C29", "group_cross"="#BC3C29", "group_specific"="#0072B5", "specific"="#0072B5")
sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
data <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources)

comparisons <- list(c("cross", "group_cross"),c("group_cross", "group_specific"),c("specific", "group_specific"))

dev.off()

for (use_group in use_groups) {
    df <- rbind(data,
            data %>% filter(paper_reactivity == "cross" & new_group == use_group) %>% mutate(paper_reactivity = "group_cross"), 
            data %>% filter(paper_reactivity == "specific" & new_group == use_group) %>% mutate(paper_reactivity = "group_specific"))

    df$source <- factor(df$source, levels=sources)
    df_summary <- df %>%
      group_by(paper_reactivity) %>%
      summarise(count = n()) 
    df_summary$paper_reactivity <- factor(df_summary$paper_reactivity, levels=names(colors))
    pdf(paste0("../plots/Figure4_related/SHM_H_",use_group,".pdf"), width=3.5, height=3)
    print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_H, fill = paper_reactivity, color = paper_reactivity)) +
      # geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
      geom_boxplot(width = 0.6, fill='white') +
      labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
      theme_classic() +  scale_fill_manual(values=colors)+
      scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
      stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", tip.length = 0) +
      geom_text(data = df_summary, aes(label = count, y = 0.2), 
                vjust = 1.5, color = "black")+
      theme(
          axis.text.x = element_text(angle=45, hjust=1),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
      ))
    print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_L, fill = paper_reactivity, color = paper_reactivity)) +
      # geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
      geom_boxplot(width = 0.6, fill='white') +
      labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
      theme_classic() +  scale_fill_manual(values=colors)+
      scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
      stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", tip.length = 0) +
      geom_text(data = df_summary, aes(label = count, y = 0.2), 
                vjust = 1.5, color = "black")+
      theme(
          axis.text.x = element_text(angle=45, hjust=1),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
      ))
}
dev.off()

df <- rbind(data,
        data %>% filter(paper_reactivity == "cross" & new_group %in% use_groups) %>% mutate(paper_reactivity = "group_cross"), 
        data %>% filter(paper_reactivity == "specific" & new_group %in% use_groups) %>% mutate(paper_reactivity = "group_specific"))

df$source <- factor(df$source, levels=sources)
df_summary <- df %>%
  group_by(paper_reactivity) %>%
  summarise(count = n()) 
df_summary$paper_reactivity <- factor(df_summary$paper_reactivity, levels=names(colors))
pdf(paste0("../plots/Figure4_related/SHM_H_B+D3.pdf"), width=3.5, height=3)
print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_H, fill = paper_reactivity, color = paper_reactivity)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", tip.length = 0) +
  geom_text(data = df_summary, aes(label = count, y = 0.2), 
            vjust = 1.5, color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  ))
print(ggplot(df, aes(x = paper_reactivity, y = v_domain_shm_ratio_L, fill = paper_reactivity, color = paper_reactivity)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = comparisons, method = "wilcox.test", label = "p.signif", tip.length = 0) +
  geom_text(data = df_summary, aes(label = count, y = 0.2), 
            vjust = 1.5, color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  ))
dev.off()

In [195]:
# JN.1 effective VDJ

df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H != "" & v_gene_L != "") %>% filter(source %in% c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection"))


df <- rbind(
    df %>% filter(JN1_IC50 < 1) %>% mutate(eff = "effective"),
    df %>% filter(JN1_IC50 > 1) %>% mutate(eff = "escaped")
)
sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
groups <- unique(df$new_group)

dev.off()

for (group in groups) {
    
    for (use_eff in c("effective", "escaped")) {
        pdf(paste0("../plots/Figure4_related/VDJ_by_group_JN1/VDJ_chord_plots","_group_",str_replace(group,"/","_"),'_',use_eff,".pdf"))
        # all mAbs

        df_m <- df %>% filter(new_group == group & eff == use_eff)
        if (nrow(df_m) == 0) next
        chord_plot(df_m, title=paste0("All ",group, " (n = ", nrow(df_m), ")"))

        for (src in sources) {
            df_m <- df %>% filter(source == src & new_group == group & eff == use_eff)
            if (nrow(df_m) == 0) next
            chord_plot(df_m, title=paste0(src, ' ', group, ' (n = ', nrow(df_m), ')'))
        }
        dev.off()
    }
}


In [196]:
# JN.1 effective VDJ

df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H != "" & v_gene_L != "") %>% filter(source %in% c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection"))


df <- rbind(
    df %>% filter(KP3_IC50 < 1) %>% mutate(eff = "effective"),
    df %>% filter(KP3_IC50 > 1) %>% mutate(eff = "escaped")
)
sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
groups <- unique(df$new_group)

dev.off()

for (group in groups) {
    
    for (use_eff in c("effective", "escaped")) {
        pdf(paste0("../plots/Figure4_related/VDJ_by_group_KP3/VDJ_chord_plots","_group_",str_replace(group,"/","_"),'_',use_eff,".pdf"))
        # all mAbs

        df_m <- df %>% filter(new_group == group & eff == use_eff)
        if (nrow(df_m) == 0) next
        chord_plot(df_m, title=paste0("All ",group, " (n = ", nrow(df_m), ")"))

        for (src in sources) {
            df_m <- df %>% filter(source == src & new_group == group & eff == use_eff)
            if (nrow(df_m) == 0) next
            chord_plot(df_m, title=paste0(src, ' ', group, ' (n = ', nrow(df_m), ')'))
        }
        dev.off()
    }
}


In [197]:
# cross-specific VDJ
# add two groups: merge BTI & merge non-BTI

sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
src2g <- c(
    "XBB infection"="non-BTI", 
    "XBB BTI"="BTI", 
    "BA.5 + XBB infection"="non-BTI", 
    "BA.5 + JN.1 infection"="non-BTI", 
    "BA.5 BTI + XBB infection"="BTI", 
    "BA.5 BTI + HK.3 infection"="BTI", 
    "BA.5 BTI + JN.1 infection"="BTI")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H != "" & v_gene_L != "") %>% filter(source %in% sources)
groups <- unique(df$new_group)
df$sg <- src2g[df$source]

dev.off()

for (group in groups) {
    for (use_eff in c("cross", "specific")) {
        pdf(paste0("../plots/Figure4_related/VDJ_by_group_cross_specific/VDJ_chord_plots","_group_",str_replace(group,"/","_"),'_',use_eff,".pdf"))
        # all mAbs

        df_m <- df %>% filter(new_group == group & paper_reactivity == use_eff)
        if (nrow(df_m) == 0) next
        chord_plot(df_m, title=paste0("All ",group, " (n = ", nrow(df_m), ")"))

        for (src in c("BTI", "non-BTI")) {
            df_m <- df %>% filter(sg == src & new_group == group & paper_reactivity == use_eff)
            if (nrow(df_m) == 0) next
            chord_plot(df_m, title=paste0(src, ' ', group, ' (n = ', nrow(df_m), ')'), min_anno = 1)
        }
        for (src in sources) {
            df_m <- df %>% filter(source == src & new_group == group & paper_reactivity == use_eff)
            if (nrow(df_m) == 0) next
            chord_plot(df_m, title=paste0(src, ' ', group, ' (n = ', nrow(df_m), ')'))
        }
        dev.off()
    }
}


In [211]:
# VDJ for A1 breadth
sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% 
    filter(source %in% sources & new_group == "A1" & paper_reactivity == "cross") %>%
    merge(read.csv("../data/_A1_breadth.csv")%>%select(c('id', 'breadth')), by='id', all.x=T)

l1 <- length(unique(df$v_gene_H))
l2 <- length(unique(df$v_gene_L))
l3 <- length(unique(df$d_gene_H))
l4 <- length(unique(df$j_gene_H))

library(RColorBrewer)

all_colors <- unique(c(
    brewer.pal(n=9, name='Set1'),brewer.pal(n=8, name='Set2'),brewer.pal(n=11, name='Set3'),
    brewer.pal(n=9, name='Pastel1'), brewer.pal(n=8, name='Pastel2'), brewer.pal(n=11, name='Paired'), 
    brewer.pal(n=8, name='Dark2'), brewer.pal(n=8, name='Accent')
))

col1 <- all_colors[1:l1]
names(col1) <- unique(df$v_gene_H)
col2 <- all_colors[(l1+1):(l1+l2)]
names(col2) <- unique(df$v_gene_L)
col3 <- all_colors[(l1+l2+1):(l1+l2+l3)]
names(col3) <- unique(df$d_gene_H)
col4 <- all_colors[(l1+l2+l3+1):(l1+l2+l3+l4)]
names(col4) <- unique(df$j_gene_H)


dev.off()
pdf("../plots/Extended/A1_breadth_V.pdf", width=4.8, height=4.8)

df_broad <- df%>%filter(breadth == 'broad' & v_gene_H != "" & v_gene_L != "")
df_escaped <- df%>%filter(breadth == 'escaped' & v_gene_H != "" & v_gene_L != "")

chord_plot(df_broad, title=paste0('A1 broad (', nrow(df_broad), ' mAbs)'), col1=col1,col2=col2, min_anno = 3)
chord_plot(df_escaped, title=paste0('A1 escaped (', nrow(df%>%filter(breadth == 'escaped')), ' mAbs)'), col1=col1,col2=col2, min_anno = 3)

df_broad <- df%>%filter(breadth == 'broad' & v_gene_H != "" & d_gene_H != "")
df_escaped <- df%>%filter(breadth == 'escaped' & v_gene_H != "" & d_gene_H != "")

df_broad$v_gene_L <- df_broad$d_gene_H
df_escaped$v_gene_L <- df_escaped$d_gene_H

chord_plot(df_broad, title=paste0('A1 broad (', nrow(df_broad), ' mAbs)'), col1=col1,col2=col3, min_anno = 3)
chord_plot(df_escaped, title=paste0('A1 escaped (', nrow(df_escaped), ' mAbs)'), col1=col1,col2=col3, min_anno = 3)

df_broad <- df%>%filter(breadth == 'broad' & v_gene_H != "" & j_gene_H != "")
df_escaped <- df%>%filter(breadth == 'escaped' & v_gene_H != "" & j_gene_H != "")

df_broad$v_gene_L <- df_broad$j_gene_H
df_escaped$v_gene_L <- df_escaped$j_gene_H

chord_plot(df_broad, title=paste0('A1 broad (', nrow(df_broad), ' mAbs)'), col1=col1,col2=col4, min_anno = 3)
chord_plot(df_escaped, title=paste0('A1 escaped (', nrow(df_escaped), ' mAbs)'), col1=col1,col2=col4, min_anno = 3)
dev.off()

In [212]:
# group SHM for A1 breadth
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

# sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
# sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection")
sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% 
    filter(source %in% sources & paper_reactivity == "cross" & new_group == 'A1') %>% merge(
    read.csv("../data/_A1_breadth.csv")[,c('id', 'breadth')], by='id', how='left'
)

colors <-c("broad"="#BC3C29", "escaped"="#0072B5")

my_comp <- list(c("broad", "escaped"))

dev.off()

df_summary <- df %>%
  group_by(breadth) %>%
  summarise(count = n(), medianH=median(v_domain_shm_ratio_H), medianL=median(v_domain_shm_ratio_L)) 

pdf(paste0("../plots/Extended/SHM_A1_breadth.pdf"), width=3, height=3)
ggplot(df, aes(x = breadth, y = v_domain_shm_ratio_H, fill = breadth, color = breadth)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.format", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianH, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
ggplot(df, aes(x = breadth, y = v_domain_shm_ratio_L, fill = breadth, color = breadth)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.format", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianL, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
dev.off()

In [200]:
# group SHM for F3 source
library(ggplot2)
library(tidyverse)
library(ggsignif)
library(ggpubr)

sources <- c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(source %in% sources & paper_reactivity == "specific" & new_group == 'F3')
df$source <- factor(df$source, levels=sources)
colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

dev.off()

df_summary <- df %>%
  group_by(source) %>%
  summarise(count = n(), medianH=median(v_domain_shm_ratio_H), medianL=median(v_domain_shm_ratio_L)) 

pdf(paste0("../plots/Figure5_related/SHM_F3_source.pdf"), width=4, height=3)
ggplot(df, aes(x = source, y = v_domain_shm_ratio_H, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.format", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianH, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
ggplot(df, aes(x = source, y = v_domain_shm_ratio_L, fill = source, color = source)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Light chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=colors)+
  scale_color_manual(values=colors)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.format", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianL, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )
dev.off()

In [201]:
# overall usage

sources <- c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection")
df <- read.csv("../data/_mAb_info_clean.csv") %>% filter(source %in% sources)
df$source <- factor(df$source, levels=sources)

x <- df%>% group_by(source, v_gene_H) %>% summarise(count=n())
x$v_gene_H = factor(x$v_gene_H, levels=(df%>% group_by(v_gene_H) %>% summarise(count=n()) %>% arrange(-count))$v_gene_H)

dev.off()
pdf("../plots/Extended/overall_v_H_usage.pdf", width=6, height=7)

ggplot(x, aes(v_gene_H, count))+facet_wrap(~source, ncol=1, scales = "free_y")+geom_bar(stat="identity", fill=NA, color="black")+theme_classic()+
    theme(
        strip.background = element_blank(),
        axis.text.x = element_text(angle=45,hjust=1)
    )
dev.off()

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


In [202]:
# IGHV5-51 L vs group

df <- read.csv("../data/DMS/antibody/_clustering.csv") %>% filter(v_gene_H == "IGHV5-51" & v_gene_L != "") %>% 
    filter(source %in% c("XBB infection", "XBB BTI", "BA.5 + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection", "BA.5 BTI + JN.1 infection"))

cb_palette <- c(
    "A1"="#004400","A2"="#237732","B"="#CC3119",
    "D2"="#33aa11", "D3"="#99AA11","D4"="#80A07A",
    "E1/E2.1"="#664088", "E2.2"='#aa1937', 
    "E3"="#5c0a02", 
    "F1.1"="#c58233", "F1.2"="#253257",
    "F3"="#7093c8"
)
dev.off()

my_comp <- list(c("E3", "D3"),c("D3", "F3"), c("E3", "F3"))
pdf("../plots/Figure5_related/IGHV5-51_groups.pdf",width=5, height=5)

df_m <- df %>% mutate(v_gene_H = new_group)
chord_plot(df_m, title=paste0("IGHV5-51 (n = ", nrow(df_m), ")"), col1=cb_palette, min_anno = 3)
dev.off()

df <- df %>% filter(new_group %in% c("D3","F3", "E3"))
df_summary <- df %>%
  group_by(new_group) %>%
  summarise(count = n(), medianH=median(v_domain_shm_ratio_H), medianL=median(v_domain_shm_ratio_L)) 

pdf(paste0("../plots/Figure5_related/SHM_5-51_group.pdf"), width=3, height=3)
ggplot(df, aes(x = new_group, y = v_domain_shm_ratio_H, fill = new_group, color = new_group)) +
  geom_violin(trim = FALSE, alpha=0.5,scale = "width", width=0.8) +
  geom_boxplot(width = 0.2, fill='white') +
  labs(y = "Heavy chain V-domain SHM rate", x = "reactivity") +
  theme_classic() +  scale_fill_manual(values=cb_palette)+
  scale_color_manual(values=cb_palette)+scale_y_continuous(expand=c(0,0,0,0.06))+
  stat_compare_means(comparisons = my_comp, method = "wilcox.test", label = "p.format", label.y = 0.22, tip.length = 0, step.increase = 0.15) +
  geom_text(data = df_summary, aes(label = paste0("n=",count), y = 0.2), color = "black")+
  geom_text(data = df_summary, aes(label = formatC(medianH, digits = 3, format="f"), y = 0.17), color = "black")+
  theme(
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
  )

dev.off()

“Since you have set `order`, you should better set `grid.col` as a named
vector where sector names are the vector names (should contain all
sectors).”


“cannot compute exact p-value with ties”


In [203]:
# B/D3 cross/specific
df <- read.csv("../data/DMS/antibody/_clustering.csv")

for (g in c("B", "D3")) { 
    for (src in c("BA.5 + XBB infection", "BA.5 BTI + XBB infection", "BA.5 + JN.1 infection", "BA.5 BTI + JN.1 infection")) {
        for (react in c("cross", "specific")) {
            print(c(g, src, react))
            print(
                nrow(df %>% filter(source == src & new_group == g & paper_reactivity == react))
            )
        }
    }
}

[1] "B"                    "BA.5 + XBB infection" "cross"               
[1] 3
[1] "B"                    "BA.5 + XBB infection" "specific"            
[1] 31
[1] "B"                        "BA.5 BTI + XBB infection"
[3] "cross"                   
[1] 4
[1] "B"                        "BA.5 BTI + XBB infection"
[3] "specific"                
[1] 13
[1] "B"                     "BA.5 + JN.1 infection" "cross"                
[1] 1
[1] "B"                     "BA.5 + JN.1 infection" "specific"             
[1] 19
[1] "B"                         "BA.5 BTI + JN.1 infection"
[3] "cross"                    
[1] 2
[1] "B"                         "BA.5 BTI + JN.1 infection"
[3] "specific"                 
[1] 22
[1] "D3"                   "BA.5 + XBB infection" "cross"               
[1] 2
[1] "D3"                   "BA.5 + XBB infection" "specific"            
[1] 13
[1] "D3"                       "BA.5 BTI + XBB infection"
[3] "cross"                   
[1] 6
[1] "D3"                       "BA

In [204]:
fisher.test(matrix(c(3,31,4, 13), nrow=2),alternative = "less")
fisher.test(matrix(c(1,19,2,22), nrow=2),alternative = "less")
fisher.test(matrix(c(4,50,6,35), nrow=2),alternative = "less")

fisher.test(matrix(c(2,13,6,15), nrow=2),alternative = "less")
fisher.test(matrix(c(2,14,8,33), nrow=2),alternative = "less")
fisher.test(matrix(c(4,27,14,38), nrow=2),alternative = "less")

fisher.test(matrix(c(8,77,20,63), nrow=2),alternative = "less")



	Fisher's Exact Test for Count Data

data:  matrix(c(3, 31, 4, 13), nrow = 2)
p-value = 0.1568
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.000000 1.683551
sample estimates:
odds ratio 
 0.3225973 



	Fisher's Exact Test for Count Data

data:  matrix(c(1, 19, 2, 22), nrow = 2)
p-value = 0.5696
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.000000 8.010814
sample estimates:
odds ratio 
 0.5859126 



	Fisher's Exact Test for Count Data

data:  matrix(c(4, 50, 6, 35), nrow = 2)
p-value = 0.2114
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.000000 1.741305
sample estimates:
odds ratio 
 0.4704962 



	Fisher's Exact Test for Count Data

data:  matrix(c(2, 13, 6, 15), nrow = 2)
p-value = 0.2527
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.000000 2.136906
sample estimates:
odds ratio 
 0.3944393 



	Fisher's Exact Test for Count Data

data:  matrix(c(2, 14, 8, 33), nrow = 2)
p-value = 0.4213
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.00000 2.83887
sample estimates:
odds ratio 
 0.5943447 



	Fisher's Exact Test for Count Data

data:  matrix(c(4, 27, 14, 38), nrow = 2)
p-value = 0.1089
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.000000 1.252756
sample estimates:
odds ratio 
 0.4062401 



	Fisher's Exact Test for Count Data

data:  matrix(c(8, 77, 20, 63), nrow = 2)
p-value = 0.00897
alternative hypothesis: true odds ratio is less than 1
95 percent confidence interval:
 0.0000000 0.7410201
sample estimates:
odds ratio 
 0.3294205 
