In [1]:
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'JN.1'

sources <- c('BA.5 BTI + XBB infection','BA.5 + XBB infection', 
           'XBB BTI','XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 + JN.1 infection', 'BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
cgroups <- c("F3"="F3", "A1"="A1", "A2"="A2", "B"="B", "D2"="D2", "D3"="D3", "D4"="D4", "E1/E2.1"="E1/E2.1", "E2.2"="E2.2/E3/F1.1", "E3"="E2.2/E3/F1.1", "F1.1"="E2.2/E3/F1.1", "F1.2"="F1.2")
data_all$cgroup <- cgroups[data_all$new_group]


── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
circlize version 0.4.16
CRAN page: https://cran.r-project.org/package=circli

In [2]:

wilcox.test((data_all %>% filter(new_group == "A1" & source == "BA.5 BTI + XBB infection" & paper_reactivity == 'cross'))$JN1_F456L_A475V_IC50, 
           (data_all %>% filter(new_group == "A1" & source == "BA.5 BTI + JN.1 infection" & paper_reactivity == 'cross'))$JN1_F456L_A475V_IC50)


	Wilcoxon rank sum test with continuity correction

data:  (data_all %>% filter(new_group == "A1" & source == "BA.5 BTI + XBB infection" & paper_reactivity == "cross"))$JN1_F456L_A475V_IC50 and (data_all %>% filter(new_group == "A1" & source == "BA.5 BTI + JN.1 infection" & paper_reactivity == "cross"))$JN1_F456L_A475V_IC50
W = 1395, p-value = 0.07251
alternative hypothesis: true location shift is not equal to 0


In [56]:
retain_trailing_zeros <- function(number, sig_digits) {
    rounded_number <- signif(number, sig_digits)
    num_decimal_digits <- sig_digits - floor(log10(rounded_number)) - 1
    if (num_decimal_digits < 0) num_decimal_digits <- 0
    formatted_number <- formatC(rounded_number, format = "f", digits = num_decimal_digits)
    return(formatted_number)
}
my_signif <- function(x, thres) {
    digits <- 1+(x>=thres)
    return(mapply(retain_trailing_zeros, x, digits))
}

get_lineplot <- function(data, ref_FC, colors, full) {
    colnames(data) <- variants[colnames(data)]
    data[data > 10] <- 10
    data[data < 0.0005] <- 0.0005

    df_test = data.frame()

    for (ag2 in colnames(data)[1:ncol(data)]){
        if (ref_FC == ag2) {next}
        # test <- na.omit(log10(data[,ref_FC]/data[,ag2]))
        # if (length(test) == 0) {
        #     label <- "NS"
        # }
        else {
            pval <- wilcox.test(data[,ref_FC], data[,ag2], paired=TRUE, exact=FALSE)$p.value
            # pval <- wilcox.test(test)$p.value

            if (is.na(pval) || pval >= 0.05) label <- "NS"
            else if (pval >= 0.01) label <- "*"
            else if (pval >= 0.001) label <- "**"
            else label <- "***"
        }
        df_test <- rbind(df_test, data.frame(ref=ref_FC, antigen=ag2, pval=pval, label=label))
    }

    data$id <- rownames(data)
    samples <- colnames(data)[1:ncol(data)]
    data <- data %>% pivot_longer(!id) %>% na.omit()
    data$name <- factor(data$name, levels=samples)

    count_eff <- data %>% group_by(name) %>% summarise(cnt = sum(value < 1), total = n())
    count_eff$ratio_eff <- count_eff$cnt / count_eff$total

    data_gmean <- as.data.frame(data %>% group_by(name) %>% summarise(gmean=exp(mean(log(value)))))
    data_gmean$FC <- data_gmean$gmean / data_gmean[data_gmean$name == ref_FC, 'gmean']
    data_gmean$show_FC <- paste(my_signif(data_gmean$FC, 1),'x',sep='')
    data_gmean[data_gmean$name == ref_FC, 'show_FC'] <- ''
    data_gmean$show_gmean <- my_signif(data_gmean$gmean,0.01)
    data_gmean$percent <- paste(round(count_eff$ratio_eff*100), '%', sep='')
    
    p <- ggplot(data, aes(name, log10(value)))+geom_line(aes(group=id), alpha=0.02)+
        geom_hline(yintercept = 0, linetype='dashed', color='red')+
        geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
        scale_y_continuous(limits=c(-3.5,2.8), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
        # scale_y_reverse(limits=c(1,-5.6), breaks=c(1,0,-1,-2,-3), labels=c(expression('10'^{1}),expression('10'^{0}),expression('10'^{-1}),expression('10'^{-2}),expression('10'^{-3})))+
        geom_point_rast(aes(fill=name), color='#333333', size=2, alpha=0.2, shape=21,show.legend = F)+scale_fill_manual(values=colors)+theme_classic()+
        stat_summary(fun=mean, fun.min=mean, fun.max=mean, geom="errorbar", size=0.5, width=0.3, color="black", ) + labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
        geom_text(data=data_gmean, aes(x=name, label=percent), y=1.75, size=3.3)+
        geom_text(data=data_gmean, aes(x=name, label=show_gmean), y=2.6, size=3.3)+
        ggtitle(paste(g, '(n = ', length(unique(data$id)), ')', sep=''))+
    theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1), plot.title=element_text(size=11, hjust=0.4))
    
    if (full) {
        p <- p + geom_text(data=data_gmean, aes(x=name, label=show_FC), y=3.5, size=3.3)+
            geom_text(data=df_test, aes(x=antigen, label=label), y=4.2, size=3.3)+
            annotate(geom = 'segment', x = 1, xend=length(colors), y=3, yend=3)+
        scale_y_continuous(limits=c(-3.5,4.2), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))
    }
    return(p)
}

In [4]:
# generate line plots of mAbs against variants, grouped by group

src <- "../data/DMS/antibody/_clustering.csv"
sources <- c('BA.5 BTI + XBB infection','BA.5 + XBB infection', 
           'XBB BTI','XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 + JN.1 infection', 'BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
cgroups <- c("F3"="F3", "A1"="A1", "A2"="A2", "B"="B", "D2"="D2", "D3"="D3", "D4"="D4", "E1/E2.1"="E1/E2.1", "E2.2"="E2.2/E3/F1.1", "E3"="E2.2/E3/F1.1", "F1.1"="E2.2/E3/F1.1", "F1.2"="F1.2")
variants <- c('D614G_IC50'='D614G', 'XBB1_5_IC50'='XBB.1.5', 'JN1_IC50'='JN.1', 'JN1_R346T_F456L_IC50'='KP.2','KP3_IC50'='KP.3')
data_all$cgroup <- cgroups[data_all$new_group]

dev.off()
pdf("../plots/Figure3_related/mAb_neut_lines_by_group.pdf", width=2.3, height=1.8)
for (g in unique(data_all$cgroup)) {
    data <- (data_all %>% filter(cgroup == g & source %in% sources))[,names(variants)]
    colors <- c("#7193E2", "#EE6439", "#60AB59", "#FFCF56", '#AA2055')

    p <- get_lineplot(data, ref_FC, colors, full=F)
    print(p)
}
dev.off()


“[1m[22mUsing `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
[36mℹ[39m Please use `linewidth` instead.”


In [6]:
# overall neutralization

sources <- c('XBB BTI','XBB infection', 'BA.5 + XBB infection', 'BA.5 + JN.1 infection', 
           'BA.5 BTI + XBB infection','BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')

src <- "../data/DMS/antibody/_clustering.csv"
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
data_all$source <- factor(data_all$source, levels=sources)
variants <- c('JN1_IC50'='JN.1', 'JN1_R346T_F456L_IC50'='KP.2', 'KP3_IC50'='KP.3')
colors <- c("cross"="#BC3C29", "specific"="#0072B5")
dev.off()
pdf("../plots/Extended/overall_neut.pdf", width=5.5, height=5)
data <- data_all[,c('id',names(variants))]
rownames(data) <- data$id
data <- data[,names(variants)]
colnames(data) <- variants[colnames(data)]
data[data > 10] <- 10
data[data < 0.0005] <- 0.0005

data$id <- rownames(data)
samples <- colnames(data)[1:ncol(data)]
data <- data %>% pivot_longer(!id) %>% na.omit()
data$name <- factor(data$name, levels=samples)
data <- merge(data, data_all[,c('id','source','paper_reactivity')], all.x=T, by='id')

data_summary <- data %>% group_by(source, paper_reactivity, name) %>% summarise(gmean=exp(mean(log(value))), 
                            percent = paste0(round(sum(value < 1)/n()*100), '%'))

p <- ggplot(data, aes(source, log10(value)))+facet_wrap(~name, ncol=1)+#geom_line(aes(group=id, color=new_group), alpha=0.1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.9,1.9), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=paper_reactivity), position=position_dodge(.9),color='#333333', size=1.5, alpha=0.1, shape=21,show.legend = F)+
    geom_point(data=data_summary, aes(y=log10(gmean), group=paper_reactivity), color='black', shape=21, size=2.5, position= position_dodge(0.9))+
    geom_text(data=data_summary, aes(label=my_signif(gmean, 0.1), y=1.7, group=paper_reactivity), position= position_dodge(0.9), size=3.5)+
    geom_text(data=data_summary, aes(label=percent, y=-3.7, group=paper_reactivity), position= position_dodge(0.9), size=3.5)+
    scale_color_manual(values=colors)+
    scale_fill_manual(values=colors)+
    theme_classic()+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),axis.ticks.x=element_blank())

print(p)
dev.off()


[1m[22m`summarise()` has grouped output by 'source', 'paper_reactivity'. You can override using the `.groups` argument.
“[1m[22mNo shared levels found between `names(values)` of the manual scale and the data's [32mcolour[39m values.”


In [7]:
# V5-51 color by group

sources <- c('BA.5 BTI + XBB infection','BA.5 + XBB infection', 
           'XBB BTI','XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 + JN.1 infection', 'BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
variants <- c('D614G_IC50'='D614G', 'XBB1_5_IC50'='XBB.1.5', 'JN1_IC50'='JN.1', 'JN1_R346T_F456L_IC50'='KP.2','KP3_IC50'='KP.3')
groups <- data_all$new_group
names(groups) <- data_all$id

dev.off()
pdf("../plots/Extended/VH5-51_neut_lines.pdf", width=5, height=2.5)
data <- (data_all %>% filter(v_gene_H == 'IGHV5-51'))[,c('id',names(variants))]
colors <- c(
    "A1"="#004400","A2"="#237732","B"="#CC3119",
    "D2"="#33aa11", "D3"="#99AA11","D4"="#80A07A",
    "E1/E2.1"="#664088", "E2.2"='#aa1937', 
    "E3"="#5c0a02", 
    "F1.1"="#c58233", "F1.2"="#253257",
    "F3"="#7093c8"
)
rownames(data) <- data$id
data <- data[,names(variants)]
colnames(data) <- variants[colnames(data)]
data[data > 10] <- 10
data[data < 0.0005] <- 0.0005

data$id <- rownames(data)
samples <- colnames(data)[1:ncol(data)]
data <- data %>% pivot_longer(!id) %>% na.omit()
data$name <- factor(data$name, levels=samples)
data$new_group <- groups[data$id]

p <- ggplot(data, aes(name, log10(value)))+geom_line(aes(group=id, color=new_group), alpha=0.1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,1.5), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=new_group), color='#333333', size=2, alpha=0.1, shape=21,show.legend = F)+
    scale_color_manual(values=colors)+
    scale_fill_manual(values=colors)+
    theme_classic()+ggtitle('IGHV5-51 mAbs')+
    stat_summary(fun=mean, fun.min=mean, fun.max=mean, geom="errorbar", size=0.5, width=0.3, color="black", ) + labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1), plot.title=element_text(size=11, hjust=0.4))+
guides(color=guide_legend(title="epitope", override.aes = list(size=2,alpha=1)))

print(p)
dev.off()


In [8]:
# Extended IGHV3-7 A1


data_all <- read.csv(src, check.names = F)
variants <- c('D614G_IC50'='D614G', 'XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',
              'JN1_F456L_IC50'='JN.1+F456L',
              'JN1_R346T_F456L_IC50'='KP.2','KP3_IC50'='KP.3', 'JN1_F456L_A475V_IC50'='JN.1+F456L+A475V')
groups <- ifelse(data_all$v_gene_H == "IGHV3-7", "IGHV3-7","others")
names(groups) <- data_all$id

dev.off()
pdf("../plots/Extended/VH3-7-A1_neut_lines.pdf", width=5, height=2.5)
data <- (data_all %>% filter(source == 'BA.5 BTI + XBB infection' & new_group == "A1" & paper_reactivity=="cross"))[,c('id',names(variants))]
colors <- c(
    "IGHV3-7"= "red",
    "others"="#DDDDDD"
)
rownames(data) <- data$id
data <- data[,names(variants)]
colnames(data) <- variants[colnames(data)]
data[data > 10] <- 10
data[data < 0.0005] <- 0.0005

data$id <- rownames(data)
samples <- colnames(data)[1:ncol(data)]
data <- data %>% pivot_longer(!id) %>% na.omit()
data$name <- factor(data$name, levels=samples)
data$new_group <- groups[data$id]

data_summary <- data %>% group_by(name, new_group) %>% summarise(percent=paste0(round(100*sum(value<1)/n()), "%"))

p <- ggplot(data, aes(name, log10(value)))+geom_line(aes(group=id, color=new_group), alpha=0.4)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,1.5), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=new_group), color='#333333', size=2, alpha=0.5, shape=21,show.legend = F)+
    scale_color_manual(values=colors)+
    scale_fill_manual(values=colors)+
    geom_text(data=data_summary, aes(label=percent, color=new_group, y=1.4))+
    theme_classic()+ggtitle('BA.5 BTI + XBB infection A1')+
    stat_summary(fun=mean, fun.min=mean, fun.max=mean, geom="errorbar", size=0.5, width=0.3, color="black", ) + labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1), plot.title=element_text(size=11, hjust=0.4))+
guides(color=guide_legend(title="epitope", override.aes = list(size=2,alpha=1)))

print(p)
dev.off()


[1m[22m`summarise()` has grouped output by 'name'. You can override using the `.groups` argument.


In [59]:
# A1 neutralization compare

refine_val <- function(x) {
    x[x > 10] <- 10
    x[x < 0.0005] <- 0.0005
    x
}

library(ggpubr)

variants <- c('XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',#'JN1_F456L_IC50'='JN.1+F456L','JN1_F456L_A475V_IC50'='JN.1+F456L+A475V',
              'JN1_R346T_F456L_IC50'='KP.2', 
              'KP3_IC50'='KP.3','KP3_A475V_IC50'='KP.3+A475V')
sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
dev.off()
pdf("../plots/Figure4_related/A1_neutralization_compare.pdf", width=8.5, height=3)
data <- data_all %>% filter(new_group == "A1" & source %in% sources & paper_reactivity == 'cross') %>% select(
    c("source", names(variants))
) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value), source=factor(source, levels=sources))

df_summary <- data %>% group_by(source, name) %>% 
    summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))
# my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,4.1), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
    geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
    geom_text(data=df_summary, aes(label=percent), y=2.3, size=3.3)+
    geom_text(data=df_summary, aes(label=my_signif(10^logmean, 0.1)), y=1.6, size=3.3)+
    labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
    stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.135, tip.length = 0, label.y = 2.5,size=3.3) +
    # ggtitle(paste(g, '(n = ', length(unique(data$id)), ')', sep=''))+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())

dev.off()


[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


In [68]:

JN1_data <- data_all %>% filter(new_group == "A1" & source == 'BA.5 BTI + JN.1 infection' & paper_reactivity == 'cross')
print(wilcox.test(JN1_data$JN1_IC50, JN1_data$JN1_R346T_F456L_IC50, paired=T))
print(wilcox.test(JN1_data$JN1_R346T_F456L_IC50, JN1_data$KP3_IC50, paired=T))
print(wilcox.test(JN1_data$KP3_IC50, JN1_data$KP3_A475V_IC50, paired=T))



	Wilcoxon signed rank exact test

data:  JN1_data$JN1_IC50 and JN1_data$JN1_R346T_F456L_IC50
V = 113, p-value = 7.936e-07
alternative hypothesis: true location shift is not equal to 0



“cannot compute exact p-value with ties”



	Wilcoxon signed rank test with continuity correction

data:  JN1_data$JN1_R346T_F456L_IC50 and JN1_data$KP3_IC50
V = 212, p-value = 0.0009724
alternative hypothesis: true location shift is not equal to 0



“cannot compute exact p-value with ties”



	Wilcoxon signed rank test with continuity correction

data:  JN1_data$KP3_IC50 and JN1_data$KP3_A475V_IC50
V = 11, p-value = 9.807e-08
alternative hypothesis: true location shift is not equal to 0



In [70]:
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'JN.1'

sources <- c('BA.5 BTI + XBB infection','BA.5 + XBB infection', 
           'XBB BTI','XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 + JN.1 infection', 'BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
# generate line plots of mAbs against variants, grouped by source and group
dev.off()
groups <- c("A1")

variants <- c('D614G_IC50'='D614G', 'BA1_IC50'='BA.1', 'BA2_IC50'='BA.2','BA5_IC50'='BA.5',
              'XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1','JN1_F456L_IC50'='JN.1+F456L', 
              'JN1_R346T_F456L_IC50'='KP.2',
              'KP3_IC50'='KP.3','JN1_F456L_A475V_IC50'='JN.1+F456L+A475V','KP3_A475V_IC50'='KP.3+A475V')

for (use_src in sources){
    pdf(paste0("../plots/Figure4_related/neut_lines/",use_src, "_group_lines.pdf"), width=4.8, height=3)

    for (g in groups) {
        cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
        colors <- cR(1:length(variants))
        data <- (data_all %>% filter(new_group == g & source == use_src & paper_reactivity == 'cross'))[,names(variants)] %>% na.omit()
        if (nrow(data) == 0) next
        
        p <- get_lineplot(data, ref_FC, colors, full=T) + ggtitle(paste0(use_src, ' WT-reactive ', g))
        print(p)
    }
dev.off()
}

[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.


In [11]:
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'JN.1'

sources <- c('BA.5 BTI + XBB infection','BA.5 + XBB infection', 
           'XBB BTI','XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 + JN.1 infection', 'BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
# generate line plots of mAbs against variants, grouped by source and group
dev.off()
groups <- c("F3")

variants <- c('D614G_IC50'='D614G', 'XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',
              'JN1_R346T_F456L_IC50'='KP.2',
              'KP3_IC50'='KP.3')

for (use_src in sources){
    pdf(paste0("../plots/Figure5_related/neut_lines/",use_src, "_group_lines_F3.pdf"), width=3, height=2.5)

    for (g in groups) {
        cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
        colors <- cR(1:length(variants))
        data <- (data_all %>% filter(new_group == g & source == use_src))[,names(variants)] %>% na.omit()
        if (nrow(data) == 0) next
        
        p <- get_lineplot(data, ref_FC, colors, full=T)
        print(p)
    }
dev.off()
}

[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.


In [12]:
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'JN.1'

sources <- c('BA.5 BTI + XBB infection','BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')

data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
# generate line plots of mAbs against variants, grouped by source and group
dev.off()
groups <- c("A1", "A2", "B", "D3")

variants <- c('JN1_IC50'='JN.1',
              'JN1_R346T_F456L_IC50'='KP.2',
              'KP3_IC50'='KP.3')

for (use_src in sources){
    pdf(paste0("../plots/Figure4_related/neut_lines/",use_src, "_group_lines_few_strains.pdf"), width=2, height=2)

    for (g in groups) {
        cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
        colors <- cR(1:length(variants))
        data <- (data_all %>% filter(new_group == g & source == use_src))[,names(variants)] %>% na.omit()
        if (nrow(data) == 0) next
        
        p <- get_lineplot(data, ref_FC, colors, full=T)
        print(p)
    }
dev.off()
}

[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will repl

In [13]:
# cross B/D3 line
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'BA.5'

sources <- c('BA.5 BTI + XBB infection','BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
# generate line plots of mAbs against variants, grouped by source and group
dev.off()
groups <- c("D3", "B")

variants <- c('D614G_IC50'='D614G', 'BA5_IC50'='BA.5',
              'XBB1_5_IC50'='XBB.1.5',  'JN1_IC50'='JN.1')

pdf(paste0("../plots/Figure4_related/WT_B_D3_neut_lines.pdf"), width=2, height=2.5)

for (g in groups) {
    cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
    colors <- cR(1:length(variants))
    data <- (data_all %>% filter(new_group == g & paper_reactivity == "cross"))[,names(variants)] %>% na.omit()
    if (nrow(data) == 0) next

    p <- get_lineplot(data, ref_FC, colors, full=T)
    print(p)
}
dev.off()


[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.


In [14]:
# B specific line
library(ggplot2)
library(tidyverse)
library(circlize)
library(ggrastr)

src <- "../data/DMS/antibody/_clustering.csv"
ref_FC <- 'JN.1'

sources <- c('BA.5 BTI + XBB infection','BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)
# generate line plots of mAbs against variants, grouped by source and group
dev.off()
groups <- c("A2", "B","D3")

variants <- c('D614G_IC50'='D614G', 'BA1_IC50'='BA.1', 'BA2_IC50'='BA.2', 'BA5_IC50'='BA.5',
              'XBB1_5_IC50'='XBB.1.5',  'JN1_IC50'='JN.1',
              'JN1_R346T_F456L_IC50'='KP.2',
              'KP3_IC50'='KP.3')

pdf(paste0("../plots/Figure5_related/B_early_variants_group_lines.pdf"), width=3.5, height=2.5)

for (g in groups) {
    cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
    colors <- cR(1:length(variants))
    data <- (data_all %>% filter(new_group == g & paper_reactivity == "specific"))[,names(variants)] %>% na.omit()
    if (nrow(data) == 0) next

    p <- get_lineplot(data, ref_FC, colors, full=T)
    print(p)
}
dev.off()


[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.
[1m[22mScale for [32my[39m is already present.
Adding another scale for [32my[39m, which will replace the existing scale.


In [73]:
# Fig 3 all cross neutralization compare

refine_val <- function(x) {
    x[x > 10] <- 10
    x[x < 0.0005] <- 0.0005
    x
}

library(ggpubr)

variants <- c('JN1_IC50'='JN.1',#'XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_F456L_IC50'='JN.1+F456L', 
              'JN1_R346T_F456L_IC50'='KP.2', 'KP3_IC50'='KP.3')
sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)

dev.off()

pdf(paste0("../plots/Figure4_related/all_cross_neutralization_compare.pdf"), width=6, height=3.2)
data <- data_all %>% filter(source %in% sources & paper_reactivity == 'cross') %>% select(
    c("source", names(variants))
) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value), source=factor(source, levels=sources))

df_summary <- data %>% group_by(source, name) %>% summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))
colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
p <- ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,4.2), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
    geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
    geom_text(data=df_summary, aes(label=percent), y=1.75, size=3.3)+
        geom_text(data=df_summary, aes(label=round(10^logmean,2)), y=2.5, size=3.3)+
        stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.18, tip.length = 0, label.y = 2.7,size=3.3) +
    ggtitle('all cross')+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())
print(p)
dev.off()

pdf(paste0("../plots/Figure5_related/all_specific_neutralization_compare.pdf"), width=6, height=3.2)
data <- data_all %>% filter(source %in% sources & paper_reactivity == 'specific') %>% select(
    c("source", names(variants))
) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value), source=factor(source, levels=sources))

df_summary <- data %>% group_by(source, name) %>% summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))
colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
p <- ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,4.2), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
    geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
    geom_text(data=df_summary, aes(label=percent), y=1.75, size=3.3)+
        geom_text(data=df_summary, aes(label=my_signif(10^logmean,0.1)), y=2.5, size=3.3)+
        stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.18, tip.length = 0, label.y = 2.7,size=3.3) +
    ggtitle('all specific')+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())
print(p)

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


In [16]:
# Fig 3 B/D3 neutralization compare

refine_val <- function(x) {
    x[x > 10] <- 10
    x[x < 0.0005] <- 0.0005
    x
}

library(ggpubr)

variants <- c('XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',#'JN1_F456L_IC50'='JN.1+F456L', 
              'JN1_R346T_F456L_IC50'='KP.2', 'KP3_IC50'='KP.3')
sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)

dev.off()

for (g in c("B", "D3")){
    pdf(paste0("../plots/Figure4_related/",g,"_neutralization_compare.pdf"), width=7.5, height=3.2)
    data <- data_all %>% filter(new_group == g & source %in% sources & paper_reactivity == 'cross') %>% select(
        c("source", names(variants))
    ) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value), source=factor(source, levels=sources))

    df_summary <- data %>% group_by(source, name) %>% summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

    my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

    colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
    p <- ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
        geom_hline(yintercept = 0, linetype='dashed', color='red')+
        geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
        scale_y_continuous(limits=c(-3.5,4.2), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
        geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
        geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
        geom_text(data=df_summary, aes(label=percent), y=1.75, size=3.3)+
        geom_text(data=df_summary, aes(label=round(10^logmean,2)), y=2.5, size=3.3)+
        stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.18, tip.length = 0, label.y = 2.7,size=3.3) +
        ggtitle(paste0(g, ' cross'))+
    theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())
    print(p)
    dev.off()
}

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“[1m[22mRemoved 15 rows containing missing values or values outside the scale range (`geom_signif()`).”
[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“[1m[22mRemoved 15 rows containing missing values or values outside the s

In [74]:
# Fig 4 B/D3/F3 neutralization compare

refine_val <- function(x) {
    x[x > 10] <- 10
    x[x < 0.0005] <- 0.0005
    x
}

library(ggpubr)

variants <- c('XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',#'JN1_F456L_IC50'='JN.1+F456L', 'JN1_F456L_A475V_IC50'='JN.1+F456L+A475V'
              'JN1_R346T_F456L_IC50'='KP.2', 'KP3_IC50'='KP.3','KP3_A475V_IC50'='KP.3+A475V')
sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources)

dev.off()

for (g in c("B", "D3", "F3")){
    pdf(paste0("../plots/Figure5_related/",g,"_neutralization_compare.pdf"), width=9, height=3.2)
    data <- data_all %>% filter(new_group == g & source %in% sources & paper_reactivity == 'specific') %>% select(
        c("source", names(variants))
    ) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value), source=factor(source, levels=sources))

    df_summary <- data %>% group_by(source, name) %>% summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

    my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))
    # my_comp <- list(c("BA.5 BTI + XBB infection", "BA.5 BTI + HK.3 infection"),c("BA.5 BTI + JN.1 infection", "BA.5 BTI + HK.3 infection"), c("BA.5 BTI + XBB infection", "BA.5 BTI + JN.1 infection"))

    colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
    p <- ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
        geom_hline(yintercept = 0, linetype='dashed', color='red')+
        geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
        scale_y_continuous(limits=c(-3.5,4.2), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
        geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
        geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
        geom_text(data=df_summary, aes(label=percent), y=1.75, size=3.3)+
        geom_text(data=df_summary, aes(label=round(10^logmean,2)), y=2.5, size=3.3)+
        stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.18, tip.length = 0, label.y = 2.7,size=3.3) +
        ggtitle(paste0(g, ' specific'))+
    theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())
    print(p)
    dev.off()
}

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”
[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.


In [18]:
# Fig 4 F3 2-5/5-51 neutralization compare

refine_val <- function(x) {
    x[x > 10] <- 10
    x[x < 0.0005] <- 0.0005
    x
}

library(ggpubr)

variants <- c('XBB1_5_IC50'='XBB.1.5', 'HK3_1_IC50'='HK.3.1', 'JN1_IC50'='JN.1',#'JN1_F456L_IC50'='JN.1+F456L', 
              'JN1_R346T_F456L_IC50'='KP.2', 'KP3_IC50'='KP.3')
sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources & new_group == "F3" & paper_reactivity == 'specific' & v_gene_H %in% c("IGHV2-5", "IGHV5-51")) %>% mutate(
    src_group = ifelse(source == "BA.5 BTI + JN.1 infection", "JN.1", "XBB/HK.3")
) %>% mutate(source = paste0(src_group, "(", v_gene_H, ")"))

dev.off()
pdf("../plots/Figure5_related/F3_IGHV_neutralization_compare.pdf", width=7, height=2.8)
data <- data_all %>% select(
    c("source", names(variants))
) %>% pivot_longer(!source) %>% mutate(name = factor(variants[name], levels=variants), value = refine_val(value))

df_summary <- data %>% group_by(source, name) %>% summarise(logmean=mean(log10(value)), percent=paste0(round(sum(value < 1)/n()*100), '%'))

my_comp <- list(c("XBB/HK.3(IGHV2-5)", "XBB/HK.3(IGHV5-51)"),
                c("XBB/HK.3(IGHV2-5)", "JN.1(IGHV2-5)"), 
                c("XBB/HK.3(IGHV5-51)", "JN.1(IGHV5-51)"), 
                c("JN.1(IGHV2-5)", "JN.1(IGHV5-51)"))

colors <- c("#EE6439", "#60AB59", "#FFCF56", '#AA2055',"#7193E2")
ggplot(data, aes(source, log10(value)))+facet_wrap(~name, nrow=1)+
    geom_hline(yintercept = 0, linetype='dashed', color='red')+
    geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
    scale_y_continuous(limits=c(-3.5,4.1), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
    geom_point_rast(aes(fill=source), color='#333333', size=1.5, alpha=0.2, shape=21)+scale_fill_manual(values=colors)+theme_classic()+
    geom_point(data=df_summary, aes(source, logmean), shape=21, size=2)+
    # geom_text(data=df_summary, aes(label=percent), y=1.75, size=3.3)+
    labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
    stat_compare_means(comparisons = my_comp, label = "p.signif",  method = "wilcox.test",step.increase = 0.17, tip.length = 0, label.y = 1.4,size=3.3) +
    # ggtitle(paste(g, '(n = ', length(unique(data$id)), ')', sep=''))+
theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1),strip.background = element_blank())

dev.off()

[1m[22m`summarise()` has grouped output by 'source'. You can override using the `.groups` argument.
“cannot compute exact p-value with ties”
“cannot compute exact p-value with ties”


In [19]:
dev.off()

sources <- c('BA.5 BTI + XBB infection', 'BA.5 BTI + HK.3 infection','BA.5 BTI + JN.1 infection')
data_all <- read.csv(src, check.names = F) %>% filter(source %in% sources) %>% mutate(
    src_group = (source == "BA.5 BTI + JN.1 infection")
)

for (sg in c(TRUE, FALSE)) {
    for (prop in c("IGHV2-5", "IGHV5-51")){
        g = 'F3'
        pdf(paste0("../plots/Figure5_related/",sg, prop, g, "_lines.pdf"), width=3.5, height=3.0)
        data <- (data_all %>% filter(new_group == g & v_gene_H == prop & src_group == sg))[,names(variants)]

        if (nrow(data) == 0) next

        colnames(data) <- variants[colnames(data)]
        data[data > 10] <- 10
        data[data < 0.0005] <- 0.0005
        # colors = c("XBB.1.5"="#D7301F", "XBB.1.5+F456L"="#08519C", "XBB.1.5+L455F"="#5AA1CE", "XBB.1.5+L455F+F456L"="#99C5DF")

        df_test = data.frame()
        ref_FC <- 'JN.1'
        for (ag2 in colnames(data)[1:ncol(data)]){
            if (ref_FC == ag2) {next}
            test <- na.omit(log10(data[,ref_FC]/data[,ag2]))

            pval <- wilcox.test(test)$p.value

            if (is.na(pval) || pval >= 0.05) label <- "NS"
            else if (pval >= 0.01) label <- "*"
            else if (pval >= 0.001) label <- "**"
            else label <- "***"

            df_test <- rbind(df_test, data.frame(ref=ref_FC, antigen=ag2, pval=pval, label=label))
        }

        cR <- colorRamp2(c(1,4,8,14),c("#D7301F", "#08519C", "#5AA1CE", "#99C5DF"))
        colors <- cR(1:10)

        # colors <- c("#7193E2", "#EE6439", "#60AB59", "#FFCF56")

        data$id <- rownames(data)
        samples <- colnames(data)[1:ncol(data)]
        data <- data %>% pivot_longer(!id) %>% na.omit()
        data$name <- factor(data$name, levels=samples)

        count_eff <- data %>% group_by(name) %>% summarise(cnt = sum(value < 1), total = n())
        count_eff$ratio_eff <- count_eff$cnt / count_eff$total

        my_signif <- function(x, thres) {
            signif(x, 1+(x>thres))
        }
        data_gmean <- as.data.frame(data %>% group_by(name) %>% summarise(gmean=exp(mean(log(value)))))
        data_gmean$FC <- data_gmean$gmean / data_gmean[data_gmean$name == ref_FC, 'gmean']
        data_gmean$show_FC <- paste(my_signif(data_gmean$FC, 1),'x',sep='')

        data_gmean[data_gmean$name == ref_FC, 'show_FC'] <- ''
        data_gmean$label <- paste(data_gmean$show_FC, '\n', my_signif(data_gmean$gmean,0.01), '\n', round(count_eff$ratio_eff*100), '%', sep='')

        p <- ggplot(data, aes(name, log10(value)))+geom_line(aes(group=id), alpha=0.02)+
        geom_hline(yintercept = 0, linetype='dashed', color='red')+
        geom_hline(yintercept = 1, linetype='dashed')+geom_hline(yintercept = log10(0.0005), linetype='dashed')+
        scale_y_continuous(limits=c(-3.5,2.7), breaks=-3:1, labels=c(expression('10'^{-3}),expression('10'^{-2}),expression('10'^{-1}),expression('10'^{0}),expression('10'^{1})))+
        geom_point_rast(aes(fill=name), color='#333333', size=2, alpha=0.2, shape=21,show.legend = F)+scale_fill_manual(values=colors)+theme_classic()+
        stat_summary(fun=mean, fun.min=mean, fun.max=mean, geom="errorbar", size=0.5, width=0.3, color="black", ) + labs(y = "Pseudovirus IC50 (\u00b5g/mL)")+
        geom_text(data=data_gmean, aes(x=name, label=label), y=2.2, size=3.3)+
        # geom_text(data=df_test %>% filter(ref=="XBB.1.5"), aes(x=antigen, label=label), y=3.45, size=3.3)+
        annotate(geom = 'segment', x = 1, xend=length(unique(data$name)), y=2.4, yend=2.4)+
        ggtitle(paste(sg, g, prop, '(n = ', length(unique(data$id)), ')', sep=' '))+
        theme(axis.title=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1), plot.title=element_text(size=11, hjust=0.4))

        print(p)
        dev.off()
    }
}

“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
