In [115]:
library(tidyverse)
library(ggrepel)
library(ggrastr)
library(ggplot2)

dev.off()
pdf("../plots/Extended/DMS/RBD_expression/compare.pdf", width=3, height=3)

df_merge <- read.csv("../data/DMS/RBD_expression/_JN1_DMS_single_mutation_expr.csv")

for (col in c("expr", "expr_single")) {
    correlation <- cor.test(df_merge[,paste0(col, "_lib1")], df_merge[,paste0(col, "_lib2")], method = "pearson")
    
    print(paste0("R=",signif(correlation$estimate[['cor']], 3)))
    print(paste0("p=",correlation$p.value))
    
    print(
        ggplot(df_merge, aes_string(x=paste0(col, "_lib1"), y=paste0(col, "_lib2"))) +
            geom_point_rast(shape=21, alpha=0.5) +
            geom_point_rast(data=df_merge[is.na(df_merge$expr_avg),], color="red") + 
            # geom_text_repel(aes(label=mutant), max.overlaps = 15, min.segment.length = 0)+
            theme_classic() +
            annotate(geom = 'text', x = -2, y=0.5, color='red', label=paste0("R=",signif(correlation$estimate[['cor']], 3), "\np < 2.2e-16"), hjust=0) + 
            theme(aspect.ratio=1.0) +
            xlab(paste0(col, " JN.1 lib1 sort-seq")) +
            ylab(paste0(col, " JN.1 lib2 sort-seq"))
    )
}

df1 <- read.csv("../data/DMS/TStarrLab_RBD_DMS_scores.csv") %>% filter(target %in% c("Omicron_BA2", "Omicron_XBB15"))
df2 <- read.csv("../data/DMS/RBD_expression/_JN1_DMS_single_mutation_expr.csv")
print(nrow(df_merge))

for (tg in c("BA2", "XBB15")) {
df_merge <- merge(df1 %>% filter(target == paste0("Omicron_", tg)), df2, by.x="mutation", by.y="mutant", all=TRUE)
correlation <- cor.test(df_merge$delta_expr, df_merge$expr_avg, method = "pearson")
print(paste0("R=",signif(correlation$estimate[['cor']], 3)))
print(paste0("p=",correlation$p.value))

print(
    ggplot(df_merge, aes(x=delta_expr, y=expr_avg)) +
        geom_point_rast(shape=21, alpha=0.5) +
        # geom_text_repel(aes(label=mutation), max.overlaps = 15, min.segment.length = 0)+
        geom_point_rast(data=df_merge[is.na(df_merge$expr_avg),], color="red") + 
        theme_classic() +
        annotate(geom = 'text', x = -2, y=0.5, color='red', label=paste0("R=",signif(correlation$estimate[['cor']], 3), "\np < 2.2e-16"), hjust=0) + 
        theme(aspect.ratio=1.0) +
        xlab(paste0(tg, "RBD expression (T. Starr Lab)")) +
        ylab("JN.1 RBD expression")
)
}
dev.off()

[1] "R=0.874"
[1] "p=0"


“[1m[22mRemoved 15 rows containing missing values or values outside the scale range (`geom_point()`).”


[1] "R=0.901"
[1] "p=0"


“[1m[22mRemoved 772 rows containing missing values or values outside the scale range (`geom_point()`).”


[1] 4196
[1] "R=0.842"
[1] "p=0"


“[1m[22mRemoved 1003 rows containing missing values or values outside the scale range (`geom_point()`).”
“[1m[22mRemoved 505 rows containing missing values or values outside the scale range (`geom_point()`).”


[1] "R=0.877"
[1] "p=0"


“[1m[22mRemoved 906 rows containing missing values or values outside the scale range (`geom_point()`).”
“[1m[22mRemoved 465 rows containing missing values or values outside the scale range (`geom_point()`).”


In [114]:
library(pheatmap)
library(circlize)

df <- read.csv("../data/DMS/RBD_expression/_JN1_DMS_single_mutation_expr.csv")[, c("site", "mutation", "expr_avg")] %>% filter(mutation != "*") %>% pivot_wider(id_cols=site, names_from = mutation, values_from = expr_avg)
df_mat <- as.matrix(df[,-1])
rownames(df_mat) <- df$site

min_val = min(df_mat, na.rm = T)
max_val = max(df_mat, na.rm = T)

colors = colorRamp2(c(min_val, 0, max_val), c("#AA4031", "#FFFFFC", "#93A8F0"))(seq(min_val, max_val,0.05))

dev.off()
pdf("../plots/Extended/DMS/RBD_expression/heatmap.pdf", width=18, height=3)
pheatmap(t(df_mat), cluster_cols = F, cluster_rows = F, color = colors)
dev.off()