In [None]:
# Attach a package while silencing its start-up messages.
# `...` is forwarded untouched to library(), so a package can be given as a
# bare name exactly as with library() itself.
quiet_library <- function(...) suppressPackageStartupMessages(library(...))
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
# quiet_library(Nebulosa)

In [None]:
# Load the cleaned T-cell Seurat object saved by the scRNA comparison pipeline.
t_cells <- readRDS('../../03_scRNA_Comparisons/Pipeline/clean_aging_t_cells.rds')

# Pipeline CD4NA

In [None]:
# Make the level-2 predicted cell type the active identity, keep the
# 'CD4 Naive' cells, then keep only cells whose prediction score exceeds 0.7.
t_cells <- SetIdent(t_cells, value = 'predicted.celltype.l2')
cd4na_pipeline <- subset(t_cells, idents = 'CD4 Naive')
highcon_cd4na_pipeline <- subset(cd4na_pipeline, predicted.celltype.l2.score > 0.7)
# Number of retained cells per value of the `age` metadata column.
table(highcon_cd4na_pipeline$age)

In [None]:
# The full object is no longer needed once the subset exists.
rm(t_cells)

## Clustering pipeline cd4na

In [None]:
# Preprocess the high-confidence CD4 naive subset step by step:
# normalise, select variable features, scale, then run PCA.
highcon_cd4na_pipeline <- NormalizeData(highcon_cd4na_pipeline)
highcon_cd4na_pipeline <- FindVariableFeatures(highcon_cd4na_pipeline)
highcon_cd4na_pipeline <- ScaleData(highcon_cd4na_pipeline)
highcon_cd4na_pipeline <- RunPCA(highcon_cd4na_pipeline)

In [None]:
# Elbow plot over the first 50 principal components
# (presumably used to pick the `dims` passed to RunUMAP below).
ElbowPlot(highcon_cd4na_pipeline, ndims = 50)

In [None]:
# UMAP embedding computed from the first 10 principal components.
highcon_cd4na_pipeline <- RunUMAP(highcon_cd4na_pipeline, dims = 1:10)

## Age UMAPs

In [None]:
# 10 x 10 notebook plot size.
options(repr.plot.width = 10, repr.plot.height = 10)
# UMAP coloured by the `age` metadata column, three manual colours, legend hidden.
DimPlot(highcon_cd4na_pipeline, group.by = 'age', cols = c('#d95f02','#1b9e77','#fb9a99'), shuffle = TRUE, raster = FALSE) & NoLegend()

In [None]:
# Copy the UMAP cell embeddings out of the Seurat object into a data frame.
umap_coords <- as.data.frame(highcon_cd4na_pipeline@reductions$umap@cell.embeddings)

In [None]:
# Preview the embedding table.
head(umap_coords)

In [None]:
# Plotting table: the two UMAP coordinates plus the age group of every cell.
# `age2` is the same label with a '_2' suffix and is used as a separate fill
# variable by the density plots further down.
# The embedding columns are taken by position instead of by the names
# UMAP_1/UMAP_2: those names come from the reduction key
# (NOTE(review): newer Seurat releases appear to produce 'umap_1'/'umap_2' -
# confirm), and `$` on a missing column silently returns NULL.
umap_df <- data.frame(umap1 = umap_coords[[1]],
                      umap2 = umap_coords[[2]],
                      age = highcon_cd4na_pipeline$age,
                      age2 = paste0(highcon_cd4na_pipeline$age,'_2'))

In [None]:
# Preview the first rows of the plotting table.
head(umap_df)

In [None]:
# Filled 2-D density contours of the UMAP, one set per age group; axis text,
# axis titles and legend are all hidden.
# NOTE(review): `alpha = 0.1` sits inside aes(), so it is mapped through the
# alpha scale rather than applied as a literal opacity - kept as written.
options(repr.plot.width = 10, repr.plot.height = 10)
umap_df %>%
    mutate(age = factor(age, levels = c('Pediatric','Older Adult','Young Adult')),
           age2 = factor(age2, levels = c('Pediatric_2','Older Adult_2','Young Adult_2'))) %>%
    ggplot(aes(x = umap1, y = umap2, color = age)) +
        stat_density_2d(aes(alpha = 0.1, fill = age2), geom = 'polygon', bins = 4) +
        scale_fill_manual(values = c('#1b9e77','#d95f02','#9e9ac8')) +
        scale_color_manual(values = c('#006837','#d73027','#807dba')) +
        theme_bw() +
        theme(legend.position = 'none',
              axis.title.x = element_blank(),
              axis.title.y = element_blank(),
              axis.text.x = element_blank(),
              axis.text.y = element_blank())

In [None]:
# # Open a pdf file
# pdf("plots/cd4na_pipeline_contour_umap.pdf", width = 4, height = 6) 
# # 2. Create a plot
# umap_df %>%
#     mutate(age2 = factor(age2, levels = c('Pediatric_2','Older Adult_2','Young Adult_2'))) %>%
#     mutate(age = factor(age, levels = c('Pediatric','Older Adult','Young Adult'))) %>%
#     ggplot(aes(x=umap1, y=umap2, color=age)) +
#         stat_density_2d(geom = 'polygon', aes(alpha = 0.1, fill = age2), bins = 4) +
#         # geom_point(shape = '.') + 
#         scale_fill_manual(values = c('#1b9e77','#d95f02','#9e9ac8')) + 
#         scale_color_manual(values = c('#006837','#d73027','#807dba')) +
#         theme_bw() + 
#         theme(axis.text.x = element_blank(),
#               axis.text.y = element_blank(),
#               axis.title.y = element_blank(),
#               axis.title.x = element_blank(),
#               legend.position = 'none')
# # Close the pdf file
# dev.off() 

## Gene exp heatmap

In [None]:
# Genes whose average expression is shown in the heatmap.
gene_list <- c(
    'TSHZ2', 'CPQ', 'STAT4',
    'TOX', 'TCF12', 'LEF1', 'BACH2', 'SOX4', 'IKZF2',
    'FAM13A', 'BCL2', 'CDK6', 'IGF1R', 'NFKB1',
    'NELL2', 'PABPC1', 'PDCD4', 'TGFBR2', 'PDE4D',
    'PCNX1', 'INPP4B', 'DACH1', 'PLCB1', 'AOAH',
    'AFF3'
)

In [None]:
# Average RNA-assay expression of the genes in `gene_list`, grouped by `age`.
pipe_avg_exp_data <- AverageExpression(object = highcon_cd4na_pipeline, assays = 'RNA', features = gene_list, group.by = 'age')
# Convert the result into a plain data frame.
pipe_avg_exp_data <- as.data.frame(pipe_avg_exp_data)

In [None]:
# Preview the averaged expression table.
head(pipe_avg_exp_data)

In [None]:
# Shorten the age-group column names to OA / Ped / YA.
# Each label is assigned by matching the column's current name rather than by
# position, so a different column order cannot silently mislabel the groups.
# Re-running the cell is harmless: already-shortened names match no pattern.
short_age_labels <- c('Older' = 'OA', 'Pediatric' = 'Ped', 'Young' = 'YA')
for (age_pattern in names(short_age_labels)) {
    matched <- grepl(age_pattern, colnames(pipe_avg_exp_data), fixed = TRUE)
    colnames(pipe_avg_exp_data)[matched] <- short_age_labels[[age_pattern]]
}
# Fail loudly if the table does not hold exactly the three expected groups.
stopifnot(ncol(pipe_avg_exp_data) == length(short_age_labels),
          setequal(colnames(pipe_avg_exp_data), short_age_labels))
head(pipe_avg_exp_data)

In [None]:
# Put the age-group columns in the order Ped, YA, OA (row names are kept).
order_avg_exp <- pipe_avg_exp_data[c('Ped', 'YA', 'OA')]
head(order_avg_exp)

In [None]:
library(pheatmap)

In [None]:
# Heatmap of the averaged expression, transposed so that age groups are rows
# (order kept) and genes are columns (clustered); values are scaled per column.
# The colour ramp runs blue -> white -> red in 100 steps.
options(repr.plot.width = 12, repr.plot.height = 3)
pipeline_heatmap <- pheatmap(
    t(order_avg_exp),
    scale = 'column',
    cluster_rows = FALSE,
    cluster_cols = TRUE,
    color = colorRampPalette(c('#67a9cf','white','#d6604d'))(100)
)

In [None]:
# Write a pheatmap result to a PDF file.
#
# x        object with a `gtable` element to draw (e.g. the value of pheatmap()).
# filename path of the PDF to create.
# width, height  page size in inches, passed on to pdf().
#
# Returns the value of dev.off() for the PDF device, as before.
# The device is closed through on.exit() as well, so a failure while drawing
# no longer leaves an open PDF device (and a locked, half-written file) behind.
save_pheatmap_pdf <- function(x, filename, width=12, height=3) {
   stopifnot(!missing(x))
   stopifnot(!missing(filename))
   pdf(filename, width=width, height=height)
   pdf_dev <- dev.cur()
   # Only acts when the normal dev.off() below was never reached.
   on.exit(if (pdf_dev %in% dev.list()) dev.off(pdf_dev), add = TRUE)
   grid::grid.newpage()
   grid::grid.draw(x$gtable)
   dev.off(pdf_dev)
}
# Save the heatmap with the default 12 x 3 inch page size.
save_pheatmap_pdf(pipeline_heatmap, "plots/expanded_gex_scRNAseq.pdf")

# Cord Blood Dataset

## Load Data

In [None]:
# Load the Seurat object stored in cb_ya_so.rds.
cb_ya_so <- readRDS('../../03_scRNA_Comparisons/CB/cb_ya_so.rds')

In [None]:
# Cell counts per orig.ident, then keep three libraries selected by their
# orig.ident value (GSM4750306-GSM4750308; presumably the cord-blood samples,
# judging by the variable name - confirm).
table(cb_ya_so$orig.ident)
cb_ya_so <- SetIdent(cb_ya_so, value = 'orig.ident')
cb_only_so <- subset(cb_ya_so, idents = c('/home/jupyter/published_datasets/cord_blood_young_adult/GSM4750306_F016/',
                                          '/home/jupyter/published_datasets/cord_blood_young_adult/GSM4750307_F017/',
                                          '/home/jupyter/published_datasets/cord_blood_young_adult/GSM4750308_F024/'))

In [None]:
# Same filter as for the pipeline data: 'CD4 Naive' cells with a level-2
# prediction score above 0.7.
cb_only_so <- SetIdent(cb_only_so, value = 'predicted.celltype.l2')
cb_only_cd4na <- subset(cb_only_so, idents = 'CD4 Naive')
cb_only_cd4na_highCon <- subset(cb_only_cd4na, subset = predicted.celltype.l2.score > 0.7)

In [None]:
# Add the metadata columns used by the combined analysis: orig.ident doubles
# as the sample ID, and every cell of the RNA assay gets the age label
# 'Cord Blood'.
cb_only_cd4na_highCon$pbmc_sample_id <- cb_only_cd4na_highCon$orig.ident
cb_only_cd4na_highCon$age <- rep('Cord Blood', ncol(cb_only_cd4na_highCon[['RNA']]))
cb_only_cd4na_highCon

## Integration Clustering

In [None]:
# Prepare each dataset independently: switch to the RNA assay, normalise and
# select 2000 variable features with the "vst" method.
cd4na.list <- lapply(
    list(cb_only_cd4na_highCon, highcon_cd4na_pipeline),
    function(seurat_obj) {
        DefaultAssay(seurat_obj) <- 'RNA'
        seurat_obj <- NormalizeData(seurat_obj)
        FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)
    }
)

# Features that are repeatedly variable across the datasets, used for integration.
features <- SelectIntegrationFeatures(object.list = cd4na.list)

In [None]:
# Find integration anchors between the two datasets using the selected features.
immune.anchors <- FindIntegrationAnchors(object.list = cd4na.list, anchor.features = features)

In [None]:
# Integrate the datasets; this creates an 'integrated' data assay.
immune.combined <- IntegrateData(anchorset = immune.anchors)

In [None]:
# Run the downstream analysis on the corrected ('integrated') data; the
# original, unmodified data still resides in the 'RNA' assay.
DefaultAssay(immune.combined) <- "integrated"

# Standard workflow for visualisation: scale, PCA with 30 components, then a
# UMAP built from the first 10 of them.
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:10)
# Clustering is currently disabled:
# immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
# immune.combined <- FindClusters(immune.combined, resolution = 0.5)

## Plots

In [None]:
# UMAP of the integrated object coloured by the `age` metadata column.
options(repr.plot.width = 10, repr.plot.height = 10)
DimPlot(immune.combined, group.by = 'age', shuffle = TRUE, raster = FALSE)

In [None]:
# Copy the integrated UMAP cell embeddings into a data frame
# (overwrites the `umap_coords` built for the pipeline-only object).
umap_coords <- as.data.frame(immune.combined@reductions$umap@cell.embeddings)

In [None]:
# Preview the embedding table.
head(umap_coords)

In [None]:
# Plotting table for the integrated object: the two UMAP coordinates plus the
# age group of every cell (`age2` is the same label with a '_2' suffix).
# The embedding columns are taken by position instead of by the names
# UMAP_1/UMAP_2: those names come from the reduction key
# (NOTE(review): newer Seurat releases appear to produce 'umap_1'/'umap_2' -
# confirm), and `$` on a missing column silently returns NULL.
umap_df <- data.frame(umap1 = umap_coords[[1]],
                      umap2 = umap_coords[[2]],
                      age = immune.combined$age,
                      age2 = paste0(immune.combined$age,'_2'))

In [None]:
# Preview the first rows of the integrated plotting table.
head(umap_df)

In [None]:
# Filled 2-D density contours of the integrated UMAP, one set per age group
# (now including Cord Blood); axis text, axis titles and legend are hidden.
# NOTE(review): `alpha = 0.1` sits inside aes(), so it is mapped through the
# alpha scale rather than applied as a literal opacity - kept as written.
options(repr.plot.width = 10, repr.plot.height = 10)
umap_df %>%
    mutate(age = factor(age, levels = c('Pediatric','Cord Blood','Older Adult','Young Adult')),
           age2 = factor(age2, levels = c('Pediatric_2','Cord Blood_2','Older Adult_2','Young Adult_2'))) %>%
    ggplot(aes(x = umap1, y = umap2, color = age)) +
        stat_density_2d(aes(alpha = 0.1, fill = age2), geom = 'polygon', bins = 4) +
        scale_fill_manual(values = c('#1b9e77','#fa9fb5','#d95f02','#9e9ac8')) +
        scale_color_manual(values = c('#006837','#f768a1','#d73027','#807dba')) +
        theme_bw() +
        theme(legend.position = 'none',
              axis.title.x = element_blank(),
              axis.title.y = element_blank(),
              axis.text.x = element_blank(),
              axis.text.y = element_blank())

In [None]:
# Density view of the integrated UMAP: contour lines per age group plus filled
# polygons whose opacity follows the density level.
options(repr.plot.width = 10, repr.plot.height = 10)
umap_df %>%
    mutate(age2 = factor(age2, levels = c('Pediatric_2','Cord Blood_2','Older Adult_2','Young Adult_2'))) %>%
    mutate(age = factor(age, levels = c('Pediatric','Cord Blood','Older Adult','Young Adult'))) %>%
    ggplot(aes(umap1, umap2)) + 
        geom_density_2d(aes(color = age)) +
        # after_stat(level) is the current way to refer to a variable computed
        # by the stat; it replaces the deprecated `..level..` notation.
        stat_density_2d(aes(alpha = after_stat(level), fill=age), bins = 6, geom='polygon') +
        scale_fill_manual(values = c('#1b9e77','#fa9fb5','#d95f02','#9e9ac8')) + 
        scale_color_manual(values = c('#006837','#f768a1','#d73027','#807dba')) +
        # xlim()/ylim() set scale limits, so points outside them are removed
        # before the densities are estimated.
        xlim(-6,7.5) +
        ylim(-6,6) +
        theme_bw() + 
        theme(axis.text.x = element_blank(),
              axis.text.y = element_blank(),
              axis.title.y = element_blank(),
              axis.title.x = element_blank(),
              legend.position = 'none')

In [None]:
# # Open a pdf file
# pdf("plots/cd4na_pipeline_CB_contour_umap.pdf", width = 10, height = 10) 
# # 2. Create a plot
# umap_df %>%
#     mutate(age2 = factor(age2, levels = c('Pediatric_2','Cord Blood_2','Older Adult_2','Young Adult_2'))) %>%
#     mutate(age = factor(age, levels = c('Pediatric','Cord Blood','Older Adult','Young Adult'))) %>%
#     ggplot(aes(umap1, umap2)) + 
#         geom_density_2d(aes(color = age)) +
#         stat_density_2d(aes(alpha = ..level.., fill=age), bins = 6, geom='polygon') +
#         scale_fill_manual(values = c('#1b9e77','#fa9fb5','#d95f02','#9e9ac8')) + 
#         scale_color_manual(values = c('#006837','#f768a1','#d73027','#807dba')) +
#         xlim(-6,7.5) +
#         ylim(-6,6) +
#         theme_bw() + 
#         theme(axis.text.x = element_blank(),
#               axis.text.y = element_blank(),
#               axis.title.y = element_blank(),
#               axis.title.x = element_blank(),
#               legend.position = 'none')
# # Close the pdf file
# dev.off() 

In [None]:
# Per-sample violin plots (no points, no legend) of DDX3Y, RPS4Y1 and DDX3X,
# requested with the 'rna_' prefix.
# NOTE(review): presumably a check of donor sex per sample - confirm.
options(repr.plot.width = 20, repr.plot.height = 10)
VlnPlot(immune.combined, features = 'rna_DDX3Y', group.by = 'pbmc_sample_id', pt.size = 0) & NoLegend()
VlnPlot(immune.combined, features = 'rna_RPS4Y1', group.by = 'pbmc_sample_id', pt.size = 0) & NoLegend()
VlnPlot(immune.combined, features = 'rna_DDX3X', group.by = 'pbmc_sample_id', pt.size = 0) & NoLegend()

In [None]:
# Same filled density contours as above, split into one panel per age group.
# NOTE(review): `alpha = 0.1` sits inside aes(), so it is mapped through the
# alpha scale rather than applied as a literal opacity - kept as written.
options(repr.plot.width = 10, repr.plot.height = 10)
umap_df %>%
    mutate(age = factor(age, levels = c('Pediatric','Cord Blood','Older Adult','Young Adult')),
           age2 = factor(age2, levels = c('Pediatric_2','Cord Blood_2','Older Adult_2','Young Adult_2'))) %>%
    ggplot(aes(x = umap1, y = umap2, color = age)) +
        stat_density_2d(aes(alpha = 0.1, fill = age2), geom = 'polygon', bins = 4) +
        facet_wrap(vars(age)) +
        scale_fill_manual(values = c('#1b9e77','#fa9fb5','#d95f02','#9e9ac8')) +
        scale_color_manual(values = c('#006837','#f768a1','#d73027','#807dba')) +
        theme_bw() +
        theme(legend.position = 'none',
              axis.title.x = element_blank(),
              axis.title.y = element_blank(),
              axis.text.x = element_blank(),
              axis.text.y = element_blank())

# Expression Data

In [None]:
# Switch back to the RNA assay and redo normalisation, variable-feature
# selection and scaling there; scaling covers every feature of the RNA assay.
DefaultAssay(immune.combined) <- 'RNA'
immune.combined <- NormalizeData(immune.combined, assay = 'RNA') %>% FindVariableFeatures() %>% ScaleData(features = rownames(immune.combined[['RNA']]))

In [None]:
# Average RNA-assay expression of five genes per sample (pbmc_sample_id),
# converted to a data frame and displayed.
cb_avg_exp_data <- AverageExpression(object = immune.combined, assays = 'RNA', features = c('TOX','CPQ','STAT4','TSHZ2','SOX4'), group.by = 'pbmc_sample_id')
cb_avg_exp_data <- as.data.frame(cb_avg_exp_data)
cb_avg_exp_data

In [None]:
# Use `age` as the active identity, rename the four groups to short labels
# (CB, Ped, YA, OA) and store the result as the `age_short` metadata column.
immune.combined <- SetIdent(immune.combined, value = 'age')
immune.combined <- RenameIdents(immune.combined,
                                 'Cord Blood' = 'CB',
                                 'Pediatric' = 'Ped',
                                 'Young Adult' = 'YA',
                                 'Older Adult' = 'OA')
immune.combined$age_short <- Idents(immune.combined)

## Plot

In [None]:
# Transpose the per-sample averages so that each row is one sample.
t_avg_exp <- t(cb_avg_exp_data)
head(t_avg_exp)

In [None]:
# Cell-level metadata of the combined object.
meta <- immune.combined@meta.data

In [None]:
# Keep only the sample ID and the short age label of every cell.
meta_df <- data.frame('sampleID' = meta$pbmc_sample_id,
                         'age' = meta$age_short)
head(meta_df)

In [None]:
# Collapse to the distinct (sampleID, age) combinations.
unique_df <- unique(meta_df)

In [None]:
# Prefix the sample IDs with 'RNA.'
# (presumably to match the column names of the averaged expression table - confirm).
unique_df$sampleID <- paste0('RNA.',unique_df$sampleID)
head(unique_df)

In [None]:
# Replace every '-' and '/' in the sample IDs with '.' in a single pass.
unique_df$sampleID <- chartr('-/', '..', unique_df$sampleID)
head(unique_df)

In [None]:
# Turn the transposed matrix into a data frame and expose its row names as a
# `sampleID` column so it can be joined.
t_avg_exp <- as.data.frame(t_avg_exp)
t_avg_exp$sampleID <- rownames(t_avg_exp)
head(t_avg_exp)

In [None]:
# Attach the age group to every sample row; samples without a match get NA.
final_df <- left_join(t_avg_exp, unique_df, by = 'sampleID')
head(final_df)

In [None]:
# Read a per-sample table from the Figure4 folder (first CSV column = row names).
joined_counts <- read.csv(file = '../Figure4/mnp2_scrna_seq_perc_age.csv', row.names = 1)
# Columns 1 and 3 are taken by position.
# NOTE(review): column 1 must be `pbmc_sample_id` (used just below); column 3
# is presumably the donor's age, given its new name - confirm against the CSV.
trim_age <- joined_counts[,c(1,3)]
# Apply the same ID convention as above: '-' becomes '.', plus an 'RNA.' prefix.
trim_age$pbmc_sample_id <- gsub('-','.',trim_age$pbmc_sample_id)
trim_age$pbmc_sample_id <- paste0('RNA.',trim_age$pbmc_sample_id)
colnames(trim_age) <- c('sampleID','actual_age')
head(trim_age)

In [None]:
# Attach `actual_age` to every sample row; samples missing from trim_age get NA.
final_df <- left_join(x = final_df, y = trim_age, by = 'sampleID')
head(final_df)

In [None]:
# Export the joined table.
write.csv(final_df, file = 'cb_avg_exp_data_cd4na_updated.csv')

In [None]:
# NOTE(review): this reads '..._updated2.csv', not the '..._updated.csv' file
# written above. The step that produces the '2' file is not visible here
# (presumably an edit made outside this notebook) - confirm and document it.
final_df_2 <- read.csv('cb_avg_exp_data_cd4na_updated2.csv')

In [None]:
# TOX column of final_df_2 against actual_age, points coloured by age group,
# with a smoothed trend line and a log10 y axis.
options(repr.plot.width = 8, repr.plot.height = 4)
ggplot(final_df_2, aes(x=actual_age, y=TOX)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))

In [None]:
# Save the TOX-vs-age trend plot to plots/cd4na_pipeline_CB_TOX.pdf (8 x 4 in).
tox_trend_plot <- ggplot(final_df_2, aes(x=actual_age, y=TOX)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))
# The plot is built before the device is opened and printed explicitly, so it
# is also rendered when this code is run through source().
pdf("plots/cd4na_pipeline_CB_TOX.pdf", width = 8, height = 4) 
print(tox_trend_plot)
dev.off() 

In [None]:
# CPQ column of final_df_2 against actual_age, points coloured by age group,
# with a smoothed trend line (linear y axis).
options(repr.plot.width = 8, repr.plot.height = 4)
ggplot(final_df_2, aes(x=actual_age, y=CPQ)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + 
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))

In [None]:
# Save the CPQ-vs-age trend plot to plots/cd4na_pipeline_CB_CPQ.pdf (8 x 4 in).
cpq_trend_plot <- ggplot(final_df_2, aes(x=actual_age, y=CPQ)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + 
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))
# The plot is built before the device is opened and printed explicitly, so it
# is also rendered when this code is run through source().
pdf("plots/cd4na_pipeline_CB_CPQ.pdf", width = 8, height = 4) 
print(cpq_trend_plot)
dev.off() 

In [None]:
# STAT4 column of final_df_2 against actual_age, points coloured by age group,
# with a smoothed trend line (linear y axis).
ggplot(final_df_2, aes(x=actual_age, y=STAT4)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + 
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))

In [None]:
# Save the STAT4-vs-age trend plot to plots/cd4na_pipeline_CB_STAT4.pdf (8 x 4 in).
stat4_trend_plot <- ggplot(final_df_2, aes(x=actual_age, y=STAT4)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color='darkgrey') + 
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))
# The plot is built before the device is opened and printed explicitly, so it
# is also rendered when this code is run through source().
pdf("plots/cd4na_pipeline_CB_STAT4.pdf", width = 8, height = 4) 
print(stat4_trend_plot)
dev.off() 

In [None]:
# SOX4 column of final_df_2 against actual_age, points coloured by age group,
# with a smoothed trend line and a log10 y axis.
ggplot(final_df_2, aes(x=actual_age, y=SOX4)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color = 'darkgrey') + scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))

In [None]:
# Save the SOX4-vs-age trend plot to plots/cd4na_pipeline_CB_SOX4.pdf (8 x 4 in).
sox4_trend_plot <- ggplot(final_df_2, aes(x=actual_age, y=SOX4)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color = 'darkgrey') + scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))
# The plot is built before the device is opened and printed explicitly, so it
# is also rendered when this code is run through source().
pdf("plots/cd4na_pipeline_CB_SOX4.pdf", width = 8, height = 4) 
print(sox4_trend_plot)
dev.off() 

In [None]:
# TSHZ2 column of final_df_2 against actual_age, points coloured by age group,
# with a smoothed trend line (linear y axis; the log10 scale is disabled).
ggplot(final_df_2, aes(x=actual_age, y=TSHZ2)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color = 'darkgrey') + #scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))

In [None]:
# Save the TSHZ2-vs-age trend plot to plots/cd4na_pipeline_CB_TSHZ2.pdf (8 x 4 in).
tshz2_trend_plot <- ggplot(final_df_2, aes(x=actual_age, y=TSHZ2)) + 
    geom_point(size = 3, aes(color=age)) + 
    # Inside a model formula `^` is the crossing operator, so the former
    # `y ~ x^2` was already fitted as `y ~ x`; the formula now states that
    # model. (A real quadratic needs `y ~ poly(x, 2)` with method = 'lm'.)
    geom_smooth(formula = y ~ x, alpha=.15, se = TRUE, color = 'darkgrey') + #scale_y_log10() +
    scale_color_manual(values=c('#fa9fb5','#d95f02','#1b9e77','#9e9ac8')) + 
    xlab('Age') + ylab('AvgExp') +
    theme_bw() + 
    theme(axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15),
          axis.title.y = element_text(size = 20),
          axis.title.x = element_text(size = 20),
          legend.title = element_text(size=20),
          legend.text = element_text(size=15))
# The plot is built before the device is opened and printed explicitly, so it
# is also rendered when this code is run through source().
pdf("plots/cd4na_pipeline_CB_TSHZ2.pdf", width = 8, height = 4) 
print(tshz2_trend_plot)
dev.off() 

In [None]:
# Box plot of the TOX column of final_df per age group (CB, Ped, YA, OA),
# with the individual rows overlaid as jittered black points.
options(repr.plot.width = 4, repr.plot.height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = TOX)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

In [None]:
# Write the TOX box plot to plots/cd4na_pipeline_TOX.pdf (4 x 6 in).
pdf("plots/cd4na_pipeline_TOX.pdf", width = 4, height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = TOX)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
# Close the PDF device so the file is flushed to disk.
dev.off()

In [None]:
# Box plot of the CPQ column of final_df per age group (CB, Ped, YA, OA),
# with the individual rows overlaid as jittered black points.
# NOTE(review): the x-axis text size is 10 here but 20 in the saved PDF
# version and in the sibling plots - confirm whether that is intended.
options(repr.plot.width = 4, repr.plot.height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = CPQ)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 10),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

In [None]:
# Write the CPQ box plot to plots/cd4na_pipeline_CPQ.pdf (4 x 6 in).
pdf("plots/cd4na_pipeline_CPQ.pdf", width = 4, height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = CPQ)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
# Close the PDF device so the file is flushed to disk.
dev.off()

In [None]:
# Box plot of the SOX4 column of final_df per age group (CB, Ped, YA, OA),
# with the individual rows overlaid as jittered black points.
options(repr.plot.width = 4, repr.plot.height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = SOX4)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

In [None]:
# Write the SOX4 box plot to plots/cd4na_pipeline_SOX4.pdf (4 x 6 in).
pdf("plots/cd4na_pipeline_SOX4.pdf", width = 4, height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = SOX4)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
# Close the PDF device so the file is flushed to disk.
dev.off()

In [None]:
# Box plot of the STAT4 column of final_df per age group (CB, Ped, YA, OA),
# with the individual rows overlaid as jittered black points.
options(repr.plot.width = 4, repr.plot.height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = STAT4)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

In [None]:
# Write the STAT4 box plot to plots/cd4na_pipeline_STAT4.pdf (4 x 6 in).
pdf("plots/cd4na_pipeline_STAT4.pdf", width = 4, height = 6)
final_df %>%
    mutate(age = factor(age, levels = c('CB','Ped','YA','OA'))) %>%
    ggplot(aes(x = age, y = STAT4)) +
    geom_boxplot(aes(fill = age)) +
    geom_jitter(color = 'black', size = 1, width = 0.2) +
    scale_fill_manual(values = c('#3182bd','#1b9e77','#de77ae','#d95f02')) +
    theme(legend.position = 'none',
          axis.line = element_line(colour = "black"),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_text(size = 20),
          axis.text.y = element_text(size = 20),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
# Close the PDF device so the file is flushed to disk.
dev.off()