**Author:** Elias Rafael Ruiz-Morales

**Institution:** Wellcome Sanger Institute

**July, 2023**

---

# Creation of tables to generate differential gene expression heatmaps - Fibroblasts and Endothelial cells

In [1]:
recoverPvalLog2FC <- function(files, columns, genes, outdir, suffix){
    # Collect the adjusted p-values and log2 fold changes of a set of query
    # genes from several differential expression (DE) result tables and write
    # them as two gene x column CSV tables (useful for downstream plots).
    #
    # files=   paths to the tab-separated DE result files. The first column of
    #          each file holds the gene identifiers; the columns 'p_val_adj'
    #          and 'logFC' must be present.
    # columns= column names of the output tables; each name MUST MATCH the
    #          file in the same position of 'files'.
    # genes=   query genes to recover their values.
    # outdir=  output directory. It is pasted directly in front of the file
    #          name, so it must end with a path separator.
    # suffix=  suffix (including the extension) appended to the file names.
    #          When omitted, '.csv' is used.
    #
    # Writes '<outdir>padj_selectedGenes<suffix>' and
    # '<outdir>log2FC_selectedGenes<suffix>'.
    # Genes absent from a DE table get p-value = 1 and log2FC = 0.
    # Returns (invisibly) a list with the two matrices: 'pval' and 'log2FC'.
    #-------------------------------------------

    if (length(files) != length(columns)) {
        stop("'files' and 'columns' must have the same length (got ",
             length(files), " and ", length(columns), ")", call. = FALSE)
    }
    if (missing(suffix)) {
        # Same file names the function has always produced without a suffix
        suffix <- ".csv"
    }
    genes <- as.character(genes)

    ## Building the OUTPUT matrices
    ##

    # dimensions = genes x columns; giving both dimensions explicitly keeps
    # this valid even when 'genes' or 'columns' is empty
    pval <- matrix(NA_real_, nrow = length(genes), ncol = length(columns),
                   dimnames = list(genes, columns))

    # Same structure for the log2FC matrix
    log2FC <- pval

    ## Filling up the empty matrices, one DE file (= one column) at a time
    ##
    for (i in seq_along(columns)) {

        # reading DE dataframe
        df <- read.csv(files[i], sep = "\t")

        # The first column only carries the gene identifiers, so the value
        # columns are looked up among the remaining ones
        missing_cols <- setdiff(c("p_val_adj", "logFC"), colnames(df)[-1])
        if (length(missing_cols) > 0) {
            stop("File '", files[i], "' lacks the column(s): ",
                 paste(missing_cols, collapse = ", "), call. = FALSE)
        }

        gene_ids <- as.character(df[[1]])
        if (anyDuplicated(gene_ids) > 0) {
            # Ambiguous lookup: refuse rather than silently pick one row
            stop("File '", files[i], "' has duplicated gene identifiers",
                 call. = FALSE)
        }

        # Row of every query gene in the DE table (NA when absent)
        idx <- match(genes, gene_ids)
        found <- !is.na(idx)

        # Filled by position so repeated query genes or column names are
        # handled consistently
        pval[found, i] <- df[idx[found], "p_val_adj"]
        log2FC[found, i] <- df[idx[found], "logFC"]

        # gene is not in DE dataframe, place a pvalue=1 and log2FC=0
        pval[!found, i] <- 1
        log2FC[!found, i] <- 0
    }

    ## Writing the tables
    ##
    file_name <- paste0(outdir, "padj_selectedGenes", suffix, collapse = "")
    write.csv(pval, file = file_name, quote = FALSE, row.names = TRUE)

    file_name <- paste0(outdir, "log2FC_selectedGenes", suffix, collapse = "")
    write.csv(log2FC, file = file_name, quote = FALSE, row.names = TRUE)

    invisible(list(pval = pval, log2FC = log2FC))
}


In [7]:
# Tables with the DEGs of each cell type, one file per cell type and infection
files_endoPV <- c(
  "../results/SC_limma_fibroEndo_MAL_24h/3_DEGs_SC_MAL_24h_Endof.tsv",
  "../results/SC_limma_fibroEndo_MAL_24h/2_DEGs_SC_MAL_24h_PV.tsv",
  "../results/SC_limma_fibroEndo_LIS_24h/3_DEGs_SC_LIS_24h_Endof.tsv",
  "../results/SC_limma_fibroEndo_LIS_24h/2_DEGs_SC_LIS_24h_PV.tsv",
  "../results/SC_limma_fibroEndo_TOX_24h/3_DEGs_SC_TOX_24h_Endof.tsv",
  "../results/SC_limma_fibroEndo_TOX_24h/2_DEGs_SC_TOX_24h_PV.tsv"
)

# Cell types per infection that will be plotted.
# The order MUST match the order of the files above.
columns_endoPV <- c(
  "PF_Endof", "PF_PV",
  "LM_Endof", "LM_PV",
  "TG_Endof", "TG_PV"
)

In [8]:
# Tables with the DEGs of the F cell type, one file per infection
files_F <- c(
  "../results/SC_limma_fibroEndo_MAL_24h/1_DEGs_SC_MAL_24h_F.tsv",
  "../results/SC_limma_fibroEndo_LIS_24h/1_DEGs_SC_LIS_24h_F.tsv",
  "../results/SC_limma_fibroEndo_TOX_24h/1_DEGs_SC_TOX_24h_F.tsv"
)

# Cell types per infection that will be plotted.
# The order MUST match the order of the files above.
columns_F <- c("PF_F", "LM_F", "TG_F")

-----

## Inflammation genes shared among compartments, across infections - Figure 2

In [4]:
# Query genes for the shared inflammation signature
genes_input <- c(
  # --- Inflammation
  "IL1B", "CCL3", "CCL4", "CCL20", "CXCL8", "CXCL3", "PTGS2"
)

# Write the padj / log2FC tables for the Endof and PV columns
recoverPvalLog2FC(
  files_endoPV,
  columns_endoPV,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_selectedSharedInflammation_EndoPV.csv"
)

In [5]:
# Same query genes as the previous cell, now for the F columns
recoverPvalLog2FC(
  files_F,
  columns_F,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_selectedSharedInflammation_F.csv"
)

In [None]:
#"IL1A","CCL2","CCL5","CXCL1","CXCL2","CXCL5","IL33","CCL3L1"

## HMOX1 gene, specific to P. falciparum infection - Figure 3

In [10]:
# Single query gene for this figure
genes_input <- c("HMOX1")

# Endof and PV columns
recoverPvalLog2FC(
  files_endoPV,
  columns_endoPV,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_HMOX_EndoPV.csv"
)

# F columns
recoverPvalLog2FC(
  files_F,
  columns_F,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_HMOX_F.csv"
)

## Angiogenesis genes - Supplementary Figure 5

In [34]:
# Query genes for the angiogenesis signature
genes_input <- c(
  # --- Angiogenesis
  "ADM", "VEGFA", "ANGPTL4", "ITGA5", "NINJ1", "HSPG2", "PXDN"
)

# Write the padj / log2FC tables for the Endof and PV columns
recoverPvalLog2FC(
  files_endoPV,
  columns_endoPV,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_selectedSharedAngiogenesis_EndoPV.csv"
)



In [35]:
# Same query genes as the previous cell, now for the F columns
recoverPvalLog2FC(
  files_F,
  columns_F,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_selectedSharedAngiogenesis_F.csv"
)

## IL1-R subunit genes - Figure 4

In [12]:
# Query genes: the two IL1-R subunits
genes_input <- c("IL1R1", "IL1RAP")

# Endof and PV columns
recoverPvalLog2FC(
  files_endoPV,
  columns_endoPV,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_IL1R_EndoPV.csv"
)

# F columns
# NOTE(review): this suffix ends in a lowercase "_f" while the other F tables
# use "_F" - confirm which file name the downstream plotting code expects.
recoverPvalLog2FC(
  files_F,
  columns_F,
  genes = genes_input,
  outdir = "../results/tables_limma/",
  suffix = "_IL1R_f.csv"
)

In [2]:
# Print the R version, platform, locale and attached/loaded packages of this session
sessionInfo()

R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] fansi_1.0.4       digest_0.6.33     utf8_1.2.3        crayon_1.5.2     
 [5] IRdisplay_1.0     repr_1.1.3        lifecycle_1.0.3   jsonlite_1.8.7   
 [9] evaluate_0.21     pillar_1.5.1      rlang_1.1.1       cli_3.6.1        
[13] uuid_0.1-4    