# create figures for med4 DE long term 

In [1]:
library(DESeq2)
library(RColorBrewer)
library(reshape2)
library("gplots")
library("GGally")                      # Load GGally package
library(goseq)
library(readxl)
library(tidyverse)
library("pheatmap")
library(corrplot)
library(circlize)

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: 'BiocGenerics'


The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs


The following objects are masked from 'package:base':

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min



Attaching package: 'S4Vectors'


The following objects are masked from 'package:base':

    expand.grid, I, unname


Loading required package: IRanges


Attaching package: 'IRanges'


The following object is masked from 'package:grDevices':

    windows


Loading required package: GenomicRanges

Loading req

In [2]:
library(ggrepel)

In [3]:
options(repr.plot.width = 10, repr.plot.height = 10, repr.plot.res = 300)

In [4]:
fname_hcounts = file.path('..', 'RNASEQ', 'data', 'MED4_counts_combined.txt')
hcdf = read.csv(fname_hcounts, sep='\t' )
hgenes = hcdf[,c(1,2,3,4,5)]
hpath_fname = file.path('..', 'genomes', 'MED4', 'MED4_pathways.csv')
hpath_df = read.csv(hpath_fname)
hpath_df = left_join(hgenes, hpath_df, by='protein_id', multiple='all')

In [5]:
hpath_df = hpath_df  %>% 
    extract(path, c("pathway"), "^[[:digit:]]* *([^\\[]+) *\\[?", remove = FALSE) %>%
    mutate(pathway = str_replace(pathway, "in photosynthetic organisms", "")) %>%
    mutate(pathway = str_trim(pathway))

In [6]:
hpath_df %>% count(main) #%>% #arrange(desc(n)) %>% filter(n>40)
dput(unique(hpath_df$main))

main,n
<chr>,<int>
,852
09100 Metabolism,815
09120 Genetic Information Processing,181
09130 Environmental Information Processing,78
09140 Cellular Processes,52
09150 Organismal Systems,38
09160 Human Diseases,84
09180 Brite Hierarchies,703
09190 Not Included in Pathway or Brite,113
,10


c("09120 Genetic Information Processing", "09180 Brite Hierarchies", 
"", "09100 Metabolism", "09130 Environmental Information Processing", 
"09140 Cellular Processes", "09190 Not Included in Pathway or Brite", 
"09160 Human Diseases", "09150 Organismal Systems", NA)


In [7]:
df_all = read_csv(file.path('DE_results', 'MED4_all_FC_results.csv'))



[1mRows: [22m[34m24377[39m [1mColumns: [22m[34m11[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): contrast, gene_id, Assay, contrast1
[32mdbl[39m (5): logFC, AveExpr, pvalue, padj, Length
[33mlgl[39m (2): down, up

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [8]:
df_all = df_all %>%
    filter(contrast1 %in% c('LATEvsC1_prot', 'LATEvsC1P1_rna'))

In [9]:
df_degenes = df_all %>% 
    filter(padj < 0.05)

In [10]:
df_degenes2 = df_all %>% 
    filter(padj < 0.05) %>%
    filter (abs(logFC)>1.5)

In [11]:
kegg_main_list = c(
        '09100 Metabolism', 
        '09120 Genetic Information Processing', 
        '09130 Environmental Information Processing' , 
        '09180 Brite Hierarchies', 
        '09190 Not Included in Pathway or Brite')


In [12]:
kegg_mapping_df = read_csv('pro_kegg_map.csv')
kegg_mapping_df

[1mRows: [22m[34m23[39m [1mColumns: [22m[34m4[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (3): main, sub, Category
[32mdbl[39m (1): n

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


main,sub,n,Category
<chr>,<chr>,<dbl>,<chr>
09180 Brite Hierarchies,09182 Protein families: genetic information processing,128,Genetic Info
09100 Metabolism,09102 Energy metabolism,84,Energy/Carbohydrate/Glycan
09180 Brite Hierarchies,09181 Protein families: metabolism,79,metabolism
09100 Metabolism,09101 Carbohydrate metabolism,74,Energy/Carbohydrate/Glycan
09180 Brite Hierarchies,09183 Protein families: signaling and cellular processes,60,Signal/cell processes
09100 Metabolism,09105 Amino acid metabolism,52,Amino acid/Nucleotide
09100 Metabolism,09108 Metabolism of cofactors and vitamins,48,Cofactors and vitamins
09120 Genetic Information Processing,09124 Replication and repair,26,Genetic Info
09190 Not Included in Pathway or Brite,09191 Unclassified: metabolism,24,metabolism
09100 Metabolism,09109 Metabolism of terpenoids and polyketides,23,Secondary metabolites


In [13]:
kegg_mapping2_df = read_csv('pro_kegg_path_map.csv')
kegg_mapping2_df

[1mRows: [22m[34m6[39m [1mColumns: [22m[34m2[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): path, New_Category

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


path,New_Category
<chr>,<chr>
00195 Photosynthesis [PATH:ko00195],Photosynthesis/Carbon fixation
00710 Carbon fixation in photosynthetic organisms [PATH:ko00710],Photosynthesis/Carbon fixation
00910 Nitrogen metabolism [PATH:ko00910],Nitrogen metabolism
00194 Photosynthesis proteins [BR:ko00194],Photosynthesis/Carbon fixation
02000 Transporters [BR:ko02000],Membrane transport
99985 Amino acid metabolism,Amino acid/Nucleotide


In [14]:
hpath_df = hpath_df %>% 

    extract(sub, c("module"), "^[[:digit:]]* *([^\\[]+) *\\[?", remove = FALSE) %>%
    mutate(module = str_replace(module, "in photosynthetic organisms", "")) %>%
    mutate(module = str_replace(module, "metabolism", "")) %>%
    mutate(module = str_replace(module, "Biosynthesis of", "")) %>%
    mutate(module = str_replace(module, "Metabolism of", "")) %>%
    mutate(module = str_replace(module, "biosynthesis", "")) %>%
    mutate(module = str_trim(module)) 



In [15]:
# add Category
hpath_df = left_join(hpath_df, kegg_mapping_df, by=c('sub', 'main'), )


In [16]:
hpath_df = left_join(hpath_df, kegg_mapping2_df, by='path' )


In [17]:
hpath_df = hpath_df %>%
    mutate(Category = if_else(is.na(New_Category), Category, New_Category) )

In [18]:
# uncomment to recreate the file
hpath_df %>% 
    filter(gene_id %in% df_degenes2$gene_id) %>%
 filter(main %in% kegg_main_list) %>%
count(main, sub, path, Category, ) %>% write_csv('pro_kegg_map2.csv')
#arrange(desc(n)) #%>% filter(n>40)


In [19]:
# uncomment to recreate the file
hpath_df %>% 
    filter(gene_id %in% df_degenes2$gene_id) %>%
#filter(main %in% kegg_main_list) %>%
count(Category) %>% #write_csv('pro_kegg_map.csv')
arrange(desc(n)) #%>% filter(n>40)


Category,n
<chr>,<int>
,163
Genetic Info,40
Photosynthesis/Carbon fixation,27
Energy/Carbohydrate/Glycan,26
Membrane transport,22
Amino acid/Nucleotide,20
metabolism,16
Cofactors and vitamins,10
Signal/cell processes,9
Secondary metabolites,8


In [20]:
dput(unique(hpath_df$Category))

c("Genetic Info", NA, "Amino acid/Nucleotide", "metabolism", 
"Membrane transport", "Signal/cell processes", "Cofactors and vitamins", 
"Energy/Carbohydrate/Glycan", "Photosynthesis/Carbon fixation", 
"Poorly characterized", "Secondary metabolites", "Nitrogen metabolism"
)


In [21]:
# uncomment to recreate the file
hpath_df %>% 
    filter(gene_id %in% df_degenes2$gene_id) %>%
filter(Category == 'Energy/Carbohydrate/Glycan') %>%
count(path) %>% #write_csv('pro_kegg_map.csv')
arrange(desc(n)) #%>% filter(n>40)


path,n
<chr>,<int>
00630 Glyoxylate and dicarboxylate metabolism [PATH:ko00630],4
00920 Sulfur metabolism [PATH:ko00920],4
00010 Glycolysis / Gluconeogenesis [PATH:ko00010],2
00030 Pentose phosphate pathway [PATH:ko00030],2
00190 Oxidative phosphorylation [PATH:ko00190],2
00541 O-Antigen nucleotide sugar biosynthesis [PATH:ko00541],2
00040 Pentose and glucuronate interconversions [PATH:ko00040],1
00051 Fructose and mannose metabolism [PATH:ko00051],1
00520 Amino sugar and nucleotide sugar metabolism [PATH:ko00520],1
00542 O-Antigen repeat unit biosynthesis [PATH:ko00542],1


In [22]:
# uncomment to recreate the file
hpath_df %>% 
    filter(gene_id %in% df_degenes$gene_id) %>%
filter(Category == 'metabolism') %>%
count(path) %>% #write_csv('pro_kegg_map.csv')
arrange(desc(n)) #%>% filter(n>40)


path,n
<chr>,<int>
99980 Enzymes with EC numbers,17
01002 Peptidases and inhibitors [BR:ko01002],15
00061 Fatty acid biosynthesis [PATH:ko00061],10
01004 Lipid biosynthesis proteins [BR:ko01004],8
01006 Prenyltransferases [BR:ko01006],6
01007 Amino acid related enzymes [BR:ko01007],6
01005 Lipopolysaccharide biosynthesis proteins [BR:ko01005],5
01011 Peptidoglycan biosynthesis and degradation proteins [BR:ko01011],5
01003 Glycosyltransferases [BR:ko01003],4
00071 Fatty acid degradation [PATH:ko00071],3


In [23]:
head(hpath_df)

Unnamed: 0_level_0,gene_id,gene,product,protein_id,old_locus_tag,X,path_id,main,sub,module,path,pathway,ecpath,n,Category,New_Category
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<lgl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>
1,TX50_RS00020,dnaN,DNA polymerase III subunit beta,WP_011131639.1,,14,K02338,09120 Genetic Information Processing,09124 Replication and repair,Replication and repair,03030 DNA replication [PATH:ko03030],DNA replication,K02338 dnaN; DNA polymerase III subunit beta [EC:2.7.7.7],26.0,Genetic Info,
2,TX50_RS00020,dnaN,DNA polymerase III subunit beta,WP_011131639.1,,15,K02338,09120 Genetic Information Processing,09124 Replication and repair,Replication and repair,03430 Mismatch repair [PATH:ko03430],Mismatch repair,K02338 dnaN; DNA polymerase III subunit beta [EC:2.7.7.7],26.0,Genetic Info,
3,TX50_RS00020,dnaN,DNA polymerase III subunit beta,WP_011131639.1,,16,K02338,09120 Genetic Information Processing,09124 Replication and repair,Replication and repair,03440 Homologous recombination [PATH:ko03440],Homologous recombination,K02338 dnaN; DNA polymerase III subunit beta [EC:2.7.7.7],26.0,Genetic Info,
4,TX50_RS00020,dnaN,DNA polymerase III subunit beta,WP_011131639.1,,17,K02338,09180 Brite Hierarchies,09182 Protein families: genetic information processing,Protein families: genetic information processing,03032 DNA replication proteins [BR:ko03032],DNA replication proteins,K02338 dnaN; DNA polymerase III subunit beta [EC:2.7.7.7],128.0,Genetic Info,
5,TX50_RS00020,dnaN,DNA polymerase III subunit beta,WP_011131639.1,,18,K02338,09180 Brite Hierarchies,09182 Protein families: genetic information processing,Protein families: genetic information processing,03400 DNA repair and recombination proteins [BR:ko03400],DNA repair and recombination proteins,K02338 dnaN; DNA polymerase III subunit beta [EC:2.7.7.7],128.0,Genetic Info,
6,TX50_RS00025,,hypothetical protein,WP_011131640.1,,19,,,,,,,,,,


In [24]:
hpath_df = hpath_df %>% 
    extract(ecpath, c("kegg_gene"), "^K[[:digit:]]+ *([^;]*);", remove = FALSE) 

In [25]:
hpath_df = hpath_df %>% 
    mutate(
        prev_gene = gene,
        gene = if_else((prev_gene != '') | (is.na(kegg_gene)), prev_gene, paste0('(',kegg_gene,')'))
        
    )

In [26]:
hpath_df_to_export = hpath_df %>% 
     group_by(gene_id) %>% 
     mutate(
         main = paste0(unique(main), collapse = ";"),
         sub = paste0(unique(sub), collapse = ";"),
         path = paste0(unique(path), collapse = ";"),
         ecpath = paste0(unique(ecpath), collapse = ";"),
         pathway = paste0(unique(pathway), collapse = ";"),
         module = paste0(unique(module), collapse = ";"),
         Category = paste0(unique(Category), collapse = ";")
         
     ) %>%
    ungroup() %>%
    distinct(gene_id, .keep_all = TRUE)

In [27]:


hpath_df_to_export_filter = hpath_df %>% 
    filter(main %in% kegg_main_list) %>%
     group_by(gene_id) %>% 
     mutate(
         main = paste0(unique(main), collapse = ";"),
         sub = paste0(unique(sub), collapse = ";"),
         path = paste0(unique(path), collapse = ";"),
         ecpath = paste0(unique(ecpath), collapse = ";"),
         pathway = paste0(unique(pathway), collapse = ";"),
         module = paste0(unique(module), collapse = ";"),
         Category = paste0(sort(unique(Category)), collapse = ";"),
     Category = str_replace(Category, ';metabolism', '')
     ) %>%
    ungroup() %>%
    distinct(gene_id, .keep_all = TRUE)

In [28]:
hpath_df_to_export %>% filter(!gene_id %in% hpath_df_to_export_filter$gene_id) %>%
filter(main != '')


gene_id,gene,product,protein_id,old_locus_tag,X,path_id,main,sub,module,path,pathway,ecpath,kegg_gene,n,Category,New_Category,prev_gene
<chr>,<chr>,<chr>,<chr>,<lgl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>
TX50_RS02235,(AS3MT),methyltransferase domain-containing protein,WP_011132052.1,,618.0,K07755,09160 Human Diseases,09161 Cancer: overview,Cancer: overview,05208 Chemical carcinogenesis - reactive oxygen species [PATH:ko05208],Chemical carcinogenesis - reactive oxygen species,K07755 AS3MT; arsenite methyltransferase [EC:2.1.1.137],AS3MT,,,,
TX50_RS02260,lepA,translation elongation factor 4,WP_011132056.1,,622.0,K03596,09160 Human Diseases,09171 Infectious disease: bacterial,Infectious disease: bacterial,05134 Legionellosis [PATH:ko05134],Legionellosis,K03596 lepA; GTP-binding protein LepA,lepA,,,,lepA
TX50_RS03060,,AbrB family transcriptional regulator,,,,,,,,,,,,,,,
TX50_RS03640,,ATP-dependent Clp protease adaptor ClpS,,,,,,,,,,,,,,,
TX50_RS04365,,high light inducible protein,,,,,,,,,,,,,,,
TX50_RS07310,,hypothetical protein,,,,,,,,,,,,,,,
TX50_RS07510,,high light inducible protein,,,,,,,,,,,,,,,
__no_feature,,,,,,,,,,,,,,,,,
__ambiguous,,,,,,,,,,,,,,,,,
__too_low_aQual,,,,,,,,,,,,,,,,,


In [29]:
# uncomment to recreate the file
hpath_df_to_export_filter %>% 
    filter(gene_id %in% df_degenes2$gene_id) %>%
#filter(main %in% kegg_main_list) %>%
count(Category) %>% #write_csv('pro_kegg_map.csv')
arrange(desc(n)) #%>% filter(n>40)


Category,n
<chr>,<int>
Genetic Info,20
Photosynthesis/Carbon fixation,11
Cofactors and vitamins,8
Membrane transport,8
Amino acid/Nucleotide,7
metabolism,7
Poorly characterized,6
Energy/Carbohydrate/Glycan,5
Energy/Carbohydrate/Glycan;Photosynthesis/Carbon fixation,4
Membrane transport;Nitrogen metabolism,3


In [30]:
# uncomment to recreate the file
hpath_df_to_export_filter %>% 
    filter(gene_id %in% df_degenes2$gene_id) %>%
filter(Category == 'metabolism') %>%
count(path) %>% #write_csv('pro_kegg_map.csv')
arrange(desc(n)) #%>% filter(n>40)


path,n
<chr>,<int>
99980 Enzymes with EC numbers,4
01002 Peptidases and inhibitors [BR:ko01002],2
01009 Protein phosphatases and associated proteins [BR:ko01009],1


In [31]:
df_all = left_join(df_all, hpath_df_to_export_filter, by='gene_id')

In [32]:
df_all %>% count(Category) %>% arrange(desc(n)) %>% write_csv('tmp.csv')

In [33]:
unique(df_all$contrast1)

In [34]:
t(head(df_all))

0,1,2,3,4,5,6
contrast,Clongterm - C1,Clongterm - C1,Clongterm - C1,Clongterm - C1,Clongterm - C1,Clongterm - C1
gene_id,TX50_RS01980,TX50_RS00040,TX50_RS04695,TX50_RS01970,TX50_RS05170,TX50_RS05585
logFC,3.191075,-1.443788,1.373159,3.706744,2.210139,2.786227
AveExpr,0.9167156,1.5845359,2.2254880,3.6657560,-1.3469405,-1.7260158
pvalue,6.252913e-10,1.202458e-08,1.731707e-08,2.510390e-08,2.650587e-08,3.478220e-08
padj,8.904149e-07,7.127836e-06,7.127836e-06,7.127836e-06,7.127836e-06,7.127836e-06
Assay,Proteome,Proteome,Proteome,Proteome,Proteome,Proteome
contrast1,LATEvsC1_prot,LATEvsC1_prot,LATEvsC1_prot,LATEvsC1_prot,LATEvsC1_prot,LATEvsC1_prot
Length,852,2439,912,1491,1134,954
down,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE


In [35]:
# number of differentiall expressed genes
df_all %>%
    filter(padj < 0.05) %>% count(Assay, down)

Assay,down,n
<chr>,<lgl>,<int>
Proteome,False,208
Proteome,True,206
RNA,False,207
RNA,True,178


# Top up and down regulated genes

In [36]:
df_all %>%
    filter(padj < 0.05) %>% top_n(-50, logFC) %>% 
    select(Assay,logFC,gene, product, Category) #%>% 
    #select(Assay,logFC,gene)

Assay,logFC,gene,product,Category
<chr>,<dbl>,<chr>,<chr>,<chr>
Proteome,-3.062849,(pepE),peptidase E,metabolism
Proteome,-3.089786,,,
Proteome,-2.437843,,,
Proteome,-1.956601,,,
Proteome,-1.959779,raiA,ribosome-associated translation inhibitor RaiA,Genetic Info
Proteome,-2.659823,"(wecA, tagO, rfe)",undecaprenyl/decaprenyl-phosphate alpha-N-acetylglucosaminyl 1-phosphate transferase,Energy/Carbohydrate/Glycan
Proteome,-2.122447,,,
Proteome,-2.428226,"(rfbD, rmlD)",sugar nucleotide-binding protein,Energy/Carbohydrate/Glycan;Secondary metabolites
RNA,-1.790114,gap,type I glyceraldehyde-3-phosphate dehydrogenase,Energy/Carbohydrate/Glycan;Photosynthesis/Carbon fixation
RNA,-2.005085,(K07086),sodium-dependent bicarbonate transport family permease,Poorly characterized


In [37]:
get_top_gene_id <- function(df_all, assay, top_number) {
    df_all %>%
        filter(padj < 0.05) %>% 
        filter(Assay == assay) %>% 
        top_n(top_number, logFC) %>% 
        select(gene_id, Assay)
}
number_of_top_ids = 5
top_up_de_genes = bind_rows(list(get_top_gene_id(df_all, 'Proteome', number_of_top_ids), get_top_gene_id(df_all, 'RNA', number_of_top_ids)))
top_down_de_genes = bind_rows(list(get_top_gene_id(df_all, 'Proteome', -number_of_top_ids), get_top_gene_id(df_all, 'RNA', -number_of_top_ids)))


In [38]:
dput(names(df_all))

c("contrast", "gene_id", "logFC", "AveExpr", "pvalue", "padj", 
"Assay", "contrast1", "Length", "down", "up", "gene", "product", 
"protein_id", "old_locus_tag", "X", "path_id", "main", "sub", 
"module", "path", "pathway", "ecpath", "kegg_gene", "n", "Category", 
"New_Category", "prev_gene")


In [39]:
df_pivot_for_top = df_all %>%
    pivot_wider(
        names_from = 'Assay',
        values_from = c("logFC", #"AveExpr", "pvalue",
                        "padj", ),
        id_cols=c("gene_id",  "gene",  "product",  "sub", 
                  #"module",
                  "path"
                  #"pathway",
                 # "ecpath"
                 )
    ) %>% select(gene_id, gene, product, sub, path, logFC_Proteome, padj_Proteome,logFC_RNA,padj_RNA)


In [40]:
dput(names(df_pivot_for_top))

c("gene_id", "gene", "product", "sub", "path", "logFC_Proteome", 
"padj_Proteome", "logFC_RNA", "padj_RNA")


In [41]:
df_pivot_for_top %>% filter(gene_id %in% top_up_de_genes$gene_id) %>% arrange(desc(logFC_RNA), desc(logFC_Proteome) )

gene_id,gene,product,sub,path,logFC_Proteome,padj_Proteome,logFC_RNA,padj_RNA
<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
TX50_RS07555,,,,,,,7.163547,7.016344e-24
TX50_RS09500,,,,,,,5.287747,5.587907e-45
TX50_RS09840,,,,,,,4.94047,6.618466000000001e-54
TX50_RS09690,,,,,,,4.811467,3.08846e-24
TX50_RS04605,,,,,,,4.775865,1.221015e-28
TX50_RS01970,"(nrtA, nasF, cynA)",ABC transporter substrate-binding protein,09102 Energy metabolism;09131 Membrane transport;09183 Protein families: signaling and cellular processes,00910 Nitrogen metabolism [PATH:ko00910];02010 ABC transporters [PATH:ko02010];02000 Transporters [BR:ko02000],3.706744,7.127836e-06,3.489122,1.1943650000000001e-18
TX50_RS05100,,,,,3.287265,0.0001100366,3.388137,2.387735e-13
TX50_RS01975,ntrB,nitrate ABC transporter permease,09102 Energy metabolism;09131 Membrane transport;09183 Protein families: signaling and cellular processes,00910 Nitrogen metabolism [PATH:ko00910];02010 ABC transporters [PATH:ko02010];02000 Transporters [BR:ko02000],3.436089,7.127836e-06,2.466096,4.766029e-05
TX50_RS01985,cynS,cyanase,09102 Energy metabolism,00910 Nitrogen metabolism [PATH:ko00910],3.559702,0.0001308647,1.795342,0.00573225
TX50_RS05580,,,,,3.297569,4.746019e-05,1.453699,1.117397e-05


In [42]:
df_pivot_for_top %>% filter(gene_id %in% top_down_de_genes$gene_id)%>% arrange(logFC_RNA, logFC_Proteome) 

gene_id,gene,product,sub,path,logFC_Proteome,padj_Proteome,logFC_RNA,padj_RNA
<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
TX50_RS09770,,,,,,,-3.9715639,0.001565508
TX50_RS09810,,,,,,,-3.8127732,1.892925e-16
TX50_RS07415,,,,,-1.548083,0.0440394151,-3.2392804,1.452656e-21
TX50_RS09805,,,,,,,-3.0010224,0.008824035
TX50_RS01790,,,,,,,-2.9665253,2.624547e-10
TX50_RS05775,,,,,-3.089786,0.0004806765,-1.7602647,1.899661e-09
TX50_RS06610,"(wecA, tagO, rfe)",undecaprenyl/decaprenyl-phosphate alpha-N-acetylglucosaminyl 1-phosphate transferase,09107 Glycan biosynthesis and metabolism;09181 Protein families: metabolism,00542 O-Antigen repeat unit biosynthesis [PATH:ko00542];00552 Teichoic acid biosynthesis [PATH:ko00552];00572 Arabinogalactan biosynthesis - Mycobacterium [PATH:ko00572];01003 Glycosyltransferases [BR:ko01003];01005 Lipopolysaccharide biosynthesis proteins [BR:ko01005],-2.659823,0.0119511553,-1.3640186,1.965563e-07
TX50_RS07345,,,,,-2.437843,0.0017520753,-1.0693021,0.02108802
TX50_RS06640,"(rfbD, rmlD)",sugar nucleotide-binding protein,09107 Glycan biosynthesis and metabolism;09109 Metabolism of terpenoids and polyketides;09110 Biosynthesis of other secondary metabolites,00541 O-Antigen nucleotide sugar biosynthesis [PATH:ko00541];00523 Polyketide sugar unit biosynthesis [PATH:ko00523];00521 Streptomycin biosynthesis [PATH:ko00521],-2.428226,0.0487356304,-0.6255909,1.0
TX50_RS05300,(pepE),peptidase E,09181 Protein families: metabolism,01002 Peptidases and inhibitors [BR:ko01002],-3.062849,0.0001166403,-0.3657736,1.0


# categories of DE genes

In [43]:
df_all %>%
    filter(padj < 0.05) %>% #top_n(-50, logFC) %>% 
    count(path)%>% arrange(desc(n))
    #select(Assay,logFC,gene)

path,n
<chr>,<int>
,343
00195 Photosynthesis [PATH:ko00195];00194 Photosynthesis proteins [BR:ko00194],26
99980 Enzymes with EC numbers,23
03010 Ribosome [PATH:ko03010];03011 Ribosome [BR:ko03011],19
00860 Porphyrin metabolism [PATH:ko00860],17
02000 Transporters [BR:ko02000],16
99997 Function unknown,14
02010 ABC transporters [PATH:ko02010];02000 Transporters [BR:ko02000],12
01002 Peptidases and inhibitors [BR:ko01002],10
03016 Transfer RNA biogenesis [BR:ko03016],10


In [44]:
df_all %>%
    filter(padj < 0.05) %>% #top_n(-50, logFC) %>% 
    count(sub) %>% arrange(desc(n))
    #select(Assay,logFC,gene)

sub,n
<chr>,<int>
,343
09182 Protein families: genetic information processing,46
09108 Metabolism of cofactors and vitamins,32
09102 Energy metabolism;09181 Protein families: metabolism,31
09191 Unclassified: metabolism,31
09122 Translation;09182 Protein families: genetic information processing,22
09183 Protein families: signaling and cellular processes,21
09124 Replication and repair;09182 Protein families: genetic information processing,18
09194 Poorly characterized,17
09105 Amino acid metabolism,16


In [45]:
df_all %>%
    filter(padj < 0.05) %>% #top_n(-50, logFC) %>% 
    mutate(transport = if_else(str_detect(sub, 'Membrane transport'), 'Membrane transport', '')) %>%
    # uncharacterized
    mutate(category = if_else(path== '', 'Uncharacterized', NA)) %>% 
    mutate(category = if_else(str_detect(sub, 'Poorly characterized'), 'Uncharacterized', category)) %>%
    mutate(category = if_else(str_detect(sub, 'Protein families: metabolism'), 'metabolism', category)) %>%
    mutate(category = if_else(str_detect(sub, 'Unclassified: metabolism'), 'metabolism', category)) %>%
    mutate(category = if_else(is.na(category), '', category)) %>%
    # sub
     mutate(category = if_else(str_detect(sub, 'genetic information processing'), str_c(category, 'genetic information processing', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Metabolism of cofactors and vitamins'), paste0(category, 'cofactors and vitamins', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Energy metabolism'), paste0(category, 'Energy metabolism', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'signaling and cellular processes'), paste0(category, 'signaling and cellular processes', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Carbohydrate metabolism'), paste0(category, 'Carbohydrate metabolism', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Metabolism of terpenoids and polyketides'), paste0(category, 'terpenoids and polyketides', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), paste0(category, 'Nucleotide metabolism', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Glycan biosynthesis and metabolism'), paste0(category, 'Glycan metabolism', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Lipid metabolism'), paste0(category, 'Lipid metabolism', collapse = ";"), category)) %>%

    # # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%
    # # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%
    # # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%


    # mutate(category = if_else(str_detect(sub, 'Amino acid metabolism'), paste0(category, 'Amino acid', collapse = ";"), category)) %>%

    # # path overrides
    # mutate(category = if_else(str_detect(path, 'Nitrogen metabolism'), paste0(category, 'Nitrogen metabolism', collapse = ";"), category)) %>%
    # mutate(category = if_else(str_detect(path, 'Photosynthesis'), paste0(category, 'Photosynthesis', collapse = ";"), category)) %>%
    #count(system)
#    filter(is.na(category)) %>% 
#count(transport, category, path) #%>% arrange(desc(n))
count(transport, category) %>% arrange(desc(n))

transport,category,n
<chr>,<chr>,<int>
,,343
,,188
,genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;Uncharacterizedgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;metabolismgenetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing;genetic information processing,115
,metabolism,112
Membrane transport,,24
,Uncharacterized,17


In [46]:
df_all %>%
    filter(padj < 0.05) %>% #top_n(-50, logFC) %>% 
    mutate(transport = if_else(str_detect(sub, 'Membrane transport'), 'Membrane transport', '')) %>%
    # uncharacterized
    mutate(category = if_else(path== '', 'Uncharacterized', NA)) %>% 
    mutate(category = if_else(str_detect(sub, 'Poorly characterized'), 'Uncharacterized', category)) %>%
    mutate(category = if_else(str_detect(sub, 'Protein families: metabolism'), 'metabolism', category)) %>%
    mutate(category = if_else(str_detect(sub, 'Unclassified: metabolism'), 'metabolism', category)) %>%
    mutate(category = if_else(is.na(category), '', category)) %>%
    # sub
    mutate(category = if_else(str_detect(sub, 'genetic information processing'), paste0(category, 'genetic information processing', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Metabolism of cofactors and vitamins'), paste0(category, 'cofactors and vitamins', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Energy metabolism'), paste0(category, 'Energy metabolism', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'signaling and cellular processes'), paste0(category, 'signaling and cellular processes', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Carbohydrate metabolism'), paste0(category, 'Carbohydrate metabolism', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Metabolism of terpenoids and polyketides'), paste0(category, 'terpenoids and polyketides', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), paste0(category, 'Nucleotide metabolism', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Glycan biosynthesis and metabolism'), paste0(category, 'Glycan metabolism', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(sub, 'Lipid metabolism'), paste0(category, 'Lipid metabolism', collapse = ";"), category)) %>%

    # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%
    # mutate(category = if_else(str_detect(sub, 'Nucleotide metabolism'), 'Nucleotide metabolism', category)) %>%


    mutate(category = if_else(str_detect(sub, 'Amino acid metabolism'), paste0(category, 'Amino acid', collapse = ";"), category)) %>%

    # path overrides
    mutate(category = if_else(str_detect(path, 'Nitrogen metabolism'), paste0(category, 'Nitrogen metabolism', collapse = ";"), category)) %>%
    mutate(category = if_else(str_detect(path, 'Photosynthesis'), paste0(category, 'Photosynthesis', collapse = ";"), category)) %>%
    #count(system)
#    filter(is.na(category)) %>% 
#count(transport, category, path) #%>% arrange(desc(n))
count(transport, category) %>% arrange(desc(n))

ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `category = if_else(...)`.
[1mCaused by error in `paste0()`:[22m
[33m![39m result would exceed 2^31-1 bytes


In [None]:
df_all %>%
    filter(padj < 0.05) %>% #top_n(-50, logFC) %>% 
    count(category) %>% arrange(desc(n))
    #select(Assay,logFC,gene)

In [None]:
help(merge)