#! /usr/bin/Rscript

#' @title Merge CAMERA QC results per ionisation mode
#'
#' @description
#'
#' All CSV files in the input directory whose names contain 
#' "posCAMERA_Results_" are read and combined into one table; the same 
#' is done for the files whose names contain "negCAMERA_Results_". 
#' Duplicated rows are removed from each combined table. The first number 
#' found in the "isotopes" column of each row (the isotope group number) 
#' is stored in an additional column named "istops". Each combined table 
#' is written as a CSV file into the input directory.


#' @param path is the directory which contains the CAMERA result CSV 
#'        files; the combined files are written to the same directory


#' @return
#' 
#' Two CSV files saved with the following names in path: 
#' Combined_Camera_pos.csv, Combined_Camera_neg.csv
#'
#' @author Mahnoor Zulfiqar
#' 
#' @examples
#' 
#' merge_qc(path = "/usr/project/")


# ---------- Preparations ----------
# Load libraries
library("Spectra")
library("stringr")
library("dplyr")

# ---------- Arguments and user variables ----------
# Keep only the user-supplied arguments (everything after the script name).
args <- commandArgs(trailingOnly = TRUE)

# The first positional argument is taken as the working directory path.
# Without any argument this evaluates to NA_character_.
path <- as.character(args[1L])


# ---------- merge_qc ----------

# Combine the CAMERA result tables of a single ionisation mode.
#
# path         directory that is searched for input files and that receives
#              the combined CSV.
# file_pattern regular expression passed to list.files() to select the
#              per-sample result files of this mode.
# out_name     file name of the combined CSV written into `path`.
#
# Returns (invisibly) the path of the written file, or NULL when no input
# file matched and nothing was written.
.merge_camera_mode <- function(path, file_pattern, out_name) {
    result_files <- list.files(path, pattern = file_pattern, full.names = TRUE)

    # Nothing to merge for this mode: skip it instead of failing later on a
    # table that has no "isotopes" column, so the other mode is still handled.
    if (length(result_files) == 0L) {
        message(
            "merge_qc: no files matching '", file_pattern, "' found in '",
            path, "'; skipping ", out_name
        )
        return(invisible(NULL))
    }

    # read_csv is called through its namespace because readr is not attached
    # by this script.
    combined <- dplyr::bind_rows(lapply(result_files, readr::read_csv))

    # Drop rows that are exact duplicates across ALL columns.
    combined <- as.data.frame(combined[!duplicated(combined), ])

    if (!"isotopes" %in% names(combined)) {
        stop(
            "merge_qc: column 'isotopes' is missing in the files matching '",
            file_pattern, "'",
            call. = FALSE
        )
    }

    # Pull every number out of the isotope annotation of each row.
    iso <- as.character(combined[["isotopes"]])
    iso_numbers <- regmatches(
        iso,
        gregexpr("[[:digit:]]+\\.*[[:digit:]]*", iso)
    )

    # Only the first number is kept: it is the isotope group number, a second
    # number can be the charge. Rows without any number get NA. vapply also
    # copes with a zero-row table, where 1:length(x) would have looped over
    # c(1, 0) and appended a spurious NA row.
    combined[["istops"]] <- vapply(
        iso_numbers,
        function(found) {
            if (length(found) > 0L) as.numeric(found[[1L]]) else NA_real_
        },
        numeric(1)
    )

    out_file <- file.path(path, out_name)
    utils::write.csv(combined, out_file)
    invisible(out_file)
}

#' Merge the CAMERA QC result files found in a directory.
#'
#' Files whose names match "posCAMERA_Results_" are combined into
#' Combined_Camera_pos.csv and files whose names match "negCAMERA_Results_"
#' into Combined_Camera_neg.csv. Exact duplicate rows are removed and the
#' first number of the "isotopes" column is stored in a new numeric column
#' "istops". A mode without any matching file is skipped with a message.
#'
#' @param path Single character string: directory that holds the CAMERA
#'   result CSV files and receives the combined CSV files.
#'
#' @return `NULL`, invisibly. Called for its side effect of writing the
#'   combined CSV files into `path`.
merge_qc <- function(path) {
    stopifnot(is.character(path), length(path) == 1L, !is.na(path))
    if (!dir.exists(path)) {
        stop("merge_qc: directory does not exist: ", path, call. = FALSE)
    }
    if (!requireNamespace("readr", quietly = TRUE)) {
        stop(
            "merge_qc: package 'readr' is required to read the CAMERA results",
            call. = FALSE
        )
    }

    .merge_camera_mode(path, "posCAMERA_Results_", "Combined_Camera_pos.csv")
    .merge_camera_mode(path, "negCAMERA_Results_", "Combined_Camera_neg.csv")

    invisible(NULL)
}
# Usage: merge_qc(path)