## Load libraries

In [1]:
# ---------- Preparations ----------
# Load Libraries
library(Spectra)
library(MsBackendMgf)
library(MsBackendHmdb)
library(MsCoreUtils)
library(MsBackendMsp)
library(readr)
library(dplyr)
# 3 dependencies for latest MassBank version
library(rvest)
library(stringr)
library(xml2)
# A negative `warn` value makes R ignore all warnings for the rest of the
# session; set it back to 0 while debugging so problems are not hidden.
options(warn=-1)

Lade nötiges Paket: S4Vectors

Lade nötiges Paket: stats4

Lade nötiges Paket: BiocGenerics

Lade nötiges Paket: parallel


Attache Paket: ‘BiocGenerics’


Die folgenden Objekte sind maskiert von ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


Die folgenden Objekte sind maskiert von ‘package:stats’:

    IQR, mad, sd, var, xtabs


Die folgenden Objekte sind maskiert von ‘package:base’:

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, basename, cbind, colnames, dirname, do.call,
    duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
    lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
    pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
    tapply, union, unique, unsplit, which.max, which.min



Attache Paket: ‘S4Vectors’


Die folgenden Objekte sind maskiert 

In [2]:
# Track Time: record the wall-clock start so the total run time can be
# printed after the workflow has finished
start_time <- Sys.time()

## Define input directory, keep everything in this directory

In [3]:
# ---------- Script ----------
# Input directory: the current working directory with a trailing "/" so that
# file names can be appended to it directly
input_dir <- paste0(getwd(), "/")
# Show the resolved directory in the notebook output
input_dir

In [4]:
#input_dir <- "/Users/mahnoorzulfiqar/OneDriveUNI/MZML/"
#input_dir

## load function file

In [5]:
# Source the workflow function definitions, which are expected to sit in the
# input directory next to the data
source(paste0(input_dir, "Workflow_R_Functions.r"))

In [6]:
# load the functions file
# source(file = '/Users/mahnoorzulfiqar/OneDriveUNI/MAW/Workflow_R_Functions.r')

In [7]:
# downloading spectral libraries; do NOT run
# load db spectra objects [gnps, hmdb, mbank]
# download_specDB(input_dir, db = "all")

## Load Spectral Databases as rda objects

In [8]:
# OR load the database rda objects 
#load(file = paste(input_dir,"gnps.rda", sep = ""))
#load(file = paste(input_dir,"hmdb.rda", sep = ""))
#load(file = paste(input_dir,"mbank.rda", sep = ""))

In [9]:
#gnps

In [10]:
#mbank

In [11]:
#hmdb

## Start the workflow

In [6]:
# Run the first function; this creates a dataframe of your input files, their result directories 
# and gives an id to each input file; stores the table in directory as a csv file
input_table <- data.frame(ms2_rfilename(input_dir))
# Display the table (the output shown has columns mzml_files, ResultFileNames, File_id)
input_table

mzml_files,ResultFileNames,File_id
<chr>,<chr>,<chr>
./VN_211016_Sc_QC_PRM_neg.mzML,./VN_211016_Sc_QC_PRM_neg,file_1
./VN_211016_Sc_QC_PRM_pos.mzML,./VN_211016_Sc_QC_PRM_pos,file_2
./VN_211016_Sc_st_PRM_neg.mzML,./VN_211016_Sc_st_PRM_neg,file_3
./VN_211016_Sc_st_PRM_pos.mzML,./VN_211016_Sc_st_PRM_pos,file_4
./VN_211016_acetyl_carnitine.mzML,./VN_211016_acetyl_carnitine,file_5
./VN_211016_betaine.mzML,./VN_211016_betaine,file_6
./VN_211016_butanoyl_carnitine.mzML,./VN_211016_butanoyl_carnitine,file_7
./VN_211016_cyst_acid_PRM_neg.mzML,./VN_211016_cyst_acid_PRM_neg,file_8
./VN_211016_cyst_acid_PRM_pos.mzML,./VN_211016_cyst_acid_PRM_pos,file_9
./VN_211016_isovalerylcarnitine.mzML,./VN_211016_isovalerylcarnitine,file_10


In [None]:
# Remove only a literal leading "./" from a relative path.
# The pattern is anchored and escaped on purpose: an unescaped "./" is a regex
# meaning "any character followed by a slash", which would cut two characters
# out of the middle of any path that does not start with "./".
strip_dot_slash <- function(path) {
    sub("^\\./", "", path)
}

# Build the full path of a file inside one result directory:
# input_dir + result directory name (without its leading "./") + suffix.
result_path <- function(result_name, suffix) {
    paste0(input_dir, strip_dot_slash(paste0(result_name, suffix)))
}

# Run the complete workflow once for every row (input .mzML file) of
# input_table. seq_len() is used so that an empty table runs zero iterations
# (1:nrow() would iterate over c(1, 0) and fail on a non-existent row).
for (i in seq_len(nrow(input_table))) {

    # Per-file values taken from the current row of the input table
    mzml_path   <- as.character(input_table[i, "mzml_files"])
    result_name <- input_table[i, "ResultFileNames"]
    file_tag    <- input_table[i, "File_id"]

    # Files inside the result directory that are passed from step to step
    premz_file     <- result_path(result_name, "/premz_list.txt")
    processed_mzml <- result_path(result_name, "/processedSpectra.mzML")

    # Preprocess and read the mzML file
    spec_pr <- spec_Processing(mzml_path, result_name)

    # Extract spectra
    sps_all <- spec_pr[[1]]
    # Extract precursor m/z
    pre_mz <- spec_pr[[2]]

    # Perform dereplication with all dbs
    df_derep <- spec_dereplication(pre_tbl = premz_file,
                                   proc_mzml = processed_mzml,
                                   db = "all",
                                   result_dir = result_name,
                                   file_id = file_tag,
                                   input_dir,
                                   ppmx = 15)

    # Extract MS2 peak lists
    spec_pr2 <- ms2_peaks(pre_tbl = premz_file,
                          proc_mzml = processed_mzml,
                          input_dir,
                          result_name,
                          file_id = file_tag)

    # Extract MS1 peaks or isotopic peaks
    # NOTE(review): the input_table printed above has no "qcCAM_csv" column.
    # This argument is only harmless if ms1_peaks never evaluates `y` when
    # QC = FALSE (R evaluates arguments lazily) -- confirm against ms1_peaks.
    ms1p <- ms1_peaks(x = result_path(result_name, "/insilico/MS2DATA.csv"),
                      y = input_table[i, "qcCAM_csv"],
                      result_name,
                      input_dir,
                      QC = FALSE)

    # Prepare SIRIUS parameter files
    sirius_param_files <- sirius_param(x = result_path(result_name, "/insilico/MS1DATA.csv"),
                                       result_dir = result_name,
                                       input_dir,
                                       SL = TRUE)

    # Run SIRIUS
    run_sirius(files = result_path(result_name, "/insilico/MS1DATA_SiriusPandSL.csv"),
               ppm_max = 5,
               ppm_max_ms2 = 15,
               QC = FALSE,
               SL = TRUE,
               SL_path = paste0(input_dir, "ScostSLS/"),
               candidates = 30)

    # Post process SIRIUS results and extract adducts for MetFrag
    sirius_pproc <- sirius_postprocess(result_name, SL = TRUE)

    # Prepare MetFrag parameter files
    met_param <- metfrag_param(x = result_path(result_name, "/insilico/MS1DATAsirius.csv"),
                               result_dir = result_name,
                               input_dir,
                               adducts = paste0(input_dir, "MetFrag_AdductTypes.csv"),
                               sl_mtfrag = paste0(input_dir, "SLS_metfrag.txt"),
                               SL = TRUE,
                               ppm_max = 5,
                               ppm_max_ms2 = 15)

    # Run MetFrag
    run_metfrag(met_param = result_path(result_name, "/insilico/metparam_list.txt"),
                input_dir)
}

Writing file processedSpectra.mzML...
OK

Rows: 0 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (13): rank, formulaRank, CSI:FingerIDScore, molecularFormula, adduct, In...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 13 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): molecularFormula, adduct, precursorFormula
dbl (9): rank, SiriusScore, TreeScore, IsotopeScore, numExplainedPeaks, expl...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to

In [None]:
# Report how long the whole run took, measured from start_time
end_time <- Sys.time()
print(difftime(end_time, start_time))

### At the end, each .mzML MS2 file will have a result directory of the same name containing the output files and subdirectories