Merge pull request #13 from singjc/master

[MINOR] Changed assumed chromatogram folder from mzml to xics
shubham1637 · Feb 20, 2021 · 8d9435c · 8d9435c
2 parents 31bfc4f + 2cbf21a
commit 8d9435c
Show file tree

Hide file tree

Showing 54 changed files with 191 additions and 191 deletions.
diff --git a/R/align_dia_runs.R b/R/align_dia_runs.R
@@ -1,6 +1,6 @@
 #' Outputs intensities for each analyte from aligned Targeted-MS runs
 #'
-#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches chromatogram indices for each analyte.
+#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches chromatogram indices for each analyte.
 #' It then aligns XICs of each analyte to its reference XICs. The best peak, which has the lowest m-score, about the aligned retention time is picked for quantification.
 #' @author Shubham Gupta, \email{[email protected]}
 #'
@@ -10,10 +10,10 @@
 #' Date: 2019-12-14
 #' @importFrom dplyr %>%
 #' @inheritParams checkParams
-#' @param dataPath (string) path to mzml and osw directory.
+#' @param dataPath (string) path to xics and osw directory.
 #' @param outFile (string) name of the output file.
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
-#' @param runs (a vector of string) names of mzml file without extension.
+#' @param runs (a vector of string) names of xics file without extension.
 #' @param refRun (string) reference for alignment. If no run is provided, m-score is used to select reference run.
 #' @param applyFun (function) value must be either lapply or BiocParallel::bplapply.
 #' @return An output table with following columns: precursor, run, intensity, RT, leftWidth, rightWidth,
@@ -173,7 +173,7 @@ alignTargetedRuns <- function(dataPath, outFile = "DIAlignR", params = paramsDIA
 
 #' AlignObj for analytes between a pair of runs
 #'
-#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches chromatogram indices for each requested analyte.
+#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches chromatogram indices for each requested analyte.
 #' It then aligns XICs of each analyte to its reference XICs. AlignObj is returned which contains aligned indices and cumulative score along the alignment path.
 #' @author Shubham Gupta, \email{[email protected]}
 #'

diff --git a/R/get_filenames.R b/R/get_filenames.R
@@ -6,7 +6,7 @@
 #'
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-14
-#' @param dataPath (char) path to mzml and osw directory.
+#' @param dataPath (char) path to xics and osw directory.
 #' @param pattern (char) must be either *.osw or *merged.osw .
 #' @return A dataframe with three columns:
 #' \item{spectraFile}{(string) as mentioned in RUN table of osw files.}
@@ -68,7 +68,7 @@ filenamesFromOSW <- function(dataPath, pattern){
 #'
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-14
-#' @param dataPath (char) Path to mzml and osw directory.
+#' @param dataPath (char) Path to xics and osw directory.
 #' @return A dataframe with two columns:
 #' \item{runName}{(string) contain respective mzML names without extension.}
 #' \item{chromatogramFile}{(string) Path to the chromatogram file.}
@@ -81,10 +81,10 @@ filenamesFromOSW <- function(dataPath, pattern){
 filenamesFromMZML <- function(dataPath, chromFile){
  if(chromFile == "mzML") p <- ".chrom.mzML$"
  if(chromFile == "sqMass") p <- ".chrom.sqMass$"
- temp <- list.files(path = file.path(dataPath, "mzml"), pattern=p)
+ temp <- list.files(path = file.path(dataPath, "xics"), pattern=p)
  message(length(temp), " ", sub("\\$","",p), " files are found.")
  mzMLfiles <- vapply(temp, function(x) sub(p,"", x), "", USE.NAMES = FALSE)
- output <- data.frame("runName" = mzMLfiles, "chromatogramFile" = file.path(dataPath, "mzml", temp))
+ output <- data.frame("runName" = mzMLfiles, "chromatogramFile" = file.path(dataPath, "xics", temp))
  output[["chromatogramFile"]] <- as.character(output[["chromatogramFile"]]) # Convert from factor to character.
  output[["runName"]] <- as.character(output[["runName"]]) # Convert from factor to character.
  output
@@ -102,7 +102,7 @@ filenamesFromMZML <- function(dataPath, chromFile){
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-14
 #' @inheritParams checkParams
-#' @param dataPath (char) Path to mzml and osw directory.
+#' @param dataPath (char) Path to xics and osw directory.
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
 #' @return (dataframe) it has five columns:
 #' \item{spectraFile}{(string) as mentioned in RUN table of osw files.}
@@ -121,7 +121,7 @@ getRunNames <- function(dataPath, oswMerged = TRUE, params = paramsDIAlignR()){
  } else{
  filenames <- filenamesFromOSW(dataPath, pattern = "*merged.osw$")
  }
- # Get names of mzml files.
+ # Get names of xics files.
  nameCutPattern = "(.*)(/)(.*)" # regex expression to fetch mzML file name from RUN.FILENAME columns of osw files.
  runs <- vapply(filenames[["spectraFile"]], function(x) gsub(nameCutPattern, replacement = "\\3", x), "")
  fileExtn <- strsplit(runs[[1]], "\\.")[[1]][2]
@@ -132,7 +132,7 @@ getRunNames <- function(dataPath, oswMerged = TRUE, params = paramsDIAlignR()){
  # Check if osw files have corresponding mzML file.
  runs <- intersect(filenames[["runName"]], mzMLfiles[["runName"]])
  if(length(runs) != length(filenames[["runName"]])){
- cat("Following files did not have their counterpart in mzml directory\n")
+ cat("Following files did not have their counterpart in xics directory\n")
  print(setdiff(filenames[["runName"]], mzMLfiles[["runName"]]))
  }
  if(length(runs) == 0){

diff --git a/R/get_peaks_chromatograms.R b/R/get_peaks_chromatograms.R
@@ -13,7 +13,7 @@
 #' @keywords internal
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' mzmlName<-paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
+#' mzmlName<-paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
 #' mz <- mzR::openMSfile(mzmlName, backend = "pwiz")
 #' chromIndices <- c(37L, 38L, 39L, 40L, 41L, 42L)
 #' \dontrun{
@@ -45,7 +45,7 @@ extractXIC_group <- function(mz, chromIndices){
 #' @keywords internal
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' sqName <- paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
+#' sqName <- paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
 #' chromIndices <- c(36L, 37L, 38L, 39L, 40L, 41L)
 #' \dontrun{
 #' con <- DBI::dbConnect(RSQLite::SQLite(), dbname = sqName)
@@ -165,7 +165,7 @@ getXICs4AlignObj <- function(mzPntrs, fileInfo, runs, prec2chromIndex, analytes)
 #' @inheritParams checkParams
 #' @param analytes (integer) a vector of precursor IDs.
 #' @param runs (vector of string) names of mzML files without extension.
-#' @param dataPath (string) Path to mzml and osw directory.
+#' @param dataPath (string) Path to xics and osw directory.
 #' @param maxFdrQuery (numeric) A numeric value between 0 and 1. It is used to filter features from osw file which have SCORE_MS2.QVALUE less than itself.
 #' @param runType (char) This must be one of the strings "DIA_proteomics", "DIA_Metabolomics".
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.

diff --git a/R/merge_order.R b/R/merge_order.R
@@ -100,7 +100,7 @@ getNodeIDs <- function(tree){
 #' @description {
 #' While traversing from leaf to root node, at each node a master run is created.
 #' Merged features and merged chromatograms from parent runs are estimated. Chromatograms are written on the disk
-#' at dataPath/mzml. For each precursor aligned parent time-vectors and corresponding child time-vector
+#' at dataPath/xics. For each precursor aligned parent time-vectors and corresponding child time-vector
 #' are also calculated and written as *_av.rds at dataPath.
 #'
 #' Accessors to the new files are added to fileInfo, mzPntrs and prec2chromIndex. Features, reference
@@ -157,7 +157,7 @@ getNodeIDs <- function(tree){
 #' rm(mzPntrs)
 #' # Cleanup
 #' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
-#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
+#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
 #' }
 traverseUp <- function(tree, dataPath, fileInfo, features, mzPntrs, prec2chromIndex, precursors,
  params, adaptiveRTs, refRuns, multipeptide, peptideScores, ropenms, applyFun = lapply){
@@ -228,7 +228,7 @@ traverseUp <- function(tree, dataPath, fileInfo, features, mzPntrs, prec2chromIn
 #' # Cleanup
 #' rm(mzPntrs)
 #' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
-#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
+#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
 #' }
 traverseDown <- function(tree, dataPath, fileInfo, multipeptide, prec2chromIndex, mzPntrs, precursors,
  adaptiveRTs, refRuns, params, applyFun = lapply){
@@ -364,7 +364,7 @@ traverseDown <- function(tree, dataPath, fileInfo, multipeptide, prec2chromIndex
 #' # Cleanup
 #' rm(mzPntrs)
 #' file.remove(file.path(dataPath, "master1_av.rds"))
-#' file.remove(file.path(dataPath, "mzml", "master1.chrom.mzML"))
+#' file.remove(file.path(dataPath, "xics", "master1.chrom.mzML"))
 #' }
 alignToMaster <- function(ref, eXp, alignedVecs, refRun, adaptiveRT, multipeptide, prec2chromIndex,
  mzPntrs, fileInfo, precursors, params, applyFun = lapply){

diff --git a/R/merge_osw_mzml.R b/R/merge_osw_mzml.R
@@ -30,16 +30,16 @@ mergeOswAnalytes_ChromHeader <- function(oswAnalytes, chromHead, analyteFDR = 1
 
 #' Get list of peptides and their chromatogram indices.
 #'
-#' This function reads all osw and mzml files in the directories at dataPath. It selects analytes which has associated features with m-score < maxFdrQuery.
-#' For these analytes it fetches chromatogram indices by matching transition_id(osw) with chromatogramID(mzml).
+#' This function reads all osw and xics files in the directories at dataPath. It selects analytes which have associated features with m-score < maxFdrQuery.
+#' For these analytes it fetches chromatogram indices by matching transition_id(osw) with chromatogramID(xics).
 #' @author Shubham Gupta, \email{[email protected]}
 #'
 #' ORCID: 0000-0003-3500-8152
 #'
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-13
 #' @importFrom rlang .data
-#' @param dataPath (char) path to mzml and osw directory.
+#' @param dataPath (char) path to xics and osw directory.
 #' @param filenames (data-frame) column "filename" contains RUN table from osw files. column "runs" contain respective mzML names without extension.
 #' To get filenames use DIAlignR::getRunNames function.
 #' @param maxFdrQuery (numeric) A numeric value between 0 and 1. It is used to filter features from osw file which have SCORE_MS2.QVALUE less than itself.
@@ -155,7 +155,7 @@ mapPrecursorToChromIndices <- function(prec2transition, chromHead){
 
 #' Get chromatogram indices of precursors.
 #'
-#' This function reads the header of chromatogram files. It then fetches chromatogram indices by matching transition_id(osw) with chromatogramID(mzml).
+#' This function reads the header of chromatogram files. It then fetches chromatogram indices by matching transition_id(osw) with chromatogramID(xics).
 #' @author Shubham Gupta, \email{[email protected]}
 #'
 #' ORCID: 0000-0003-3500-8152

diff --git a/R/merge_runs.R b/R/merge_runs.R
@@ -1,7 +1,7 @@
 #' Create a child run from two parent runs
 #'
 #' Get merged features and merged chromatograms from parent runs. Chromatograms are written on the disk
-#' at dataPath/mzml. For each precursor aligned parent time-vectors and corresponding child time-vector
+#' at dataPath/xics. For each precursor aligned parent time-vectors and corresponding child time-vector
 #' are also calculated and written as *_av.rda at dataPath.
 #'
 #' @author Shubham Gupta, \email{[email protected]}
@@ -42,7 +42,7 @@
 #' multipeptide <- getNodeRun(runA = "run2", runB = "run0", mergeName = mergeName, dataPath = ".", fileInfo, features,
 #' mzPntrs, prec2chromIndex, precursors, params, adaptiveRTs, refRuns, multipeptide, peptideScores, ropenms)
 #' rm(mzPntrs)
-#' file.remove(file.path(".", "mzml", paste0(mergeName, ".chrom.mzML")))
+#' file.remove(file.path(".", "xics", paste0(mergeName, ".chrom.mzML")))
 #' file.remove(list.files(".", pattern = "*_av.rds", full.names = TRUE))
 #' }
 getNodeRun <- function(runA, runB, mergeName, dataPath, fileInfo, features, mzPntrs, prec2chromIndex,
@@ -134,10 +134,10 @@ getNodeRun <- function(runA, runB, mergeName, dataPath, fileInfo, features, mzPn
  ##### Write node mzML file #####
  mergedXICs <- unlist(mergedXICs, recursive = FALSE, use.names = FALSE)
  if(params[["chromFile"]] =="mzML"){
- fileName <- file.path(dataPath, "mzml", paste0(mergeName, ".chrom.mzML"))
+ fileName <- file.path(dataPath, "xics", paste0(mergeName, ".chrom.mzML"))
  createMZML(ropenms, fileName, mergedXICs, precursors$transition_ids)
  } else if(params[["chromFile"]] =="sqMass"){
- fileName <- file.path(dataPath, "mzml", paste0(mergeName, ".chrom.sqMass"))
+ fileName <- file.path(dataPath, "xics", paste0(mergeName, ".chrom.sqMass"))
  createSqMass(fileName, mergedXICs, precursors$transition_ids, params[["lossy"]])
  }
 

diff --git a/R/peak_area.R b/R/peak_area.R
@@ -67,7 +67,7 @@ newRow <- function(xics, left, right, RT, analyte, run, params){
 #' @importFrom magrittr %>%
 #' @inheritParams alignTargetedRuns
 #' @param peakTable (data-frame) usually an output of alignTargetedRuns. Must have these columns: run, precursor, leftWidth, rightWidth.
-#' @param dataPath (string) path to mzml and osw directory.
+#' @param dataPath (string) path to xics and osw directory.
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
 #' @return (data-frame)
 #' @seealso \code{\link{alignTargetedRuns}, \link{calculateIntensity}}

diff --git a/R/progressive_alignment.R b/R/progressive_alignment.R
@@ -1,6 +1,6 @@
 #' Peptide quantification through progressive alignment
 #'
-#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches
+#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches
 #' chromatogram indices for each analyte. To perform alignment, first a crude guide-tree is built which
 #' can also be provided with newickTree parameter. As we traverse from the leaf-nodes to the root node,
 #' runs are aligned pairwise. The root node is named master1 that has average of all fragment ion chromatograms
@@ -15,11 +15,11 @@
 #' Date: 2020-07-10
 #' @inheritParams checkParams
 #' @inheritParams alignTargetedRuns
-#' @param dataPath (string) path to mzml and osw directory.
+#' @param dataPath (string) path to xics and osw directory.
 #' @param outFile (string) name of the output file.
 #' @param ropenms (pyopenms module) get this python module through \code{\link{get_ropenms}}. Required only for chrom.mzML files.
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
-#' @param runs (string) names of mzml file without extension.
+#' @param runs (string) names of xics file without extension.
 #' @param newickTree (string) guidance tree in newick format. Look up \code{\link{getTree}}.
 #' @return (None)
 #' @seealso \code{\link{alignTargetedRuns}}
@@ -33,7 +33,7 @@
 #' # Removing aligned vectors
 #' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
 #' # Removing temporarily created master chromatograms
-#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
+#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
 #' file.remove(file.path(dataPath, "test3.temp.RData"))
 #' file.remove(file.path(dataPath, "master.merged.osw"))
 #' }

diff --git a/R/pyopenms.R b/R/pyopenms.R
@@ -55,7 +55,7 @@ addXIC <- function(ropenms, expriment, xic, nativeId){
 #' @seealso \code{\link{get_ropenms}, \link{addXIC}}
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' filename <- paste0(dataPath, "/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
+#' filename <- paste0(dataPath, "/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
 #' data(XIC_QFNNTDIVLLEDFQK_3_DIAlignR)
 #' XICs <- XIC_QFNNTDIVLLEDFQK_3_DIAlignR[["hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt"]]
 #' nativeIds <- list(27706:27711)
@@ -117,7 +117,7 @@ get_ropenms <- function(pythonPath = NULL, condaEnv = NULL, useConda=TRUE){
 
 notReady <- function(ropenms, dataPath, filename){
  mz = ropenms$OnDiscMSExperiment()
- #filename <- paste0(dataPath, "/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
+ #filename <- paste0(dataPath, "/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
  mz$openFile(filename)
  meta_data <- mz$getMetaData()
  header <- meta_data$getChromatograms()

diff --git a/R/read_mzml.R b/R/read_mzml.R
@@ -7,15 +7,15 @@
 #'
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-13
-#' @param mzmlName (char) path to mzml file.
+#' @param mzmlName (char) path to xics file.
 #' @return (A data-frame) It has 10 columns. The two important columns are:
 #' \item{chromatogramId}{(integer) Fragment-ion ID that matches with transition ID in osw file.}
 #' \item{chromatogramIndex}{(integer) Index of chromatogram in mzML file.}
 #'
 #' @keywords internal
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' mzmlName <-paste0(dataPath,"/mzml/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
+#' mzmlName <-paste0(dataPath,"/xics/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
 #' \dontrun{
 #' chromHead <- readChromatogramHeader(mzmlName = mzmlName)
 #' }
@@ -40,15 +40,15 @@ readMzMLHeader <- function(mzmlName){
 #'
 #' License: (c) Author (2020) + GPL-3
 #' Date: 2020-12-25
-#' @param mzmlName (char) path to mzml file.
+#' @param mzmlName (char) path to xics file.
 #' @return (A data-frame) It has 10 columns. The two important columns are:
 #' \item{chromatogramId}{(integer) Fragment-ion ID that matches with transition ID in osw file.}
 #' \item{chromatogramIndex}{(integer) Index of chromatogram in mzML file.}
 #'
 #' @keywords internal
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' sqName <-paste0(dataPath,"/mzml/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
+#' sqName <-paste0(dataPath,"/xics/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
 #' \dontrun{
 #' chromHead <- readChromatogramHeader(sqName)
 #' }

diff --git a/R/read_osw.R b/R/read_osw.R
@@ -68,7 +68,7 @@ fetchAnalytesInfo <- function(oswName, maxFdrQuery, oswMerged,
 #'
 #' License: (c) Author (2019) + GPL-3
 #' Date: 2019-12-13
-#' @param dataPath (char) path to mzml and osw directory.
+#' @param dataPath (char) path to xics and osw directory.
 #' @param filenames (data-frame) column "filename" contains RUN table from osw files. column "runs" contain respective mzML names without extension.
 #' To get filenames use \code{\link{getRunNames}} function.
 #' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.

diff --git a/R/sqMass.R b/R/sqMass.R
@@ -90,7 +90,7 @@ createSqMass <- function(filename, XICs, transitionIDs, lossy){
 #' @return A numeric vector. Uncompressed form of the Blob.
 #' @examples
 #' dataPath <- system.file("extdata", package = "DIAlignR")
-#' sqName <- paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
+#' sqName <- paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
 #' con <- DBI::dbConnect(RSQLite::SQLite(), dbname = sqName)
 #' df1 <- DBI::dbGetQuery(con, "SELECT CHROMATOGRAM_ID, COMPRESSION, DATA_TYPE, DATA FROM DATA WHERE CHROMATOGRAM_ID = 36;")
 #' DBI::dbDisconnect(con)