Skip to content

Commit

Permalink
Merge pull request #13 from singjc/master
Browse files Browse the repository at this point in the history
[MINOR] Changed assumed chromatogram folder from mzml to xics
  • Loading branch information
shubham1637 authored Feb 20, 2021
2 parents 31bfc4f + 2cbf21a commit 8d9435c
Show file tree
Hide file tree
Showing 54 changed files with 191 additions and 191 deletions.
8 changes: 4 additions & 4 deletions R/align_dia_runs.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Outputs intensities for each analyte from aligned Targeted-MS runs
#'
#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches chromatogram indices for each analyte.
#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches chromatogram indices for each analyte.
#' It then aligns the XICs of each analyte to its reference XICs. The best peak (the one with the lowest m-score) around the aligned retention time is picked for quantification.
#' @author Shubham Gupta, \email{[email protected]}
#'
Expand All @@ -10,10 +10,10 @@
#' Date: 2019-12-14
#' @importFrom dplyr %>%
#' @inheritParams checkParams
#' @param dataPath (string) path to mzml and osw directory.
#' @param dataPath (string) path to xics and osw directory.
#' @param outFile (string) name of the output file.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
#' @param runs (a vector of string) names of mzml file without extension.
#' @param runs (a vector of strings) names of xics files without extension.
#' @param refRun (string) reference for alignment. If no run is provided, m-score is used to select reference run.
#' @param applyFun (function) value must be either lapply or BiocParallel::bplapply.
#' @return An output table with following columns: precursor, run, intensity, RT, leftWidth, rightWidth,
Expand Down Expand Up @@ -173,7 +173,7 @@ alignTargetedRuns <- function(dataPath, outFile = "DIAlignR", params = paramsDIA

#' AlignObj for analytes between a pair of runs
#'
#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches chromatogram indices for each requested analyte.
#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches chromatogram indices for each requested analyte.
#' It then aligns XICs of each analyte to its reference XICs. An AlignObj is returned, which contains aligned indices and the cumulative score along the alignment path.
#' @author Shubham Gupta, \email{[email protected]}
#'
Expand Down
14 changes: 7 additions & 7 deletions R/get_filenames.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-14
#' @param dataPath (char) path to mzml and osw directory.
#' @param dataPath (char) path to xics and osw directory.
#' @param pattern (char) must be either *.osw or *merged.osw .
#' @return A dataframe with three columns:
#' \item{spectraFile}{(string) as mentioned in RUN table of osw files.}
Expand Down Expand Up @@ -68,7 +68,7 @@ filenamesFromOSW <- function(dataPath, pattern){
#'
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-14
#' @param dataPath (char) Path to mzml and osw directory.
#' @param dataPath (char) Path to xics and osw directory.
#' @return A dataframe with two columns:
#' \item{runName}{(string) contains the respective mzML names without extension.}
#' \item{chromatogramFile}{(string) Path to the chromatogram file.}
Expand All @@ -81,10 +81,10 @@ filenamesFromOSW <- function(dataPath, pattern){
filenamesFromMZML <- function(dataPath, chromFile){
if(chromFile == "mzML") p <- ".chrom.mzML$"
if(chromFile == "sqMass") p <- ".chrom.sqMass$"
temp <- list.files(path = file.path(dataPath, "mzml"), pattern=p)
temp <- list.files(path = file.path(dataPath, "xics"), pattern=p)
message(length(temp), " ", sub("\\$","",p), " files are found.")
mzMLfiles <- vapply(temp, function(x) sub(p,"", x), "", USE.NAMES = FALSE)
output <- data.frame("runName" = mzMLfiles, "chromatogramFile" = file.path(dataPath, "mzml", temp))
output <- data.frame("runName" = mzMLfiles, "chromatogramFile" = file.path(dataPath, "xics", temp))
output[["chromatogramFile"]] <- as.character(output[["chromatogramFile"]]) # Convert from factor to character.
output[["runName"]] <- as.character(output[["runName"]]) # Convert from factor to character.
output
Expand All @@ -102,7 +102,7 @@ filenamesFromMZML <- function(dataPath, chromFile){
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-14
#' @inheritParams checkParams
#' @param dataPath (char) Path to mzml and osw directory.
#' @param dataPath (char) Path to xics and osw directory.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
#' @return (dataframe) it has five columns:
#' \item{spectraFile}{(string) as mentioned in RUN table of osw files.}
Expand All @@ -121,7 +121,7 @@ getRunNames <- function(dataPath, oswMerged = TRUE, params = paramsDIAlignR()){
} else{
filenames <- filenamesFromOSW(dataPath, pattern = "*merged.osw$")
}
# Get names of mzml files.
# Get names of xics files.
nameCutPattern = "(.*)(/)(.*)" # regex expression to fetch mzML file name from RUN.FILENAME columns of osw files.
runs <- vapply(filenames[["spectraFile"]], function(x) gsub(nameCutPattern, replacement = "\\3", x), "")
fileExtn <- strsplit(runs[[1]], "\\.")[[1]][2]
Expand All @@ -132,7 +132,7 @@ getRunNames <- function(dataPath, oswMerged = TRUE, params = paramsDIAlignR()){
# Check if osw files have corresponding mzML file.
runs <- intersect(filenames[["runName"]], mzMLfiles[["runName"]])
if(length(runs) != length(filenames[["runName"]])){
cat("Following files did not have their counterpart in mzml directory\n")
cat("Following files did not have their counterpart in xics directory\n")
print(setdiff(filenames[["runName"]], mzMLfiles[["runName"]]))
}
if(length(runs) == 0){
Expand Down
6 changes: 3 additions & 3 deletions R/get_peaks_chromatograms.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @keywords internal
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' mzmlName<-paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' mzmlName<-paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' mz <- mzR::openMSfile(mzmlName, backend = "pwiz")
#' chromIndices <- c(37L, 38L, 39L, 40L, 41L, 42L)
#' \dontrun{
Expand Down Expand Up @@ -45,7 +45,7 @@ extractXIC_group <- function(mz, chromIndices){
#' @keywords internal
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' sqName <- paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' sqName <- paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' chromIndices <- c(36L, 37L, 38L, 39L, 40L, 41L)
#' \dontrun{
#' con <- DBI::dbConnect(RSQLite::SQLite(), dbname = sqName)
Expand Down Expand Up @@ -165,7 +165,7 @@ getXICs4AlignObj <- function(mzPntrs, fileInfo, runs, prec2chromIndex, analytes)
#' @inheritParams checkParams
#' @param analytes (integer) a vector of precursor IDs.
#' @param runs (vector of string) names of mzML files without extension.
#' @param dataPath (string) Path to mzml and osw directory.
#' @param dataPath (string) Path to xics and osw directory.
#' @param maxFdrQuery (numeric) A numeric value between 0 and 1. It is used to filter features from osw file which have SCORE_MS2.QVALUE less than itself.
#' @param runType (char) This must be one of the strings "DIA_proteomics", "DIA_Metabolomics".
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
Expand Down
8 changes: 4 additions & 4 deletions R/merge_order.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ getNodeIDs <- function(tree){
#' @description {
#' While traversing from leaf to root node, at each node a master run is created.
#' Merged features and merged chromatograms from parent runs are estimated. Chromatograms are written on the disk
#' at dataPath/mzml. For each precursor aligned parent time-vectors and corresponding child time-vector
#' at dataPath/xics. For each precursor aligned parent time-vectors and corresponding child time-vector
#' are also calculated and written as *_av.rds at dataPath.
#'
#' Accessors to the new files are added to fileInfo, mzPntrs and prec2chromIndex. Features, reference
Expand Down Expand Up @@ -157,7 +157,7 @@ getNodeIDs <- function(tree){
#' rm(mzPntrs)
#' # Cleanup
#' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' }
traverseUp <- function(tree, dataPath, fileInfo, features, mzPntrs, prec2chromIndex, precursors,
params, adaptiveRTs, refRuns, multipeptide, peptideScores, ropenms, applyFun = lapply){
Expand Down Expand Up @@ -228,7 +228,7 @@ traverseUp <- function(tree, dataPath, fileInfo, features, mzPntrs, prec2chromIn
#' # Cleanup
#' rm(mzPntrs)
#' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' }
traverseDown <- function(tree, dataPath, fileInfo, multipeptide, prec2chromIndex, mzPntrs, precursors,
adaptiveRTs, refRuns, params, applyFun = lapply){
Expand Down Expand Up @@ -364,7 +364,7 @@ traverseDown <- function(tree, dataPath, fileInfo, multipeptide, prec2chromIndex
#' # Cleanup
#' rm(mzPntrs)
#' file.remove(file.path(dataPath, "master1_av.rds"))
#' file.remove(file.path(dataPath, "mzml", "master1.chrom.mzML"))
#' file.remove(file.path(dataPath, "xics", "master1.chrom.mzML"))
#' }
alignToMaster <- function(ref, eXp, alignedVecs, refRun, adaptiveRT, multipeptide, prec2chromIndex,
mzPntrs, fileInfo, precursors, params, applyFun = lapply){
Expand Down
8 changes: 4 additions & 4 deletions R/merge_osw_mzml.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@ mergeOswAnalytes_ChromHeader <- function(oswAnalytes, chromHead, analyteFDR = 1

#' Get list of peptides and their chromatogram indices.
#'
#' This function reads all osw and mzml files in the directories at dataPath. It selects analytes which has associated features with m-score < maxFdrQuery.
#' For these analytes it fetches chromatogram indices by matching transition_id(osw) with chromatogramID(mzml).
#' This function reads all osw and xics files in the directories at dataPath. It selects analytes that have associated features with m-score < maxFdrQuery.
#' For these analytes it fetches chromatogram indices by matching transition_id(osw) with chromatogramID(xics).
#' @author Shubham Gupta, \email{[email protected]}
#'
#' ORCID: 0000-0003-3500-8152
#'
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-13
#' @importFrom rlang .data
#' @param dataPath (char) path to mzml and osw directory.
#' @param dataPath (char) path to xics and osw directory.
#' @param filenames (data-frame) column "filename" contains RUN table from osw files. column "runs" contain respective mzML names without extension.
#' To get filenames use DIAlignR::getRunNames function.
#' @param maxFdrQuery (numeric) A numeric value between 0 and 1. It is used to filter features from osw file which have SCORE_MS2.QVALUE less than itself.
Expand Down Expand Up @@ -155,7 +155,7 @@ mapPrecursorToChromIndices <- function(prec2transition, chromHead){

#' Get chromatogram indices of precursors.
#'
#' This function reads the header of chromatogram files. It then fetches chromatogram indices by matching transition_id(osw) with chromatogramID(mzml).
#' This function reads the header of chromatogram files. It then fetches chromatogram indices by matching transition_id(osw) with chromatogramID(xics).
#' @author Shubham Gupta, \email{[email protected]}
#'
#' ORCID: 0000-0003-3500-8152
Expand Down
8 changes: 4 additions & 4 deletions R/merge_runs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Create a child run from two parent runs
#'
#' Get merged features and merged chromatograms from parent runs. Chromatograms are written on the disk
#' at dataPath/mzml. For each precursor aligned parent time-vectors and corresponding child time-vector
#' at dataPath/xics. For each precursor aligned parent time-vectors and corresponding child time-vector
#' are also calculated and written as *_av.rds at dataPath.
#'
#' @author Shubham Gupta, \email{[email protected]}
Expand Down Expand Up @@ -42,7 +42,7 @@
#' multipeptide <- getNodeRun(runA = "run2", runB = "run0", mergeName = mergeName, dataPath = ".", fileInfo, features,
#' mzPntrs, prec2chromIndex, precursors, params, adaptiveRTs, refRuns, multipeptide, peptideScores, ropenms)
#' rm(mzPntrs)
#' file.remove(file.path(".", "mzml", paste0(mergeName, ".chrom.mzML")))
#' file.remove(file.path(".", "xics", paste0(mergeName, ".chrom.mzML")))
#' file.remove(list.files(".", pattern = "*_av.rds", full.names = TRUE))
#' }
getNodeRun <- function(runA, runB, mergeName, dataPath, fileInfo, features, mzPntrs, prec2chromIndex,
Expand Down Expand Up @@ -134,10 +134,10 @@ getNodeRun <- function(runA, runB, mergeName, dataPath, fileInfo, features, mzPn
##### Write node mzML file #####
mergedXICs <- unlist(mergedXICs, recursive = FALSE, use.names = FALSE)
if(params[["chromFile"]] =="mzML"){
fileName <- file.path(dataPath, "mzml", paste0(mergeName, ".chrom.mzML"))
fileName <- file.path(dataPath, "xics", paste0(mergeName, ".chrom.mzML"))
createMZML(ropenms, fileName, mergedXICs, precursors$transition_ids)
} else if(params[["chromFile"]] =="sqMass"){
fileName <- file.path(dataPath, "mzml", paste0(mergeName, ".chrom.sqMass"))
fileName <- file.path(dataPath, "xics", paste0(mergeName, ".chrom.sqMass"))
createSqMass(fileName, mergedXICs, precursors$transition_ids, params[["lossy"]])
}

Expand Down
2 changes: 1 addition & 1 deletion R/peak_area.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ newRow <- function(xics, left, right, RT, analyte, run, params){
#' @importFrom magrittr %>%
#' @inheritParams alignTargetedRuns
#' @param peakTable (data-frame) usually an output of alignTargetedRuns. Must have these columns: run, precursor, leftWidth, rightWidth.
#' @param dataPath (string) path to mzml and osw directory.
#' @param dataPath (string) path to xics and osw directory.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
#' @return (data-frame)
#' @seealso \code{\link{alignTargetedRuns}, \link{calculateIntensity}}
Expand Down
8 changes: 4 additions & 4 deletions R/progressive_alignment.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Peptide quantification through progressive alignment
#'
#' This function expects osw and mzml directories at dataPath. It first reads osw files and fetches
#' This function expects osw and xics directories at dataPath. It first reads osw files and fetches
#' chromatogram indices for each analyte. To perform alignment, first a crude guide-tree is built which
#' can also be provided with newickTree parameter. As we traverse from the leaf-nodes to the root node,
#' runs are aligned pairwise. The root node is named master1 that has average of all fragment ion chromatograms
Expand All @@ -15,11 +15,11 @@
#' Date: 2020-07-10
#' @inheritParams checkParams
#' @inheritParams alignTargetedRuns
#' @param dataPath (string) path to mzml and osw directory.
#' @param dataPath (string) path to xics and osw directory.
#' @param outFile (string) name of the output file.
#' @param ropenms (pyopenms module) get this python module through \code{\link{get_ropenms}}. Required only for chrom.mzML files.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
#' @param runs (string) names of mzml file without extension.
#' @param runs (string) names of xics files without extension.
#' @param newickTree (string) guidance tree in newick format. Look up \code{\link{getTree}}.
#' @return (None)
#' @seealso \code{\link{alignTargetedRuns}}
Expand All @@ -33,7 +33,7 @@
#' # Removing aligned vectors
#' file.remove(list.files(dataPath, pattern = "*_av.rds", full.names = TRUE))
#' # Removing temporarily created master chromatograms
#' file.remove(list.files(file.path(dataPath, "mzml"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' file.remove(list.files(file.path(dataPath, "xics"), pattern = "^master[0-9]+\\.chrom\\.mzML$", full.names = TRUE))
#' file.remove(file.path(dataPath, "test3.temp.RData"))
#' file.remove(file.path(dataPath, "master.merged.osw"))
#' }
Expand Down
4 changes: 2 additions & 2 deletions R/pyopenms.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ addXIC <- function(ropenms, expriment, xic, nativeId){
#' @seealso \code{\link{get_ropenms}, \link{addXIC}}
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' filename <- paste0(dataPath, "/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' filename <- paste0(dataPath, "/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' data(XIC_QFNNTDIVLLEDFQK_3_DIAlignR)
#' XICs <- XIC_QFNNTDIVLLEDFQK_3_DIAlignR[["hroest_K120808_Strep10%PlasmaBiolRepl1_R03_SW_filt"]]
#' nativeIds <- list(27706:27711)
Expand Down Expand Up @@ -117,7 +117,7 @@ get_ropenms <- function(pythonPath = NULL, condaEnv = NULL, useConda=TRUE){

notReady <- function(ropenms, dataPath, filename){
mz = ropenms$OnDiscMSExperiment()
#filename <- paste0(dataPath, "/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#filename <- paste0(dataPath, "/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
mz$openFile(filename)
meta_data <- mz$getMetaData()
header <- meta_data$getChromatograms()
Expand Down
8 changes: 4 additions & 4 deletions R/read_mzml.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
#'
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-13
#' @param mzmlName (char) path to mzml file.
#' @param mzmlName (char) path to xics file.
#' @return (A data-frame) It has 10 columns. The two important columns are:
#' \item{chromatogramId}{(integer) Fragment-ion ID that matches with transition ID in osw file.}
#' \item{chromatogramIndex}{(integer) Index of chromatogram in mzML file.}
#'
#' @keywords internal
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' mzmlName <-paste0(dataPath,"/mzml/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' mzmlName <-paste0(dataPath,"/xics/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.mzML")
#' \dontrun{
#' chromHead <- readChromatogramHeader(mzmlName = mzmlName)
#' }
Expand All @@ -40,15 +40,15 @@ readMzMLHeader <- function(mzmlName){
#'
#' License: (c) Author (2020) + GPL-3
#' Date: 2020-12-25
#' @param mzmlName (char) path to mzml file.
#' @param mzmlName (char) path to xics file.
#' @return (A data-frame) It has 10 columns. The two important columns are:
#' \item{chromatogramId}{(integer) Fragment-ion ID that matches with transition ID in osw file.}
#' \item{chromatogramIndex}{(integer) Index of chromatogram in mzML file.}
#'
#' @keywords internal
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' sqName <-paste0(dataPath,"/mzml/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' sqName <-paste0(dataPath,"/xics/hroest_K120809_Strep0%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' \dontrun{
#' chromHead <- readChromatogramHeader(sqName)
#' }
Expand Down
2 changes: 1 addition & 1 deletion R/read_osw.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fetchAnalytesInfo <- function(oswName, maxFdrQuery, oswMerged,
#'
#' License: (c) Author (2019) + GPL-3
#' Date: 2019-12-13
#' @param dataPath (char) path to mzml and osw directory.
#' @param dataPath (char) path to xics and osw directory.
#' @param filenames (data-frame) column "filename" contains RUN table from osw files. column "runs" contain respective mzML names without extension.
#' To get filenames use \code{\link{getRunNames}} function.
#' @param oswMerged (logical) TRUE for experiment-wide FDR and FALSE for run-specific FDR by pyprophet.
Expand Down
2 changes: 1 addition & 1 deletion R/sqMass.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ createSqMass <- function(filename, XICs, transitionIDs, lossy){
#' @return A numeric vector. Uncompressed form of the Blob.
#' @examples
#' dataPath <- system.file("extdata", package = "DIAlignR")
#' sqName <- paste0(dataPath,"/mzml/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' sqName <- paste0(dataPath,"/xics/hroest_K120809_Strep10%PlasmaBiolRepl2_R04_SW_filt.chrom.sqMass")
#' con <- DBI::dbConnect(RSQLite::SQLite(), dbname = sqName)
#' df1 <- DBI::dbGetQuery(con, "SELECT CHROMATOGRAM_ID, COMPRESSION, DATA_TYPE, DATA FROM DATA WHERE CHROMATOGRAM_ID = 36;")
#' DBI::dbDisconnect(con)
Expand Down
Loading

0 comments on commit 8d9435c

Please sign in to comment.