From 2261927f5a60de62c7a365a751d7119995e3443f Mon Sep 17 00:00:00 2001 From: Mark Tomko <610104+mtomko@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:57:27 -0800 Subject: [PATCH] Condition report * Rename parameter * Add output file type * case class hygiene * Add output file to config * Write summary file * Update existing tests * Test for summary writer * Add file to command line parser * Update readme and manual * Set version to 3.11.0-SNAPSHOT * Update changelog --- CHANGELOG.md | 3 + README.md | 15 ++- docs/MANUAL.md | 9 +- .../org/broadinstitute/gpp/poolq3/PoolQ.scala | 19 +++- .../gpp/poolq3/PoolQConfig.scala | 6 + .../gpp/poolq3/reports/QualityWriter.scala | 78 ++++++++----- .../gpp/poolq3/types/OutputFileType.scala | 1 + .../gpp/poolq3/types/PoolQSummary.scala | 2 +- .../integration/UnlabeledConditionsTest.scala | 2 + .../legacy/LegacyIntegrationTest.scala | 14 +++ .../ConditionBarcodeCountsSummaryTest.scala | 104 ++++++++++++++++++ version.sbt | 2 +- 12 files changed, 213 insertions(+), 42 deletions(-) create mode 100644 src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala diff --git a/CHANGELOG.md b/CHANGELOG.md index dfeb748..109badd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 3.11.0 +* Machine-parseable condition barcode summary file + ## 3.10.0 * More efficient and memory-safe sampling technique for unexpected sequence reporting diff --git a/README.md b/README.md index 9eaf6b5..b40bd01 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ other information that can be used to troubleshoot experiments. These include ma locations, matching correlations between barcodes, and lists of frequently-occurring unknown barcodes. ## Documentation - For information on how to run PoolQ and its various modes and options, please see the [manual](docs/MANUAL.md). We also maintain a [changelog](CHANGELOG.md) listing updates made to PoolQ. @@ -41,13 +40,13 @@ associated licenses. 
PoolQ was completely rewritten for version 3. The new code is faster and the codebase is much cleaner and more maintainable. We have taken the opportunity to make other changes to PoolQ as well. -- There are substantial changes to the command-line interface for the program. -- The default counts file format has changed slightly, although there is a command-line - argument that indicates that PoolQ 3 should write a backwards-compatible counts file. The differences - are in headers only; file parsers should be able to adapt easily. -- The quality file has changed somewhat. Importantly, the definition of certain statistics has changed - slightly, so quality metrics cannot be directly compared between the the new and old versions. In addition, - we no longer provide normalized match counts. +* There are substantial changes to the command-line interface for the program. +* The default counts file format has changed slightly, although there is a command-line +argument that indicates that PoolQ 3 should write a backwards-compatible counts file. The differences +are in headers only; file parsers should be able to adapt easily. +* The quality file has changed somewhat. Importantly, the definition of certain statistics has changed +slightly, so quality metrics cannot be directly compared between the new and old versions. In addition, +we no longer provide normalized match counts. See the [manual](docs/MANUAL.md) for complete details on the differences versions 2 and 3. diff --git a/docs/MANUAL.md b/docs/MANUAL.md index ebd8d91..8b724c4 100644 --- a/docs/MANUAL.md +++ b/docs/MANUAL.md @@ -2,7 +2,7 @@ PoolQ is a counter for indexed samples from next-gen sequencing of pooled DNA. -_This documentation covers PoolQ version 3.10.0 (last updated 02/12/2024)._ +_This documentation covers PoolQ version 3.11.0 (last updated 02/13/2024)._ ## Background @@ -559,7 +559,7 @@ PoolQ you will need a Java 8 JDK. 
You can download an appropriate JRE or JDK fro You can download PoolQ from an as yet undetermined location. The file you download is a ZIP file that you will need to unzip. In most cases, this is as simple as right-clicking on the zip file, and selecting something like "extract contents" from the popup menu. This will create a new folder on -your computer named `poolq-3.10.0`, with the following contents: +your computer named `poolq-3.11.0`, with the following contents: - `poolq3.jar` - `poolq3.bat` @@ -610,7 +610,7 @@ You can run PoolQ from any Windows, Mac, or Linux machine, but it requires some how to launch programs from the command line on your given operating system. 1. Open a terminal window for your operating system -2. Change directories to the `poolq-3.10.0` directory +2. Change directories to the `poolq-3.11.0` directory - On Windows, run: @@ -627,7 +627,7 @@ how to launch programs from the command line on your given operating system. If you successfully launched PoolQ, you should see a usage message explaining all of the command-line options: - poolq3 3.10.0 + poolq3 3.11.0 Usage: poolq [options] --row-reference reference file for row barcodes (i.e., constructs) @@ -652,6 +652,7 @@ command-line options: --umi-counts-dir --umi-barcode-counts-dir --quality + --condition-barcode-counts-summary --counts --normalized-counts --barcode-counts diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala index a100542..fca9e06 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQ.scala @@ -33,6 +33,7 @@ import org.broadinstitute.gpp.poolq3.reports.{ } import org.broadinstitute.gpp.poolq3.types.{ BarcodeCountsFileType, + ConditionBarcodeCountsSummaryFileType, CountsFileType, LogNormalizedCountsFileType, OutputFileType, @@ -49,7 +50,14 @@ object PoolQ { private[this] val log: Logger = getLogger private[this] val 
AlwaysWrittenFiles: Set[OutputFileType] = - Set(CountsFileType, QualityFileType, LogNormalizedCountsFileType, BarcodeCountsFileType, RunInfoFileType) + Set( + CountsFileType, + QualityFileType, + ConditionBarcodeCountsSummaryFileType, + LogNormalizedCountsFileType, + BarcodeCountsFileType, + RunInfoFileType + ) final def main(args: Array[String]): Unit = PoolQConfig.parse(args) match { @@ -169,7 +177,14 @@ object PoolQ { config.reportsDialect ) _ = log.info(s"Writing quality file ${config.output.qualityFile}") - _ <- QualityWriter.write(config.output.qualityFile, state, rowReference, colReference, config.isPairedEnd) + _ <- QualityWriter.write( + config.output.qualityFile, + config.output.conditionBarcodeCountsSummaryFile, + state, + rowReference, + colReference, + config.isPairedEnd + ) _ <- umiInfo.fold(().pure[Try])(_ => UmiQualityWriter.write(config.output.umiQualityFile, state)) _ = log.info(s"Writing log-normalized counts file ${config.output.normalizedCountsFile}") normalizedCounts = LogNormalizedCountsWriter.logNormalizedCounts(counts, rowReference, colReference) diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala index 7e86223..f7daa8c 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala @@ -74,6 +74,7 @@ final case class PoolQOutput( normalizedCountsFile: Path = Paths.get("lognormalized-counts.txt"), barcodeCountsFile: Path = Paths.get("barcode-counts.txt"), qualityFile: Path = Paths.get("quality.txt"), + conditionBarcodeCountsSummaryFile: Path = Paths.get("condition-barcode-counts-summary.txt"), correlationFile: Path = Paths.get("correlation.txt"), unexpectedSequencesFile: Path = Paths.get("unexpected-sequences.txt"), umiQualityFile: Path = Paths.get("umi-quality.txt"), @@ -253,6 +254,11 @@ object PoolQConfig { val _ = opt[Path]("quality").valueName("").action((f, c) => 
c.copy(output = c.output.copy(qualityFile = f))) + val _ = + opt[Path]("condition-barcode-counts-summary") + .valueName("") + .action((f, c) => c.copy(output = c.output.copy(conditionBarcodeCountsSummaryFile = f))) + val _ = opt[Path]("counts").valueName("").action((f, c) => c.copy(output = c.output.copy(countsFile = f))) val _ = opt[Path]("normalized-counts").valueName("").action { (f, c) => diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala index 987eac7..ff050ab 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/QualityWriter.scala @@ -16,17 +16,39 @@ import org.broadinstitute.gpp.poolq3.reference.Reference object QualityWriter { + class TeeWriter(w1: PrintWriter, w2: PrintWriter) { + + def print(s: String): Unit = { + w1.print(s) + w2.print(s) + } + + def println(s: String): Unit = { + w1.println(s) + w2.println(s) + } + + def println(): Unit = { + w1.println() + w2.println() + } + + } + def write( - file: Path, + qualityFile: Path, + conditionBarcodeCountsSummaryFile: Path, state: State, rowReference: Reference, colReference: Reference, isPairedEnd: Boolean ): Try[Unit] = - Using(new PrintWriter(file.toFile)) { writer => - val barcodeLocationStats = - if (isPairedEnd) { - s"""Reads with no construct barcode: ${state.rowBarcodeNotFound + state.revRowBarcodeNotFound - state.neitherRowBarcodeFound} + Try { + Using.resources(new PrintWriter(qualityFile.toFile), new PrintWriter(conditionBarcodeCountsSummaryFile.toFile)) { + case (qualityWriter, cbcsWriter) => + val barcodeLocationStats = + if (isPairedEnd) { + s"""Reads with no construct barcode: ${state.rowBarcodeNotFound + state.revRowBarcodeNotFound - state.neitherRowBarcodeFound} | |Reads with no forward construct barcode: ${state.rowBarcodeNotFound} |Max forward construct barcode index: 
${state.rowBarcodeStats.maxPosStr} @@ -38,15 +60,15 @@ object QualityWriter { |Min reverse construct barcode index: ${state.revRowBarcodeStats.minPosStr} |Avg reverse construct barcode index: ${decOptFmt(state.revRowBarcodeStats.avg)}""".stripMargin - } else { - s"""Reads with no construct barcode: ${state.rowBarcodeNotFound} + } else { + s"""Reads with no construct barcode: ${state.rowBarcodeNotFound} |Max construct barcode index: ${state.rowBarcodeStats.maxPosStr} |Min construct barcode index: ${state.rowBarcodeStats.minPosStr} |Avg construct barcode index: ${decOptFmt(state.rowBarcodeStats.avg)}""".stripMargin - } + } - val header = - s"""Total reads: ${state.reads} + val header = + s"""Total reads: ${state.reads} |Matching reads: ${state.matches} |1-base mismatch reads: ${state.matches - state.exactMatches} | @@ -55,25 +77,29 @@ object QualityWriter { |$barcodeLocationStats |""".stripMargin - writer.println(header) + qualityWriter.println(header) - writer.println(s"Read counts for sample barcodes with associated conditions:") - writer.println( - s"Barcode\tCondition\tMatched (Construct+Sample Barcode)\tMatched Sample Barcode\t% Match\tNormalized Match" - ) - colReference.allBarcodes.foreach { colBarcode => - val data = perBarcodeQualityData(state, rowReference, colReference, colBarcode) - writer.println(data.mkString("\t")) - } + qualityWriter.println(s"Read counts for sample barcodes with associated conditions:") + + // use a TeeWriter for the next section of the report + val tw = new TeeWriter(qualityWriter, cbcsWriter) + tw.println( + s"Barcode\tCondition\tMatched (Construct+Sample Barcode)\tMatched Sample Barcode\t% Match\tNormalized Match" + ) + colReference.allBarcodes.foreach { colBarcode => + val data = perBarcodeQualityData(state, rowReference, colReference, colBarcode) + tw.println(data.mkString("\t")) + } - writer.println() - writer.println("Read counts for most common sample barcodes without associated conditions:") - val unepectedBarcodeFrequencies 
= - state.unknownCol.keys.map(barcode => BarcodeFrequency(barcode, state.unknownCol.count(barcode))).toSeq - topN(unepectedBarcodeFrequencies, 100).foreach { case BarcodeFrequency(barcode, count) => - writer.println(barcode + "\t" + count.toString) + qualityWriter.println() + qualityWriter.println("Read counts for most common sample barcodes without associated conditions:") + val unepectedBarcodeFrequencies = + state.unknownCol.keys.map(barcode => BarcodeFrequency(barcode, state.unknownCol.count(barcode))).toSeq + topN(unepectedBarcodeFrequencies, 100).foreach { case BarcodeFrequency(barcode, count) => + qualityWriter.println(barcode + "\t" + count.toString) + } + qualityWriter.println() } - writer.println() } private[this] def decOptFmt(d: Option[Double]): String = d.map(Decimal00Format.format).getOrElse("N/A") diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala index e4dfaae..3dfccec 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/OutputFileType.scala @@ -8,6 +8,7 @@ package org.broadinstitute.gpp.poolq3.types trait OutputFileType extends Product with Serializable case object CountsFileType extends OutputFileType case object QualityFileType extends OutputFileType +case object ConditionBarcodeCountsSummaryFileType extends OutputFileType case object LogNormalizedCountsFileType extends OutputFileType case object BarcodeCountsFileType extends OutputFileType case object CorrelationFileType extends OutputFileType diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala index 03832a7..33fad18 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/types/PoolQSummary.scala @@ -5,4 +5,4 @@ */ package 
org.broadinstitute.gpp.poolq3.types -case class PoolQSummary(runSummary: PoolQRunSummary, outputFiles: Set[OutputFileType]) +final case class PoolQSummary(runSummary: PoolQRunSummary, outputFiles: Set[OutputFileType]) diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala index 09b9611..12c6938 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/UnlabeledConditionsTest.scala @@ -23,6 +23,7 @@ class UnlabeledConditionsTest extends CatsEffectSuite with TestResources { barcodeCountsFile <- tempFile[IO]("barcode-counts", ".txt") normalizedCountsFile <- tempFile[IO]("normcounts", ".txt") qualityFile <- tempFile[IO]("quality", ".txt") + conditionBarcodeCountsSummaryFile <- tempFile[IO]("condition-barcode-counts-summary", ".txt") correlationFile <- tempFile[IO]("correlation", ".txt") unexpectedSequencesFile <- tempFile[IO]("unexpected", ".txt") runInfoFile <- tempFile[IO]("runinfo", ".txt") @@ -32,6 +33,7 @@ class UnlabeledConditionsTest extends CatsEffectSuite with TestResources { normalizedCountsFile = normalizedCountsFile, barcodeCountsFile = barcodeCountsFile, qualityFile = qualityFile, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile, correlationFile = correlationFile, unexpectedSequencesFile = unexpectedSequencesFile, runInfoFile = runInfoFile diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala index b08e733..2333ec3 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/integration/legacy/LegacyIntegrationTest.scala @@ -33,6 +33,7 @@ class 
LegacyIntegrationTest extends AnyFlatSpec with TestResources { barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -49,6 +50,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -75,6 +77,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -91,6 +94,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, 
unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -126,6 +130,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -142,6 +147,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -170,6 +176,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -186,6 +193,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + 
conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -215,6 +223,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -232,6 +241,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -255,6 +265,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -272,6 +283,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = 
normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath @@ -299,6 +311,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile <- File.temporaryFile("normcounts", ".txt") barcodeCountsFile <- File.temporaryFile("barcode-counts", ".txt") qualityFile <- File.temporaryFile("quality", ".txt") + conditionBarcodeCountsSummaryFile <- File.temporaryFile("condition-barcode-counts-summary", ".txt") correlationFile <- File.temporaryFile("correlation", ".txt") unexpectedSequencesFile <- File.temporaryFile("unexpected", ".txt") unexpectedSequenceCacheDir <- File.temporaryDirectory("unexpected-cache") @@ -315,6 +328,7 @@ class LegacyIntegrationTest extends AnyFlatSpec with TestResources { normalizedCountsFile = normalizedCountsFile.toJava.toPath, barcodeCountsFile = barcodeCountsFile.toJava.toPath, qualityFile = qualityFile.toJava.toPath, + conditionBarcodeCountsSummaryFile = conditionBarcodeCountsSummaryFile.toJava.toPath, correlationFile = correlationFile.toJava.toPath, unexpectedSequencesFile = unexpectedSequencesFile.toJava.toPath, runInfoFile = runInfoFile.toJava.toPath diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala new file mode 100644 index 0000000..b0e4340 --- /dev/null +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/ConditionBarcodeCountsSummaryTest.scala @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 The Broad Institute, Inc. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.broadinstitute.gpp.poolq3.reports + +import cats.effect.IO +import fs2.io.file.Files +import munit.CatsEffectSuite +import org.broadinstitute.gpp.poolq3.hist.{BasicShardedHistogram, OpenHashMapHistogram, TupleHistogram} +import org.broadinstitute.gpp.poolq3.parser.ReferenceEntry +import org.broadinstitute.gpp.poolq3.process.State +import org.broadinstitute.gpp.poolq3.reference.ExactReference + +class ConditionBarcodeCountsSummaryTest extends CatsEffectSuite { + + private val Condition1 = "DMSO" + private val Condition2 = "ITMFA" + private val Condition3 = "No Drug" + private val SampleBarcode1 = "GTAT" + private val SampleBarcode2 = "ACAT" + private val SampleBarcode3 = "TCAG" + private val SampleBarcode4 = "TCCG" + + // we only need 1 construct to populate the report + private val Construct1 = "AACCGGTTAACCGGTTTTAAG" + private val ConstructId1 = "BRDN01" + + private val Constructs = List(ReferenceEntry(Construct1, ConstructId1)) + + private val rowReference = ExactReference(Constructs, identity, includeAmbiguous = false) + + private val colReference = + ExactReference( + List( + ReferenceEntry(SampleBarcode1, Condition1), + ReferenceEntry(SampleBarcode2, Condition2), + ReferenceEntry(SampleBarcode3, Condition3), + ReferenceEntry(SampleBarcode4, Condition3) + ), + identity, + includeAmbiguous = false + ) + + def emptyState(): State = + new State( + new BasicShardedHistogram[String, (String, String)](new TupleHistogram()), + new OpenHashMapHistogram(), + new OpenHashMapHistogram(), + new OpenHashMapHistogram() + ) + + test("condition barcode counts summary") { + val sample1MatchesBoth = 10 + val sample2MatchesBoth = 7 + val sample3MatchesBoth = 28 + val sample4MatchesBoth = 3 + + val sample1MatchesCol = sample1MatchesBoth + 8 + val sample2MatchesCol = sample2MatchesBoth + 3 + val sample3MatchesCol = sample3MatchesBoth + 17 + val sample4MatchesCol = sample4MatchesBoth + 11 + + Files[IO].tempDirectory.use 
{ tmpDir => + val cbcs = tmpDir / "cbcs.txt" + + // fill out state + val state = emptyState() + 0.until(sample1MatchesBoth).foreach(_ => state.known.increment(None, (Construct1, SampleBarcode1))) + 0.until(sample2MatchesBoth).foreach(_ => state.known.increment(None, (Construct1, SampleBarcode2))) + 0.until(sample3MatchesBoth).foreach(_ => state.known.increment(None, (Construct1, SampleBarcode3))) + 0.until(sample4MatchesBoth).foreach(_ => state.known.increment(None, (Construct1, SampleBarcode4))) + + 0.until(sample1MatchesCol).foreach(_ => state.knownCol.increment(SampleBarcode1)) + 0.until(sample2MatchesCol).foreach(_ => state.knownCol.increment(SampleBarcode2)) + 0.until(sample3MatchesCol).foreach(_ => state.knownCol.increment(SampleBarcode3)) + 0.until(sample4MatchesCol).foreach(_ => state.knownCol.increment(SampleBarcode4)) + + state.reads = sample1MatchesCol + sample2MatchesCol + sample3MatchesCol + sample4MatchesCol + 5 + + IO.blocking { + QualityWriter + .write((tmpDir / "quality.txt").toNioPath, cbcs.toNioPath, state, rowReference, colReference, false) + .get + } >> + Files[IO] + .readUtf8(cbcs) + .compile + .lastOrError + .assertEquals( + """Barcode Condition Matched (Construct+Sample Barcode) Matched Sample Barcode % Match Normalized Match + |GTAT DMSO 10 18 55.56 16.730 + |ACAT ITMFA 7 10 70.00 16.215 + |TCAG No Drug 28 45 62.22 18.215 + |TCCG No Drug 3 14 21.43 14.993 + |""".stripMargin + ) + + } + + } + +} diff --git a/version.sbt b/version.sbt index 6cf7bb9..03e4cb1 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "3.10.1-SNAPSHOT" +ThisBuild / version := "3.11.0-SNAPSHOT"