Update ADAM dependency version to 0.27.0 #33

Open · wants to merge 2 commits into master
deca-cli/pom.xml (10 changes: 9 additions & 1 deletion)
@@ -19,6 +19,13 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
+        <dependencies>
+          <dependency>
+            <groupId>org.bdgenomics.adam</groupId>
+            <artifactId>adam-shade-spark2_${scala.version.prefix}</artifactId>
+            <version>${adam.version}</version>
+          </dependency>
+        </dependencies>
         <configuration>
           <createDependencyReducedPom>false</createDependencyReducedPom>
           <filters>
@@ -39,8 +46,9 @@
               <goal>shade</goal>
             </goals>
             <configuration>
+              <shaderHint>workaround</shaderHint>
               <transformers>
-                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
               </transformers>
             </configuration>
           </execution>
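Note: `adam-shade-spark2` provides a custom shader implementation for maven-shade-plugin, selected by the `<shaderHint>workaround</shaderHint>` setting in the second hunk; ADAM appears to ship it to work around problems that arise when shading its Parquet/Avro dependencies. The `${adam.version}` and `${scala.version.prefix}` properties are assumed to be defined in the parent pom, with `adam.version` bumped to 0.27.0 per this PR's title.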
deca-cli/src/main/scala/org/bdgenomics/deca/cli/CNVer.scala (12 changes: 6 additions & 6 deletions)
@@ -103,26 +103,26 @@ class CNVer(protected val args: CNVerArgs) extends BDGSparkCommand[CNVerArgs] {
       ARF.duplicateRead,
       ARF.failedVendorQualityChecks,
       ARF.primaryAlignment,
-      ARF.mapq,
-      ARF.contigName,
+      ARF.mappingQuality,
+      ARF.referenceName,
       ARF.start,
       ARF.end,
       ARF.cigar,
       ARF.mateMapped,
-      ARF.mateContigName,
+      ARF.mateReferenceName,
       ARF.mateAlignmentStart,
-      ARF.inferredInsertSize)
+      ARF.insertSize)
     Projection(readFields: _*)
   }

   val readsRdds = args.readsPaths.map(path => {
     // TODO: Add push down filters
-    log.info("Loading {} alignment file", path)
+    info("Loading %s alignment file".format(path))
     sc.loadAlignments(path, optProjection = Some(readProj), stringency = ValidationStringency.SILENT)
   })

   val targetsAsFeatures = {
-    val targetProj = Projection(FF.contigName, FF.start, FF.end)
+    val targetProj = Projection(FF.referenceName, FF.start, FF.end)
     sc.loadFeatures(args.targetsPath, optProjection = Some(targetProj))
   }

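Note: the pattern in this file, repeated throughout the diff below, is the ADAM 0.27.0 API rename: `contigName` is now `referenceName`, `mapq` is now `mappingQuality`, `inferredInsertSize` is now `insertSize`, and logging goes through the updated `Logging` trait's `info` method, which takes a preformatted message. A minimal sketch of a projection under the new names (illustrative only; `ARF` and `FF` are the `AlignmentRecordField`/`FeatureField` aliases already used in this file, and the field list is abbreviated):

```scala
import org.bdgenomics.adam.projections.{ AlignmentRecordField => ARF, FeatureField => FF, Projection }

// Abbreviated read projection using the renamed 0.27.0 fields.
val readProj = Projection(
  ARF.readMapped,     // unchanged
  ARF.mappingQuality, // formerly ARF.mapq
  ARF.referenceName,  // formerly ARF.contigName
  ARF.insertSize)     // formerly ARF.inferredInsertSize

// Target projection with the renamed feature field.
val targetProj = Projection(FF.referenceName, FF.start, FF.end)
```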
deca-cli/src/main/scala/org/bdgenomics/deca/cli/Coverager.scala (12 changes: 6 additions & 6 deletions)
@@ -88,25 +88,25 @@ class Coverager(protected val args: CoveragerArgs) extends BDGSparkCommand[CoveragerArgs] {
       ARF.duplicateRead,
       ARF.failedVendorQualityChecks,
       ARF.primaryAlignment,
-      ARF.mapq,
-      ARF.contigName,
+      ARF.mappingQuality,
+      ARF.referenceName,
       ARF.start,
       ARF.end,
       ARF.cigar,
       ARF.mateMapped,
-      ARF.mateContigName,
+      ARF.mateReferenceName,
       ARF.mateAlignmentStart,
-      ARF.inferredInsertSize)
+      ARF.insertSize)
     Projection(readFields: _*)
   }

   val readsRdds = args.readsPaths.map(path => {
     // TODO: Add push down filters
-    log.info("Loading {} alignment file", path)
+    info("Loading %s alignment file".format(path))
     sc.loadAlignments(path, optProjection = Some(readProj), stringency = ValidationStringency.SILENT)
   })

-  val targetProj = Projection(FF.contigName, FF.start, FF.end)
+  val targetProj = Projection(FF.referenceName, FF.start, FF.end)
   val targetsAsFeatures = sc.loadFeatures(args.targetsPath, optProjection = Some(targetProj))

   var matrix = Coverage.coverageMatrix(readsRdds, targetsAsFeatures, minMapQ = args.minMappingQuality)
deca-core/src/main/scala/org/bdgenomics/deca/Coverage.scala (26 changes: 13 additions & 13 deletions)
@@ -23,8 +23,8 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.mllib.linalg.distributed.{ IndexedRow, IndexedRowMatrix }
 import org.apache.spark.rdd.RDD
 import org.bdgenomics.adam.models.ReferenceRegion
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
 import org.bdgenomics.adam.rich.RichAlignmentRecord
 import org.bdgenomics.deca.coverage.ReadDepthMatrix
 import org.bdgenomics.deca.util.MLibUtils
@@ -69,7 +69,7 @@ object Coverage extends Serializable with Logging {

   def totalCoverageOfRegion(region: ReferenceRegion, read: RichAlignmentRecord): Long = {
     // "Clip" bases when the insert size is shorter than the read length
-    val insert = read.getInferredInsertSize
+    val insert = read.getInsertSize
     val regionStart = if (insert != null && insert < 0) max(region.start, read.getEnd + insert - 1) else region.start
     val regionEnd = if (insert != null && insert > 0) min(region.end, read.getStart + insert + 1) else region.end

@@ -93,7 +93,7 @@

   private def perBaseCoverageOfRegion(region: ReferenceRegion, pileup: DenseVector[Int], read: RichAlignmentRecord): DenseVector[Int] = {
     // "Clip" bases when the insert size is shorter than the read length
-    val insert = read.getInferredInsertSize
+    val insert = read.getInsertSize
     val regionStart = if (insert != null && insert < 0) max(region.start, read.getEnd + insert - 1) else region.start
     val regionEnd = if (insert != null && insert > 0) min(region.end, read.getStart + insert + 1) else region.end

@@ -138,12 +138,12 @@
   }

   private def readOverlapsTarget(read: AlignmentRecord, target: ReferenceRegion): Boolean = {
-    read.getContigName == target.referenceName && read.getEnd() > target.start && read.getStart < target.end
+    read.getReferenceName == target.referenceName && read.getEnd() > target.start && read.getStart < target.end
   }

   private def couldOverlapMate(read: AlignmentRecord): Boolean = {
     read.getMateMapped &&
-      read.getMateContigName == read.getContigName &&
+      read.getMateReferenceName == read.getReferenceName &&
       read.getMateAlignmentStart >= read.getStart && read.getMateAlignmentStart < read.getEnd
   }

@@ -168,7 +168,7 @@
     if (first < targets.length) Some(first) else None
   }

-  def sortedCoverageCalculation(targets: Broadcast[Array[(ReferenceRegion, Int)]], reads: AlignmentRecordRDD): RDD[(Int, Double)] = {
+  def sortedCoverageCalculation(targets: Broadcast[Array[(ReferenceRegion, Int)]], reads: AlignmentRecordDataset): RDD[(Int, Double)] = {
     val targetsArray = targets.value
     reads.rdd.mapPartitions(readIter => {
       if (!readIter.hasNext) {
@@ -193,7 +193,7 @@
         }

         return scanForward(read, firstIndex, targetIndex + 1, mateOverlap)
-      } else if (read.getContigName == targetRegion.referenceName && read.getStart >= targetRegion.end)
+      } else if (read.getReferenceName == targetRegion.referenceName && read.getStart >= targetRegion.end)
         return scanForward(read, targetIndex + 1, targetIndex + 1, mateOverlap)
       else
         return firstIndex
@@ -210,7 +210,7 @@
       breakable {
         readIter.foreach(read => {
           // Crossed a contig boundary? Reset firstRead and firstTarget for coverage analysis of next contig
-          if (read.getContigName != firstRead.getContigName) {
+          if (read.getReferenceName != firstRead.getReferenceName) {
             firstRead = read
             firstTarget = findFirstTarget(targetsArray, ReferenceRegion.unstranded(read))
             break // Should happen infrequently
@@ -252,7 +252,7 @@
     new IndexedRowMatrix(indexedRows, numSamples, numTargets.toInt)
   }

-  def coverageMatrix(readRdds: Seq[AlignmentRecordRDD], targets: FeatureRDD, minMapQ: Int = 0, numPartitions: Option[Int] = None): ReadDepthMatrix = ComputeReadDepths.time {
+  def coverageMatrix(readRdds: Seq[AlignmentRecordDataset], targets: FeatureDataset, minMapQ: Int = 0, numPartitions: Option[Int] = None): ReadDepthMatrix = ComputeReadDepths.time {
     // Sequence dictionary parsing is broken in current ADAM release:
     // https://github.com/bigdatagenomics/adam/issues/1409
     // which breaks the desired sorting of the targets. Upgrading to a newer version of ADAM did not fix the issues as
@@ -273,10 +273,10 @@
     // TODO: Verify inputs, e.g. BAM files, are in sorted order

     val samplesDriver = readRdds.map(readsRdd => {
-      val samples = readsRdd.recordGroups.toSamples
+      val samples = readsRdd.readGroups.toSamples
       if (samples.length > 1)
         throw new IllegalArgumentException("reads RDD must be a single sample")
-      samples.head.getSampleId
+      samples.head.getId
     }).toArray

     val numSamples = readRdds.length
@@ -291,7 +291,7 @@
         !read.getFailedVendorQualityChecks &&
           read.getPrimaryAlignment &&
           read.getReadMapped &&
-          (minMapQ == 0 || read.getMapq >= minMapQ)
+          (minMapQ == 0 || read.getMappingQuality >= minMapQ)
       }))

       sortedCoverageCalculation(broadcastTargetArray, filteredReadsRdd)
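Note: apart from the getter renames, the insert-size clipping in `totalCoverageOfRegion` and `perBaseCoverageOfRegion` is untouched by this PR. A worked example with hypothetical coordinates, since the arithmetic is easy to misread:

```scala
// Hypothetical numbers, not from the PR: a 76 bp read aligned at [100, 176)
// whose sequenced fragment is only 50 bp long (insert size -50, mate upstream).
val regionStart = 100L // target region starts at 100
val readEnd     = 176L // read.getEnd
val insert      = -50L // read.getInsertSize

// Mirrors `max(region.start, read.getEnd + insert - 1)` from the diff above:
val clippedStart = math.max(regionStart, readEnd + insert - 1) // max(100, 125) = 125
// Bases in [100, 125) fall outside the sequenced fragment, so coverage for
// this read is only counted from position 125 onward.
```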
deca-core/src/main/scala/org/bdgenomics/deca/HMM.scala (14 changes: 7 additions & 7 deletions)
@@ -20,12 +20,12 @@ package org.bdgenomics.deca
 import org.apache.spark.SparkContext
 import org.apache.spark.storage.StorageLevel
 import org.bdgenomics.adam.models.SequenceDictionary
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
 import org.bdgenomics.deca.Timers._
 import org.bdgenomics.deca.coverage.ReadDepthMatrix
 import org.bdgenomics.deca.hmm.{ SampleModel, TransitionProbabilities }
 import org.bdgenomics.deca.util.MLibUtils
-import org.bdgenomics.formats.avro.{ Feature, Strand }
+import org.bdgenomics.formats.avro.{ Feature, Sample, Strand }
 import org.bdgenomics.utils.misc.Logging

 /**
@@ -36,7 +36,7 @@ object HMM extends Serializable with Logging {
   def discoverCNVs(readMatrix: ReadDepthMatrix,
                    sequences: SequenceDictionary = SequenceDictionary.empty,
                    M: Double = 3, T: Double = 6, p: Double = 1e-8, D: Double = 70000,
-                   minSomeQuality: Double = 30.0): FeatureRDD = DiscoverCNVs.time {
+                   minSomeQuality: Double = 30.0): FeatureDataset = DiscoverCNVs.time {

     val sc = SparkContext.getOrCreate()

@@ -81,14 +81,14 @@

       if (start_target.referenceName == end_target.referenceName) {
         val builder = Feature.newBuilder(raw_feature)
-        builder.setContigName(start_target.referenceName)
+        builder.setReferenceName(start_target.referenceName)
         builder.setStart(start_target.start)
         builder.setEnd(end_target.end)

         Iterable(process(builder))
       } else {
         val builder1 = Feature.newBuilder(raw_feature)
-        builder1.setContigName(start_target.referenceName)
+        builder1.setReferenceName(start_target.referenceName)
         builder1.setStart(start_target.start)
         val optCl: Option[Long] = sequences(start_target.referenceName).map(_.length)
         val cl: Long = optCl.getOrElse(Long.MaxValue)
@@ -97,7 +97,7 @@
         builder1.setEnd(jcl)

         val builder2 = Feature.newBuilder(raw_feature)
-        builder2.setContigName(end_target.referenceName)
+        builder2.setReferenceName(end_target.referenceName)
         builder2.setStart(0L)
         builder2.setEnd(end_target.end)

@@ -106,6 +106,6 @@
       })
     })

-    FeatureRDD(cnvs, sequences)
+    FeatureDataset(cnvs, sd = sequences, samples = Seq.empty[Sample])
   }
 }
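Note: the final hunk reflects the `FeatureDataset` companion constructor in ADAM 0.27.0, which also takes the samples described by the dataset, hence the added `Sample` import. A minimal sketch of the call shape, assuming an `RDD[Feature]` and a `SequenceDictionary` as in the code above:

```scala
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.SequenceDictionary
import org.bdgenomics.adam.rdd.feature.FeatureDataset
import org.bdgenomics.formats.avro.{ Feature, Sample }

// DECA's CNV calls carry no per-sample metadata, so an empty sample list is passed.
def toFeatureDataset(cnvs: RDD[Feature], sequences: SequenceDictionary): FeatureDataset =
  FeatureDataset(cnvs, sd = sequences, samples = Seq.empty[Sample])
```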
deca-core/src/test/scala/org/bdgenomics/deca/HMMSuite.scala (4 changes: 2 additions & 2 deletions)
@@ -81,7 +81,7 @@ class HMMSuite extends DecaFunSuite {

     val del = cnvs.rdd.filter(f => Option(f.getSource).exists(_.equals("HG00121"))).first()
     assert(del.getFeatureType() == "DEL")
-    assert(del.getContigName() == "22")
+    assert(del.getReferenceName() == "22")
     assert(del.getStart() == 18898401L)
     assert(del.getEnd() == 18913235L)
     assert(aboutEq(del.getScore(), 9.167934190998345))
@@ -95,7 +95,7 @@

     val dup = cnvs.rdd.filter(f => Option(f.getSource).exists(_.equals("HG00113"))).first()
     assert(dup.getFeatureType() == "DUP")
-    assert(dup.getContigName() == "22")
+    assert(dup.getReferenceName() == "22")
     assert(dup.getStart() == 17071767L)
     assert(dup.getEnd() == 17073440L)
     assert(aboutEq(dup.getScore(), 25.321428730083596))