Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: raise exception if CollectDuplexSeqMetrics run on consensus BAM #1003

Merged
merged 13 commits into from
Jul 31, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,18 @@ class CollectDuplexSeqMetrics
// Build the iterator we'll use based on whether or not we're restricting to a set of intervals
val in = SamSource(input)
val _filteredIterator = in.iterator.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
.bufferBetter
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
// Ensure the records are not consensus records
_filteredIterator.headOption.foreach {
r =>
def exceptionString = s"Input BAM file to CollectDuplexSeqMetrics ($input) appears to contain consensus reads." +
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
"CollectDuplexSeqMetrics cannot run on consensus BAMs, and instead requires the UMI-grouped BAM generated " +
"prior to consensus calling. The UMI-grouped BAM is the output of running GroupReadsByUmi." +
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
s"\nFirst record in $input has consensus tags present:\n${_filteredIterator.head}"
clintval marked this conversation as resolved.
Show resolved Hide resolved

if (Umis.isConsensusRead(r)) throw new IllegalArgumentException(exceptionString)
else r
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
}
val iterator = intervals match {
case None => _filteredIterator
case Some(path) =>
Expand Down
11 changes: 11 additions & 0 deletions src/main/scala/com/fulcrumgenomics/umi/Umis.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.umi.ConsensusTags
import com.fulcrumgenomics.util.Sequences

object Umis {
Expand Down Expand Up @@ -127,4 +128,14 @@ object Umis {
@inline private def isValidUmiCharacter(ch: Char): Boolean = {
ch == 'A' || ch == 'C' || ch == 'G' || ch == 'T' || ch == 'N' || ch == '-'
}

/** Tests if a record is a consensus or not
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
*
* @param rec the record to test
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
* @return boolean indicating if the record is a consensus or not
*/
def isConsensusRead(rec: SamRecord): Boolean = {
znorgaard marked this conversation as resolved.
Show resolved Hide resolved
rec.contains(ConsensusTags.PerRead.RawReadCount) ||
(rec.contains(ConsensusTags.PerRead.AbRawReadCount) && rec.contains(ConsensusTags.PerRead.BaRawReadCount))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ package com.fulcrumgenomics.umi

import java.nio.file.{Path, Paths}
import java.util.Random

import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.bam.api.SamOrder
import com.fulcrumgenomics.commons.util.SimpleCounter
import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import com.fulcrumgenomics.umi.ConsensusTags
import com.fulcrumgenomics.util.{Io, Metric, Rscript}
import htsjdk.samtools.util.{Interval, IntervalList}
import org.apache.commons.math3.distribution.NormalDistribution
Expand All @@ -41,6 +41,7 @@ import scala.math.{max, min}
class CollectDuplexSeqMetricsTest extends UnitSpec {
private val MI = "MI"
private val RX = "RX"
private val aD = "aD"

// Case class to hold pointers to all the outputs of CollectDuplexSeqMetrics
private case class Outputs(families: Path, duplexFamilies: Path, umis: Path, duplexUmis: Path, yields: Path, plots: Path) {
Expand Down Expand Up @@ -312,6 +313,22 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
}

it should "raise an exception if duplex consensus records are provided" in {
val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
builder.addPair(
contig=1,
start1=1000,
start2=1100,
attrs=Map(
RX -> "AAA-GGG",
MI -> "1/A",
ConsensusTags.PerRead.AbRawReadCount -> 10,
ConsensusTags.PerRead.BaRawReadCount -> 10
)
)
an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
}

"CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
val builder = new SamBuilder(readLength=10)
builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))
Expand Down
19 changes: 18 additions & 1 deletion src/test/scala/com/fulcrumgenomics/umi/UmisTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import com.fulcrumgenomics.umi.ConsensusTags
import org.scalatest.OptionValues

class UmisTest extends UnitSpec with OptionValues {
Expand Down Expand Up @@ -111,4 +112,20 @@ class UmisTest extends UnitSpec with OptionValues {
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:ACGT-CCKC"))
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:CCKC-ACGT"))
}

"Umis.isConsensusRead" should "return false for reads without consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=100).exists(Umis.isConsensusRead) shouldBe false
builder.addPair(start1=100, start2=100, unmapped2=true).exists(Umis.isConsensusRead) shouldBe false
}

it should "return true for reads with consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=10, attrs=Map(ConsensusTags.PerRead.RawReadCount -> 10))
.exists(Umis.isConsensusRead) shouldBe true
builder.addFrag(
start=10,
attrs=Map(ConsensusTags.PerRead.AbRawReadCount -> 10, ConsensusTags.PerRead.BaRawReadCount -> 10)
).exists(Umis.isConsensusRead) shouldBe true
}
}
Loading