From 158994368e5aaace858b49d8b8b9b6b2a01a7640 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum Date: Tue, 29 Dec 2015 07:09:19 +0100 Subject: [PATCH] improve cigar --- src/java/htsjdk/samtools/Cigar.java | 85 +++++++++++++++++-- src/java/htsjdk/samtools/CigarElement.java | 5 ++ src/java/htsjdk/samtools/CigarOperator.java | 25 ++++++ src/tests/java/htsjdk/samtools/CigarTest.java | 24 ++++++ 4 files changed, 131 insertions(+), 8 deletions(-) diff --git a/src/java/htsjdk/samtools/Cigar.java b/src/java/htsjdk/samtools/Cigar.java index eb747ac0d6..12ffd0c57a 100644 --- a/src/java/htsjdk/samtools/Cigar.java +++ b/src/java/htsjdk/samtools/Cigar.java @@ -26,6 +26,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.List; /** @@ -37,7 +38,7 @@ * * c.f. http://samtools.sourceforge.net/SAM1.pdf for complete CIGAR specification. */ -public class Cigar implements Serializable { +public class Cigar implements Serializable, Iterable { public static final long serialVersionUID = 1L; private final List cigarElements = new ArrayList(); @@ -82,6 +83,8 @@ public int getReferenceLength() { case EQ: case X: length += element.getLength(); + break; + default: break; } } return length; @@ -101,6 +104,8 @@ public int getPaddedReferenceLength() { case X: case P: length += element.getLength(); + break; + default: break; } } return length; @@ -236,15 +241,15 @@ private static boolean isRealOperator(final CigarOperator op) { } private static boolean isInDelOperator(final CigarOperator op) { - return op == CigarOperator.I || op == CigarOperator.D; + return op !=null && op.isIndel(); } private static boolean isClippingOperator(final CigarOperator op) { - return op == CigarOperator.S || op == CigarOperator.H; + return op !=null && op.isClipping(); } private static boolean isPaddingOperator(final CigarOperator op) { - return op == CigarOperator.P; + return op !=null && op.isPadding(); } @Override @@ -254,15 +259,79 
@@ public boolean equals(final Object o) { final Cigar cigar = (Cigar) o; - if (cigarElements != null ? !cigarElements.equals(cigar.cigarElements) : cigar.cigarElements != null) - return false; + return cigarElements.equals(cigar.cigarElements); + } + + /** build a new Cigar object from a list of cigar operators. + * This can be used if you have the operators associated to + * each base in the read. + * + * e.g: read length =10 with cigar= [M,M,M,M,M,M,M,M,M,M], here + * fromCigarOperators would generate the cigar '10M' + * + * later the user resolved the 'M' to '=' or 'X', the array is now + * + * [=,=,=,=,=,X,X,=,=,=] + * + * fromCigarOperators would generate the cigar '5=2X3=' + * + * */ + public static Cigar fromCigarOperators(final List cigarOperators) { + if (cigarOperators == null) throw new IllegalArgumentException("cigarOperators is null"); + final List cigarElementList = new ArrayList<>(); + int i = 0; + // find adjacent operators and build list of cigar elements + while (i < cigarOperators.size() ) { + final CigarOperator currentOp = cigarOperators.get(i); + int j = i + 1; + while (j < cigarOperators.size() && cigarOperators.get(j).equals(currentOp)) { + j++; + } + cigarElementList.add(new CigarElement(j - i, currentOp)); + i = j; + } + return new Cigar(cigarElementList); + } + + /** shortcut to getCigarElements().iterator() */ + @Override + public Iterator iterator() { + return this.getCigarElements().iterator(); + } + + /** returns true if the cigar string contains the given operator */ + public boolean containsOperator(final CigarOperator operator) { + return this.cigarElements.stream().anyMatch( element -> element.getOperator() == operator); + } + + /** returns the first cigar element */ + public CigarElement getFirstCigarElement() { + return isEmpty() ? null : this.cigarElements.get(0); + } + + /** returns the last cigar element */ + public CigarElement getLastCigarElement() { + return isEmpty() ? 
null : this.cigarElements.get(this.numCigarElements() - 1 ); + } + + /** returns true if the cigar string starts with a clipping operator */ + public boolean isLeftClipped() { + return !isEmpty() && isClippingOperator(getFirstCigarElement().getOperator()); + } - return true; + /** returns true if the cigar string ends with a clipping operator */ + public boolean isRightClipped() { + return !isEmpty() && isClippingOperator(getLastCigarElement().getOperator()); } + /** returns true if the cigar is clipped */ + public boolean isClipped() { + return isLeftClipped() || isRightClipped(); + } + @Override public int hashCode() { - return cigarElements != null ? cigarElements.hashCode() : 0; + return cigarElements.hashCode(); } public String toString() { diff --git a/src/java/htsjdk/samtools/CigarElement.java b/src/java/htsjdk/samtools/CigarElement.java index bd8226b956..c645e6cc2f 100644 --- a/src/java/htsjdk/samtools/CigarElement.java +++ b/src/java/htsjdk/samtools/CigarElement.java @@ -67,4 +67,9 @@ public int hashCode() { result = 31 * result + (operator != null ? 
operator.hashCode() : 0); return result; } + + @Override + public String toString() { + return String.valueOf(this.length)+this.operator; + } } diff --git a/src/java/htsjdk/samtools/CigarOperator.java b/src/java/htsjdk/samtools/CigarOperator.java index 0eab7a001c..46ea539d4a 100644 --- a/src/java/htsjdk/samtools/CigarOperator.java +++ b/src/java/htsjdk/samtools/CigarOperator.java @@ -179,6 +179,31 @@ public static byte enumToCharacter(final CigarOperator e) { return e.character; } + /** Returns true if the operator is a clipping (hard or soft) operator */ + public boolean isClipping() { + return this == S || this == H; + } + + /** Returns true if the operator is an Insertion or Deletion operator */ + public boolean isIndel() { + return this == I || this == D; + } + + /** Returns true if the operator is a Skipped Region, Insertion or Deletion operator */ + public boolean isIndelOrSkippedRegion() { + return this == N || isIndel(); + } + + /** Returns true if the operator is an M, an X or an EQ */ + public boolean isAlignment() { + return this == M || this == X || this == EQ; + } + + /** Returns true if the operator is a Padding operator */ + public boolean isPadding() { + return this == P; + } + /** Returns the cigar operator as it would be seen in a SAM file. 
*/ @Override public String toString() { return this.string; diff --git a/src/tests/java/htsjdk/samtools/CigarTest.java b/src/tests/java/htsjdk/samtools/CigarTest.java index 1d7d4c60c5..acdc22407b 100644 --- a/src/tests/java/htsjdk/samtools/CigarTest.java +++ b/src/tests/java/htsjdk/samtools/CigarTest.java @@ -27,6 +27,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; import java.util.List; /** @@ -91,4 +92,27 @@ public void testNegative(final String cigar, final SAMValidationError.Type type) Assert.assertEquals(errors.size(), 1, String.format("Got %d error, expected exactly one error.", errors.size())); Assert.assertEquals(errors.get(0).getType(), type); } + + @Test + public void testMakeCigarFromOperators() { + final List cigarOperators = Arrays.asList( + CigarOperator.S, + CigarOperator.M, + CigarOperator.M, + CigarOperator.M, + CigarOperator.I, + CigarOperator.M, + CigarOperator.D, + CigarOperator.M + ); + final Cigar cigar = Cigar.fromCigarOperators(cigarOperators); + Assert.assertFalse(cigar.isEmpty()); + Assert.assertEquals(cigar.numCigarElements(), 6); + Assert.assertEquals(cigar.toString(),"1S3M1I1M1D1M"); + Assert.assertFalse(cigar.containsOperator(CigarOperator.N)); + Assert.assertTrue(cigar.containsOperator(CigarOperator.D)); + Assert.assertTrue(cigar.isLeftClipped()); + Assert.assertFalse(cigar.isRightClipped()); + Assert.assertTrue(cigar.isClipped()); + } }