Skip to content

Commit

Permalink
Merge pull request #311 from samtools/yf_add_histogram_comp
Browse files Browse the repository at this point in the history
Added function to compare metric file histograms (added test!)
  • Loading branch information
Yossi Farjoun committed Aug 25, 2015
2 parents 19eb038 + b1e41d7 commit a84b5b7
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/java/htsjdk/samtools/metrics/MetricsFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -573,4 +573,21 @@ public static boolean areMetricsEqual(final File file1, final File file2) {
}

}

/**
 * Compare the metrics and histograms in two files, ignoring headers.
 *
 * @param file1 the first metrics file to read
 * @param file2 the second metrics file to read
 * @return true only when both the metrics and the histograms read from the two files compare equal
 * @throws SAMException if either file cannot be found/opened for reading
 */
public static boolean areMetricsAndHistogramsEqual(final File file1, final File file2) {
    // Keep handles on the readers so they can be released in the finally block below,
    // even when reading or comparing throws.
    FileReader reader1 = null;
    FileReader reader2 = null;
    try {
        final MetricsFile<MetricBase, Comparable<?>> mf1 = new MetricsFile<MetricBase, Comparable<?>>();
        final MetricsFile<MetricBase, Comparable<?>> mf2 = new MetricsFile<MetricBase, Comparable<?>>();

        // Same open/read ordering as before: file1 is fully read before file2 is opened.
        reader1 = new FileReader(file1);
        mf1.read(reader1);
        reader2 = new FileReader(file2);
        mf2.read(reader2);

        return mf1.areMetricsEqual(mf2) && mf1.areHistogramsEqual(mf2);

    } catch (FileNotFoundException e) {
        throw new SAMException(e.getMessage(), e);
    } finally {
        for (final FileReader reader : new FileReader[]{reader1, reader2}) {
            if (reader != null) {
                try {
                    // Closing an already-closed reader has no effect, so this is safe
                    // regardless of whether read() closed it.
                    reader.close();
                } catch (java.io.IOException ignored) {
                    // Best-effort cleanup of a read-only stream; a close failure must not
                    // mask the comparison result or the original exception.
                }
            }
        }
    }
}
}
21 changes: 21 additions & 0 deletions src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ public void testFloatingPointEquality() throws IOException {
MetricsFile<FloatingPointMetric,Integer> file2 = writeThenReadBack(file);
Assert.assertEquals(file, file2);



}

@Test
Expand Down Expand Up @@ -178,6 +180,22 @@ public void testWriteMetricsFile() throws IOException, ClassNotFoundException {
Assert.assertEquals(file, file3);
}

/**
 * Exercises the static file-comparison helpers on MetricsFile against four fixtures:
 * a baseline file, a copy of it, and two variants that the assertions below expect to
 * differ from the baseline in the histogram section and in the metrics section respectively.
 */
@Test
public void areMetricsFilesEqualTest(){
    final File testDir = new File("testdata/htsjdk/samtools/metrics/");
    final File file1 = new File(testDir,"metricsOne.metrics");
    final File file2 = new File(testDir,"metricsOneCopy.metrics");
    final File fileModifiedHist = new File(testDir,"metricsOneModifiedHistogram.metrics");
    final File fileModifiedMet = new File(testDir,"metricsOneModifiedMetrics.metrics");

    // The copy must compare equal under both comparisons. Without the positive
    // histogram check an implementation that always returned false would pass.
    Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file2));
    Assert.assertTrue(MetricsFile.areMetricsAndHistogramsEqual(file1, file2));

    // Histogram-only change: metrics still match, the combined comparison must not.
    Assert.assertTrue(MetricsFile.areMetricsEqual(file1, fileModifiedHist));
    Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedHist));

    // Metrics change: both comparisons must report a difference.
    Assert.assertFalse(MetricsFile.areMetricsEqual(file1, fileModifiedMet));
    Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedMet));
}

/** Helper method to persist metrics to file and read them back again. */
private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBack(MetricsFile<METRIC,Integer> in) throws IOException {
File f = File.createTempFile("test", ".metrics");
Expand All @@ -189,4 +207,7 @@ private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBa
retval.read(new FileReader(f));
return retval;
}



}
13 changes: 13 additions & 0 deletions testdata/htsjdk/samtools/metrics/metricsOne.metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## htsjdk.samtools.metrics.StringHeader
# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json
## htsjdk.samtools.metrics.StringHeader
# Started on: Mon Aug 24 13:31:51 EDT 2015

## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B

## HISTOGRAM java.lang.Integer
clipped_bases read_count
6 1
7 1
13 changes: 13 additions & 0 deletions testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## htsjdk.samtools.metrics.StringHeader
# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json
## htsjdk.samtools.metrics.StringHeader
# Started on: Mon Aug 24 13:31:51 EDT 2015

## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B

## HISTOGRAM java.lang.Integer
clipped_bases read_count
6 1
7 1
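14 changes: 14 additions & 0 deletions testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics
Original file line number Diff line number Diff line change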
@@ -0,0 +1,14 @@
## htsjdk.samtools.metrics.StringHeader
# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json
## htsjdk.samtools.metrics.StringHeader
# Started on: Mon Aug 24 13:31:51 EDT 2015

## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B

## HISTOGRAM java.lang.Integer
clipped_bases read_count
6 1
7 1
8 1
13 changes: 13 additions & 0 deletions testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## htsjdk.samtools.metrics.StringHeader
# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json
## htsjdk.samtools.metrics.StringHeader
# Started on: Mon Aug 24 13:31:51 EDT 2015

## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
Hello World 2008-12-31 122 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B

## HISTOGRAM java.lang.Integer
clipped_bases read_count
6 1
7 1

0 comments on commit a84b5b7

Please sign in to comment.