Skip to content

Commit

Permalink
hotfix-4.5.x: new CLI parameter enables avoiding decomposition of MNV…
Browse files Browse the repository at this point in the history
…s/block substitutions
  • Loading branch information
javild committed Mar 21, 2018
1 parent ed4db49 commit 2a55a3e
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -383,13 +383,22 @@ public class VariantAnnotationCommandOptions {
required = false, arity = 1)
public String referenceFasta;

@Parameter(names = {"--skip-normalize"}, description = "Skip normalization of input variants. Normalization"
+ " includes splitting multi-allele positions read from a VCF, allele trimming and decomposing MNVs. Has"
@Parameter(names = {"--skip-normalize"}, description = "Skip normalization of input variants. Should not be used"
+ " when the input (-i, --input-file) is a VCF file. Normalization includes splitting multi-allele positions "
+ "read from a VCF, allele trimming and decomposing MNVs. Has"
+ " no effect if reading variants from a CellBase variation collection "
+ "(\"--input-variation-collection\") or running a variant annotation benchmark (\"--benchmark\"): in"
+ " these two cases variant normalization is never carried out.",
required = false, arity = 0)
public boolean skipNormalize;
public boolean skipNormalize = false;

/**
 * Command-line flag {@code --skip-decompose} (takes no value; defaults to {@code false}).
 *
 * When set, decomposition of multi-nucleotide variants (MNVs) / block substitutions is left out of
 * the normalization process. According to its own help text, the flag has no effect when
 * {@code --skip-normalize} is present.
 */
@Parameter(names = {"--skip-decompose"}, description = "Use this flag to avoid decomposition of "
+ "multi-nucleotide-variants (MNVs) / block substitutions as part of the normalization process. If this"
+ " flag is NOT activated, as a step during the normalization process reference and alternate alleles"
+ " from MNVs/Block substitutions will be aligned and decomposed into their forming simple variants."
// Exactly one blank separates the two sentences; the previous text produced a doubled blank here.
+ " This flag has no effect if --skip-normalize is present.",
required = false, arity = 0)
public boolean skipDecompose = false;

@Parameter(names = {"--server-cache"}, description = "Use of this parameter is discouraged unless the "
+ "server administrator advises so. Annotation was already pre-calculated and cached in "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public enum FileFormat {VCF, JSON, AVRO, VEP};
private boolean benchmark;
private Path referenceFasta;
private boolean normalize;
private boolean decompose;
private List<String> chromosomeList;
private int port;
private String species;
Expand Down Expand Up @@ -379,14 +380,14 @@ private List<ParallelTaskRunner.TaskWithException<String, Variant, Exception>> g
VCFHeader header = (VCFHeader) codec.readActualHeader(lineIterator);
VCFHeaderVersion headerVersion = codec.getVCFHeaderVersion();
variantAnnotatorTaskList.add(new VcfStringAnnotatorTask(header, headerVersion,
variantAnnotatorList, sharedContext, normalize));
variantAnnotatorList, sharedContext, normalize, decompose));
} catch (IOException e) {
throw new IOException("Unable to read VCFHeader");
}
break;
case JSON:
logger.info("Using a JSON parser to read variants...");
variantAnnotatorTaskList.add(new JsonStringAnnotatorTask(variantAnnotatorList, normalize));
variantAnnotatorTaskList.add(new JsonStringAnnotatorTask(variantAnnotatorList, normalize, decompose));
break;
default:
break;
Expand Down Expand Up @@ -601,12 +602,17 @@ private void indexCustomVcfFile(int customFileNumber, RocksDB db) {
Iterator<VariantContext> iterator = vcfFileReader.iterator();
VariantContextToVariantConverter converter = new VariantContextToVariantConverter("", "",
vcfFileReader.getFileHeader().getSampleNamesInOrder());
VariantNormalizer normalizer = new VariantNormalizer(true, false, true);
// Currently, only VCF files are supported for custom annotation, so it makes no sense to allow
// normalisation of variants to be skipped.
// However, decomposition of MNVs/block substitutions can still be optional.
VariantNormalizer normalizer = new VariantNormalizer(true, false, decompose);
lineCounter = 0;
while (iterator.hasNext()) {
variantContext = iterator.next();
// Reference positions will not be indexed
if (variantContext.getAlternateAlleles().size() > 0) {
// Currently, only VCF files are supported for custom annotation, so it makes no sense to allow
// normalisation of variants to be skipped.
List<Variant> variantList = normalizer.normalize(converter.apply(Collections.singletonList(variantContext)), true);
for (Variant variant : variantList) {
db.put((variant.getChromosome() + "_" + variant.getStart() + "_" + variant.getReference() + "_"
Expand Down Expand Up @@ -740,6 +746,8 @@ private void checkParameters() throws IOException {
normalize = false;
}

decompose = !variantAnnotationCommandOptions.skipDecompose;

// output file
if (variantAnnotationCommandOptions.output != null) {
output = Paths.get(variantAnnotationCommandOptions.output);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,25 @@ public class JsonStringAnnotatorTask implements ParallelTaskRunner.TaskWithExcep
private List<VariantAnnotator> variantAnnotatorList;
private boolean normalize;
private static ObjectMapper jsonObjectMapper;
private static VariantNormalizer normalizer;
private VariantNormalizer normalizer;

// Class-level initialisation of the shared JSON mapper: it is configured with
// REQUIRE_SETTERS_FOR_GETTERS enabled and with NON_NULL serialization inclusion.
static {
jsonObjectMapper = new ObjectMapper();
jsonObjectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
// NOTE(review): this assignment targets the formerly static normalizer; the surrounding diff turns
// that field into an instance field set in the constructor, so presumably this line is the removed
// side of the change - confirm against the committed file.
normalizer = new VariantNormalizer(true, false, true);
}

private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|(<CNV[0-9]+>)|(<DUP>)|(<DEL>)|(<INS>)|(<INV>)";

/**
 * Convenience constructor: delegates to the wider constructor, passing {@code true} for every
 * boolean flag.
 *
 * @param variantAnnotatorList annotators forwarded unchanged to the delegated constructor
 */
public JsonStringAnnotatorTask(List<VariantAnnotator> variantAnnotatorList) {
// NOTE(review): both the two-argument and the three-argument delegation appear in this diff view;
// presumably only the three-argument call remains after the change - confirm.
this(variantAnnotatorList, true);
this(variantAnnotatorList, true, true);
}

/**
 * Builds a task around the given annotators and creates a per-instance {@code VariantNormalizer}.
 *
 * NOTE(review): the first signature line below looks like the pre-change (two-parameter) header
 * shown next to its replacement by the diff view - confirm which one is live.
 *
 * @param variantAnnotatorList annotators stored in this task
 * @param normalize            stored in the {@code normalize} field
 * @param decompose            passed as the third argument of the {@code VariantNormalizer}
 *                             constructor; presumably toggles MNV / block-substitution
 *                             decomposition - verify against VariantNormalizer
 */
public JsonStringAnnotatorTask(List<VariantAnnotator> variantAnnotatorList, boolean normalize) {
public JsonStringAnnotatorTask(List<VariantAnnotator> variantAnnotatorList, boolean normalize,
boolean decompose) {
this.variantAnnotatorList = variantAnnotatorList;
this.normalize = normalize;
// The normalizer is created here, per instance, using the caller-supplied decompose value.
normalizer = new VariantNormalizer(true, false, decompose);
}

public void pre() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,23 +49,25 @@ public class VcfStringAnnotatorTask implements ParallelTaskRunner.TaskWithExcept
private List<VariantAnnotator> variantAnnotatorList;
private FullVcfCodec vcfCodec;
private VariantContextToVariantConverter converter;
private static VariantNormalizer normalizer = new VariantNormalizer(true, false, true);
// private static VariantNormalizer normalizer = new VariantNormalizer(true, false, true);
private VariantNormalizer normalizer;
private boolean normalize;

/**
 * Convenience constructor: delegates to the wider constructor, passing {@code true} for every
 * boolean flag.
 *
 * @param header               VCF header forwarded to the delegated constructor
 * @param version              VCF header version forwarded to the delegated constructor
 * @param variantAnnotatorList annotators forwarded to the delegated constructor
 * @param sharedContext        shared context forwarded to the delegated constructor
 */
public VcfStringAnnotatorTask(VCFHeader header, VCFHeaderVersion version,
List<VariantAnnotator> variantAnnotatorList, SharedContext sharedContext) {
// NOTE(review): both the five-argument and the six-argument delegation appear in this diff view;
// presumably only the six-argument call remains after the change - confirm.
this(header, version, variantAnnotatorList, sharedContext, true);
this(header, version, variantAnnotatorList, sharedContext, true, true);
}

/**
 * Builds a task that owns its own VCF codec, variant converter and {@code VariantNormalizer}.
 *
 * NOTE(review): the parameter list below shows two alternative tails ("boolean normalize)" and
 * "boolean normalize, boolean decompose)"); presumably the first is the pre-change line kept by
 * the diff view - confirm which one is live.
 *
 * @param header               VCF header; set on the codec and used to obtain the ordered sample
 *                             names given to the converter
 * @param version              VCF header version set on the codec together with {@code header}
 * @param variantAnnotatorList annotators stored in this task
 * @param sharedContext        stored in the {@code sharedContext} field
 * @param normalize            stored in the {@code normalize} field
 * @param decompose            passed as the third argument of the {@code VariantNormalizer}
 *                             constructor; presumably toggles MNV / block-substitution
 *                             decomposition - verify against VariantNormalizer
 */
public VcfStringAnnotatorTask(VCFHeader header, VCFHeaderVersion version,
List<VariantAnnotator> variantAnnotatorList, SharedContext sharedContext,
boolean normalize) {
boolean normalize, boolean decompose) {
this.vcfCodec = new FullVcfCodec();
this.vcfCodec.setVCFHeader(header, version);
// The converter is built with two empty strings plus the sample names in header order.
this.converter = new VariantContextToVariantConverter("", "", header.getSampleNamesInOrder());
this.variantAnnotatorList = variantAnnotatorList;
this.sharedContext = sharedContext;
this.normalize = normalize;
// The normalizer is created here, per instance, using the caller-supplied decompose value.
normalizer = new VariantNormalizer(true, false, decompose);
}

@Override
Expand Down

0 comments on commit 2a55a3e

Please sign in to comment.