Skip to content

Commit

Permalink
Make the regex negation work correctly with missing values
Browse files Browse the repository at this point in the history
for example,
    -i 'TAG[*]~"\."'
    -i 'TAG[*]!~"\."'

Resolves #2355
  • Loading branch information
pd3 committed Jan 29, 2025
1 parent d61bb5b commit 827b2cd
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 73 deletions.
5 changes: 4 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

Changes affecting the whole of bcftools, or multiple commands:

* Add support for matching lines by ID (#1739)
* Add support for matching lines by ID via the --pair-logic and --collapse options (#1739)

* The -i/-e filtering expressions now properly match the regex negation of missing
values, e.g. -i 'TAG!~"\."' (#2355)


Changes affecting specific commands:
Expand Down
132 changes: 70 additions & 62 deletions doc/bcftools.1

Large diffs are not rendered by default.

20 changes: 13 additions & 7 deletions doc/bcftools.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="generator" content="Asciidoctor 2.0.15.dev">
<meta name="generator" content="Asciidoctor 2.0.20">
<title>bcftools(1)</title>
<link rel="stylesheet" href="./docbook-xsl.css">
</head>
Expand Down Expand Up @@ -50,7 +50,7 @@ <h2 id="_description">DESCRIPTION</h2>
<div class="sect2">
<h3 id="_version">VERSION</h3>
<div class="paragraph">
<p>This manual page was last updated <strong>2024-12-28 22:19 GMT</strong> and refers to bcftools git version <strong>1.21-61-g68f13f2e+</strong>.</p>
<p>This manual page was last updated <strong>2025-01-29 08:37 CET</strong> and refers to bcftools git version <strong>1.21-72-g724713f+</strong>.</p>
</div>
</div>
<div class="sect2">
Expand Down Expand Up @@ -3481,7 +3481,7 @@ <h3 id="norm">bcftools norm [<em>OPTIONS</em>] <em>file.vcf.gz</em></h3>
cannot be stressed enough, that <em>s</em> will NOT fix strand issues in
your VCF, do NOT use it for that purpose!!! (Instead see
<a href="http://samtools.github.io/bcftools/howtos/plugin.af-dist.html" class="bare">http://samtools.github.io/bcftools/howtos/plugin.af-dist.html</a> and
&lt;<a href="http://samtools.github.io/bcftools/howtos/plugin.fixref.html&gt;." class="bare">http://samtools.github.io/bcftools/howtos/plugin.fixref.html&gt;.</a>)</p>
<a href="http://samtools.github.io/bcftools/howtos/plugin.fixref.html" class="bare">http://samtools.github.io/bcftools/howtos/plugin.fixref.html</a>.)</p>
</dd>
<dt class="hdlist1"><strong>-d, --rm-dup</strong> <em>snps</em>|<em>indels</em>|<em>both</em>|<em>all</em>|<em>exact</em></dt>
<dd>
Expand Down Expand Up @@ -4336,6 +4336,12 @@ <h4 id="_examples_4">Examples:</h4>
bcftools query -f '[%SAMPLE %GT %DP\n]' -i 'FMT/DP=1 | FMT/DP=2' file.vcf</pre>
</div>
</div>
<div class="literalblock">
<div class="content">
<pre># Refer to ID column vs INFO/ID tag vs FORMAT/ID tag
bcftools query -f 'columnID=%ID infoID=%INFO/ID [fmtID=%ID ] [columnID=%/ID]'</pre>
</div>
</div>
</div>
</div>
<div class="sect2">
Expand Down Expand Up @@ -5493,13 +5499,13 @@ <h2 id="expressions">FILTERING EXPRESSIONS</h2>
<div class="ulist">
<ul>
<li>
<p>C_i = A_i + B_i when length(A)==B(A) and sets length&#169;=length(A)</p>
<p>C_i = A_i + B_i when length(A)==length(B) and sets length(C)=length(A)</p>
</li>
<li>
<p>C_i = A_i + B_0 when length(B)=1 and sets length&#169;=length(A)</p>
<p>C_i = A_i + B_0 when length(B)=1 and sets length(C)=length(A)</p>
</li>
<li>
<p>C_i = A_0 + B_i when length(A)=1 and sets length&#169;=length(B)</p>
<p>C_i = A_0 + B_i when length(A)=1 and sets length(C)=length(B)</p>
</li>
<li>
<p>throw an error when length(A)!=length(B) AND length(A)!=1 AND length(B)!=1</p>
Expand Down Expand Up @@ -5717,7 +5723,7 @@ <h2 id="_copying">COPYING</h2>
</div>
<div id="footer">
<div id="footer-text">
Last updated 2024-12-28 22:19:33 UTC
Last updated 2025-01-29 08:37:53 +0100
</div>
</div>
</body>
Expand Down
10 changes: 7 additions & 3 deletions filter.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* filter.c -- filter expressions.
Copyright (C) 2013-2024 Genome Research Ltd.
Copyright (C) 2013-2025 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -2609,9 +2609,11 @@ static int _regex_vector_strings(regex_t *regex, char *str, size_t len, int logi
char *mid = str;
while ( mid < end && *mid && *mid!=',' ) mid++;
int miss = mid - str == 1 && str[0]=='.' ? 1 : 0;
if ( miss && missing_logic[miss] ) return 1;
int match = ( miss && missing_logic[miss] ) ? 1 : 0;
if ( logic==TOK_NLIKE ) match = match ? 0 : 1;
if ( match ) return 1;
char tmp = *mid; *mid = 0;
int match = regexec(regex, str, 0,NULL,0) ? 0 : 1;
match = regexec(regex, str, 0,NULL,0) ? 0 : 1;
*mid = tmp;
if ( logic==TOK_NLIKE ) match = match ? 0 : 1;
if ( match ) return 1;
Expand Down Expand Up @@ -2698,6 +2700,7 @@ static void cmp_vector_strings(token_t *atok, token_t *btok, token_t *rtok)
{
token_t *tok = atok->regex ? btok : atok;
rtok->pass_site = _regex_vector_strings(regex, tok->str_value.s, tok->str_value.l, logic, missing_logic);
fprintf(stderr,"pass=%d [%s]\n",rtok->pass_site,tok->str_value.s);
}
return;
}
Expand Down Expand Up @@ -3742,6 +3745,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error
if ( type==BCF_HT_INT ) set_missing = 1;
else if ( type==BCF_HT_REAL ) set_missing = 1;
}
else if ( !out[k].tag ) error("Error: could not parse the expression\n"); // e.g. =~
else if ( !strcmp("QUAL",out[k].tag) ) set_missing = 1;
if ( set_missing ) { out[j].is_str = 0; out[j].is_missing = 1; bcf_double_set_missing(out[j].values[0]); }
}
Expand Down
3 changes: 3 additions & 0 deletions test/query.filter.15.1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.
.,.
a,.,c
3 changes: 3 additions & 0 deletions test/query.filter.15.2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c
a
a,.,c
9 changes: 9 additions & 0 deletions test/query.filter.15.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.2
##contig=<ID=chr1,length=5>
##INFO=<ID=TAG,Number=.,Type=String,Description="Some tag">
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 1 . * * . . TAG=a,b,c
chr1 2 . * * . . TAG=a
chr1 3 . * * . . TAG=.
chr1 4 . * * . . TAG=.,.
chr1 5 . * * . . TAG=a,.,c
4 changes: 4 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.5.a','merge.gvcf.5.b'],out=>'merge.gvcf.5.1.out',args=>'--gvcf - --merge none');
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.11.a','merge.gvcf.11.b','merge.gvcf.11.c'],out=>'merge.gvcf.11.1.out',args=>'--gvcf -');
# run_test(\&test_vcf_merge_big,$opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed
run_test(\&test_vcf_query,$opts,in=>'query.filter.15',out=>'query.filter.15.1.out',args=>q[-f '%TAG' -i 'TAG[*]="."']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.15',out=>'query.filter.15.1.out',args=>q[-f '%TAG' -i 'TAG[*]~"\."']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.15',out=>'query.filter.15.2.out',args=>q[-f '%TAG' -i 'TAG[*]!="."']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.15',out=>'query.filter.15.2.out',args=>q[-f '%TAG' -i 'TAG[*]!~"\."']);
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.1.out',args=>q[-f '%CHROM %POS %ID %REF %ALT %QUAL %FILTER \\t %INFO/CHROM %INFO/POS %INFO/ID %INFO/REF %INFO/ALT %INFO/QUAL %INFO/FILTER']);
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.2.out',args=>q[-f '[ %CHROM] \\t [ %POS] \\t [ %ID] \\t [ %REF] \\t [ %ALT] \\t [ %QUAL] \\t [ %FILTER]']);
run_test(\&test_vcf_query,$opts,in=>'query.3',out=>'query.3.3.out',args=>q[-f '[ %/CHROM] \\t [ %/POS] \\t [ %/ID] \\t [ %/REF] \\t [ %/ALT] \\t [ %/QUAL] \\t [ %/FILTER]']);
Expand Down

0 comments on commit 827b2cd

Please sign in to comment.