Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report AF independent of base quality filters #126

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/lofreq/lofreq_call.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,10 @@ report_var(vcf_file_t *vcf_file, const plp_col_t *p, const char *ref,
&sb_left_pv, &sb_right_pv, &sb_two_pv);
sb_qual = PROB_TO_PHREDQUAL_SAFE(sb_two_pv);
}


vcf_var_sprintf_info(var, is_indel? p->coverage_plp - p->num_tails : p->coverage_plp,
af, sb_qual, dp4, is_indel, p->hrun, is_consvar);
af, sb_qual, dp4, is_indel, p->hrun, is_consvar, p->num_alt_bases);

vcf_write_var(vcf_file, var);
vcf_free_var(&var);
Expand Down Expand Up @@ -730,7 +732,7 @@ call_indels(const plp_col_t *p, varcall_conf_t *conf)
*
*/
void
call_snvs(const plp_col_t *p, varcall_conf_t *conf)
call_snvs(plp_col_t *p, varcall_conf_t *conf)
{
double *bc_err_probs; /* error probs (qualities) passed down to snpcaller */
int bc_num_err_probs; /* #elements in bc_err_probs */
Expand Down Expand Up @@ -854,6 +856,9 @@ call_snvs(const plp_col_t *p, varcall_conf_t *conf)
dp4.alt_fw = p->fw_counts[alt_nt4];
dp4.alt_rv = p->rv_counts[alt_nt4];

// remember the filtered alt count to report it later
p->num_alt_bases = alt_count;

report_var(& conf->vcf_out, p, report_ref, report_alt,
af, PROB_TO_PHREDQUAL(pvalue),
is_indel, is_consvar, &dp4);
Expand Down
1 change: 1 addition & 0 deletions src/lofreq/plp.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ typedef struct {
char cons_base[MAX_INDELSIZE]; /* uppercase consensus base according to base-counts, after read-level filtering. */
int coverage_plp; /* original samtools value. upper count limit for all kept values */
int num_bases; /* number of bases after base filtering */
int num_alt_bases; /* number of alt bases after base filtering (for a snp call) */
/* num_ins and num_dels gives 'num_indels' */
int num_ign_indels; /* a hack: indels often get filtered because of low quality of missing qualities in bam file. we need to know nevertheless they are present. this is the count of all "ignored" indels */

Expand Down
9 changes: 8 additions & 1 deletion src/lofreq/snpcaller.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,14 @@ plp_to_errprobs(double **err_probs, int *num_err_probs,
if (p->base_quals[i].n) {
bq = p->base_quals[i].data[j];

/*
* count alt bases before applying bq filter
* AF will be calculated based on raw counts
*/
if (is_alt_base) {
alt_raw_counts[alt_idx] += 1;
}

/* bq filtering for all
* FIXME min_bq was meant to just influence quality calculations, not af etc
* but the filtering leaks out later, because alt_counts is computed and returned after filtering
Expand All @@ -418,7 +426,6 @@ plp_to_errprobs(double **err_probs, int *num_err_probs,

/* alt bq threshold and overwrite if needed */
if (is_alt_base) {
alt_raw_counts[alt_idx] += 1;
/* ignore altogether if below alt bq threshold */
if (bq < conf->min_alt_bq) {
continue;
Expand Down
7 changes: 5 additions & 2 deletions src/lofreq/vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
#include "vcf.h"
#include "defaults.h"

#define LINE_BUF_SIZE 1<<12
#define LINE_BUF_SIZE 1<<13


/* this is the actual header. all the other stuff is actually called meta-info
Expand Down Expand Up @@ -587,7 +587,7 @@ void vcf_var_sprintf_info(var_t *var,
const int dp, const float af, const int sb,
const dp4_counts_t *dp4,
const int indel, const int hrun,
const int consvar)
const int consvar, const int num_alt_bases)
{
char buf[LINE_BUF_SIZE];
snprintf(buf, sizeof(buf),
Expand All @@ -596,6 +596,8 @@ void vcf_var_sprintf_info(var_t *var,
if (indel) {
strcat(buf, ";INDEL");
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ";HRUN=%d", hrun);
} else {
snprintf(buf +strlen(buf), sizeof(buf) - strlen(buf), ";HQA=%d", num_alt_bases);
}
if (consvar) {
strcat(buf, ";CONSVAR");
Expand Down Expand Up @@ -644,6 +646,7 @@ void vcf_write_new_header(vcf_file_t *vcf_file, const char *src, const char *ref
vcf_printf(vcf_file, "##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n");
vcf_printf(vcf_file, "##INFO=<ID=SB,Number=1,Type=Integer,Description=\"Phred-scaled strand bias at this position\">\n");
vcf_printf(vcf_file, "##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n");
vcf_printf(vcf_file, "##INFO=<ID=HQA,Number=1,Type=Integer,Description=\"Count of high quality alt bases supporting SNP call\">\n");
vcf_printf(vcf_file, "##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n");
vcf_printf(vcf_file, "##INFO=<ID=CONSVAR,Number=0,Type=Flag,Description=\"Indicates that the variant is a consensus variant (as opposed to a low frequency variant).\">\n");
vcf_printf(vcf_file, "##INFO=<ID=HRUN,Number=1,Type=Integer,Description=\"Homopolymer length to the right of report indel position\">\n");
Expand Down
2 changes: 1 addition & 1 deletion src/lofreq/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ char *vcf_var_add_to_info(var_t *var, const char *info_str);
void vcf_var_sprintf_info(var_t *var,
const int dp, const float af, const int sb,
const dp4_counts_t *dp4,
const int is_indel, const int hrun, const int is_consvar);
const int is_indel, const int hrun, const int is_consvar, const int num_alt_bases);
void vcf_write_var(vcf_file_t *vcf_file, const var_t *var);
void vcf_write_header(vcf_file_t *vcf_file, const char *header);
void vcf_write_new_header(vcf_file_t *vcf_file, const char *srcprog, const char *reffa);
Expand Down