Skip to content

Commit

Permalink
feat: add confidence threshold option --conf
Browse files Browse the repository at this point in the history
  • Loading branch information
mbhall88 committed Oct 1, 2024
1 parent 23ef1b6 commit 419ede9
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 3 deletions.
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ sequencing technology. Read more about the development of this method [here][pap

[![Conda (channel only)](https://img.shields.io/conda/vn/bioconda/nohuman)](https://anaconda.org/bioconda/nohuman)
[![bioconda version](https://anaconda.org/bioconda/nohuman/badges/platforms.svg)](https://anaconda.org/bioconda/nohuman)
![Conda](https://img.shields.io/conda/dn/bioconda/nohuman)
![Conda Downloads](https://img.shields.io/conda/d/bioconda/nohuman)

```shell
$ conda install -c bioconda nohuman
Expand Down Expand Up @@ -187,6 +187,12 @@ or to specify a different path for the output
$ nohuman -t 4 --out1 clean_1.fq --out2 clean_2.fq in_1.fq in_2.fq
```

Set a [minimum confidence score][conf] for kraken2 classifications. The value must be between 0 and 1 (inclusive) and defaults to 0.0.

```
$ nohuman --conf 0.5 in.fq
```

> [!TIP]
> Compressed output will be inferred from the specified output path(s). If no output path is provided, the same
> compression as the input will be used. To override the output compression format, use the `--output-type` option.
Expand Down Expand Up @@ -215,6 +221,7 @@ Options:
-F, --output-type <FORMAT> Output compression format. u: uncompressed; b: Bzip2; g: Gzip; x: Xz (Lzma); z: Zstd
-t, --threads <INT> Number of threads to use in kraken2 and optional output compression. Cannot be 0 [default: 1]
-H, --human Output human reads instead of removing them
-C, --conf <[0, 1]> Kraken2 minimum confidence score [default: 0.0]
-v, --verbose Set the logging level to verbose
-h, --help Print help (see more with '--help')
-V, --version Print version
Expand Down Expand Up @@ -275,7 +282,12 @@ Options:
-H, --human
Output human reads instead of removing them
-C, --conf <[0, 1]>
Kraken2 minimum confidence score
[default: 0.0]
-v, --verbose
Set the logging level to verbose
Expand Down Expand Up @@ -326,4 +338,6 @@ more details and for other alternate approaches.

[paper]: https://doi.org/10.1093/gigascience/giae010

[ghcr]: https://github.com/mbhall88/nohuman/pkgs/container/nohuman
[ghcr]: https://github.com/mbhall88/nohuman/pkgs/container/nohuman

[conf]: https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#confidence-scoring
31 changes: 31 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ pub fn validate_db_directory(path: &Path) -> Result<PathBuf, String> {
))
}

/// Convert a command-line argument into a kraken2 confidence score.
///
/// The value is forwarded to kraken2 and has to lie within the closed interval
/// [0, 1], i.e. `0 <= confidence <= 1`.
///
/// # Errors
///
/// Returns a human-readable message when `s` cannot be parsed as an `f32`, or
/// when the parsed value is outside [0, 1]. Non-finite values such as `NaN`
/// parse successfully but are never inside the interval, so they are rejected
/// by the range check as well.
pub fn parse_confidence_score(s: &str) -> Result<f32, String> {
    match s.parse::<f32>() {
        Err(_) => Err(String::from("Confidence score must be a number")),
        Ok(score) if (0.0..=1.0).contains(&score) => Ok(score),
        Ok(_) => Err(String::from(
            "Confidence score must be in the closed interval [0, 1]",
        )),
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -205,4 +215,25 @@ mod tests {
let expected = PathBuf::from("Cargo.toml");
assert_eq!(actual, expected)
}

/// Exercises `parse_confidence_score` on accepted values, on numbers outside
/// the closed interval [0, 1], and on input that is not a number at all.
#[test]
fn test_parse_confidence_score() {
    // Values inside the closed interval, including both end points.
    let accepted: [(&str, f32); 5] = [
        ("0.5", 0.5),
        ("1.0", 1.0),
        ("0.0", 0.0),
        ("1", 1.0),
        ("0", 0.0),
    ];
    for (input, expected) in accepted {
        assert_eq!(
            parse_confidence_score(input),
            Ok(expected),
            "input: {input:?}"
        );
    }

    // Well-formed floats that fall outside [0, 1]. The non-finite spellings
    // parse as `f32`, so they must be caught by the range check.
    for input in ["1.1", "-0.1", "inf", "-inf", "NaN"] {
        assert!(
            parse_confidence_score(input).is_err(),
            "expected out-of-range error for {input:?}"
        );
    }

    // Input that is not a number must be rejected too.
    for input in ["", "abc", "0.5x"] {
        assert!(
            parse_confidence_score(input).is_err(),
            "expected parse error for {input:?}"
        );
    }
}
}
10 changes: 9 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use env_logger::Builder;
use log::{debug, error, info, warn, LevelFilter};
use nohuman::compression::CompressionFormat;
use nohuman::{
check_path_exists, download::download_database, validate_db_directory, CommandRunner,
check_path_exists, download::download_database, parse_confidence_score, validate_db_directory,
CommandRunner,
};

static DEFAULT_DB_LOCATION: LazyLock<String> = LazyLock::new(|| {
Expand Down Expand Up @@ -72,6 +73,10 @@ struct Args {
#[arg(short = 'H', long = "human")]
keep_human_reads: bool,

/// Kraken2 minimum confidence score
#[arg(short = 'C', long = "conf", value_name = "[0, 1]", default_value = "0.0", value_parser = parse_confidence_score)]
confidence: f32,

/// Set the logging level to verbose
#[arg(short, long)]
verbose: bool,
Expand Down Expand Up @@ -142,6 +147,7 @@ fn main() -> Result<()> {
let temp_kraken_output =
tempfile::NamedTempFile::new().context("Failed to create temporary kraken output file")?;
let threads = args.threads.to_string();
let confidence = args.confidence.to_string();
let db = validate_db_directory(&args.database)
.map_err(|e| anyhow::anyhow!(e))?
.to_string_lossy()
Expand All @@ -153,6 +159,8 @@ fn main() -> Result<()> {
&db,
"--output",
temp_kraken_output.path().to_str().unwrap(),
"--confidence",
&confidence,
];
match input.len() {
0 => bail!("No input files provided"),
Expand Down

0 comments on commit 419ede9

Please sign in to comment.