From 877b17f4477ea206d8bb3732f2b781cefb843cfc Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Sun, 5 Jan 2025 10:56:34 +1300 Subject: [PATCH 1/4] Resolves #109 --- Cargo.toml | 2 +- minimap2-sys/src/lib.rs | 31 ++++++++++++++++++++++++++++++- src/lib.rs | 15 ++++++--------- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 73e01b0..3a8b601 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ libc = "0.2" needletail = { version = "0.6", optional = true, default-features = false} # Dep for development -minimap2-sys = { path = "./minimap2-sys", version = "0.1.20+minimap2.2.28" } +minimap2-sys = { path = "./minimap2-sys" } #, version = "0.1.20+minimap2.2.28" } # minimap2-sys = "0.1.19" rust-htslib = { version = "0.48", default-features = false, optional = true } diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..0e55ff4 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -8,6 +8,8 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs")); #[cfg(all(not(feature = "bindgen")))] include!("bindings.rs"); +use std::{mem::MaybeUninit, ops::{DerefMut, Deref}}; + unsafe impl Send for mm_idx_t {} unsafe impl Send for mm_idx_reader_t {} unsafe impl Send for mm_mapopt_t {} @@ -18,8 +20,35 @@ impl Drop for mm_idx_t { } } +pub struct MmIdx { + pub idx: *mut mm_idx_t, +} + +impl From<*mut mm_idx_t> for MmIdx { + fn from(idx: *mut mm_idx_t) -> Self { + MmIdx { idx } + } +} -use std::mem::MaybeUninit; +impl Drop for MmIdx { + fn drop(&mut self) { + unsafe { mm_idx_destroy(self.idx) }; + } +} + +impl Deref for MmIdx { + type Target = mm_idx_t; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.idx } + } +} + +impl DerefMut for MmIdx { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.idx } + } +} impl Default for mm_mapopt_t { fn default() -> Self { diff --git a/src/lib.rs b/src/lib.rs index f07ca0a..838e325 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -308,7 +308,7 @@ pub struct Aligner { pub threads: usize, /// Index created by minimap2 - pub idx: Option>, + pub idx: Option>, /// Index reader created by minimap2 pub idx_reader: Option>, @@ -745,9 +745,7 @@ where } let mm_idx = unsafe { idx.assume_init() }; - self.idx = Some(Arc::new(mm_idx)); - let mm_idx = unsafe { idx.assume_init() }; - self.idx = Some(Arc::new(mm_idx)); + self.idx = Some(Arc::new(mm_idx.into())); Ok(Aligner { idxopt: self.idxopt, @@ -863,9 +861,8 @@ where }); let mm_idx = unsafe { idx.assume_init() }; - self.idx = Some(Arc::new(mm_idx)); - let mm_idx = unsafe { idx.assume_init() }; - self.idx = Some(Arc::new(mm_idx)); + self.idx = Some(Arc::new(mm_idx.into())); + self.mapopt.mid_occ = 1000; let aln = Aligner { @@ -897,8 +894,8 @@ impl Aligner { /// Returns the number of sequences in the index pub fn n_seq(&self) -> u32 { unsafe { - let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); - let idx: *const mm_idx_t = *idx; + // let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + let idx: *const mm_idx_t = &(***self.idx.as_ref().unwrap()); (*idx).n_seq as u32 } } From 781d2953afe46b121233cd5095614ee0ca5f6ed8 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Sun, 5 Jan 2025 10:57:06 +1300 Subject: [PATCH 2/4] Resolves #109 --- src/lib.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 838e325..cf368da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -906,8 +906,9 @@ impl Aligner { /// Remainds valid as long as the aligner is valid pub fn get_seq<'aln>(&'aln self, i: usize) -> Option<&'aln mm_idx_seq_t> { unsafe { - let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); - let idx: *const mm_idx_t = *idx; + // let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + // let idx: *const mm_idx_t = *idx; + let idx: *const mm_idx_t = &(***self.idx.as_ref().unwrap()); // todo, should this be > or >= if i > self.n_seq() as usize { return None; @@ -1110,7 +1111,8 @@ impl Aligner { }; let (cs_str, md_str) = if cs || md { - let idx: *const mm_idx_t = *Arc::as_ptr(self.idx.as_ref().unwrap()); + // let idx: *const mm_idx_t = *Arc::as_ptr(self.idx.as_ref().unwrap()); + let idx: *const mm_idx_t = &(***self.idx.as_ref().unwrap()); let cs_str = if cs { let mut cs_string: *mut libc::c_char = std::ptr::null_mut(); @@ -2243,4 +2245,18 @@ mod tests { MM_F_CIGAR as i64 ); } + + #[test] + fn build_aligner_memory_leak() { + for _ in 0..100000 { + let aligner = Aligner::builder().map_ont(); + let aligner = aligner + .with_index_threads(1) + .with_cigar() + .with_sam_out() + .with_sam_hit_only() + .with_seq_and_id(b"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA", b"ref") + .unwrap(); + } + } } From 6ea99b6f02378957851d6892cf36aa1c76d55106 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Sun, 5 Jan 2025 11:04:43 +1300 Subject: [PATCH 3/4] More updates --- CHANGELOG.md | 100 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 8 ++-- README.md | 99 +-------------------------------------- minimap2-sys/Cargo.toml | 2 +- minimap2-sys/README.md | 3 ++ 5 files changed, 110 insertions(+), 102 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f3322c0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,100 @@ +### 0.1.23 minimap2 2.28 ++ Fixed memory leak when not dropping mm_idx_t properly. This is done by adding in syntactic sugar in minimap2-sys @jguhlin + +### 0.1.22 minimap2 2.28 +#### Changes ++ Fixed a memory segfault when re-using a thread local buffer. Not sure why it occurs, but this fix seems to solve it. + +### 0.1.21 minimap2 2.28 +Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB +#### Breaking Changes ++ Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs ++ map now takes an additional argument, query_name: Option<&[u8]>, possibly solves [#75](https://github.com/jguhlin/minimap2-rs/issues/75) (@rob-p @mbhall88 @jguhlin) ++ Arc the Index, to prevent double-frees, solves [#71](https://github.com/jguhlin/minimap2-rs/issues/71) ++ Map file now passes in query name, which should help with [#75](https://github.com/jguhlin/minimap2-rs/issues/75) ++ Supplementary flag now better detected (@rob-p) ++ FIX: Cigar string missing softclip operation (@Sam-Sims) + +#### Migration Guide ++ Make all setting changes before calling a with_index, with_seq's function ++ Change all map calls to include a query name, or None if not needed + +### Other Changes ++ Add ergonomic functions n_seq and get_seq. ++ Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84) ++ Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82) ++ Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81) ++ Rayon test, so some support, closes [#5](https://github.com/jguhlin/minimap2-rs/issues/5) ++ Static str's and now static CStr's ++ FIX: memory leak due to sequences allocated by minimap2 not being freed @charlesgregory ++ Add Send + Sync to Aligner, along with unit test @PB-DB ++ Experimental Android support (tested on aarch64 and x86_64), solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ++ Added flag and option documents ++ Added with_gap_open penalty ergonomic function + +### 0.1.20 minimap2 2.28 ++ Fix htslib errors. No update to -sys crate needed. + +### 0.1.19 minimap2 2.28 ++ Fix memory leak by @charlesgregory + +### 0.1.18 minimap2 2.28 ++ Update to minimap2 v2.28 @jguhlin ++ Support for lrhqae preset @jguhlin + +### 0.1.17 minimap2 2.27 +* Mark bam::Record objects as supplementary. #52 @PB-DB +* Only use rust-htslib/curl when curl feature is enabled. #53 @PB-DB +* Update to minimap2 v2.27 @jguhlin +* Switch to needletail for reading fast files (features map-file) @jguhlin +* Convert functions to take slices of vectors instead of refs to vecs `&[Vec]` instead of `&Vec>` @jguhlin +* _breaking_ Curl is no longer a default option for htslib, please re-enable it as needed with cargo.toml features +* _breaking_ Now using needletail for map-files, enabled by default. However, compression algorithms are disabled. Please enable with cargo.toml features +* Experimental rayon support +* aligner.with_cigar_clipping() to add soft clipping to the CIGAR vec (with_cigar() still adds to only the string, following the minimap2 outputs for PAF) +* _breaking_ .with_threads(_) is now .with_index_threads(_) to make it more clear + +### 0.1.16 minimap2 2.26 +* Much better cross compilation support thanks to @Adoni5 + +### 0.1.15 minimap2 2.26 +* Compilation on aarch64 thanks to @leiste375 +* README corrections thanks to @wdecoster +* Better support for static builds / linking +* Update fffx to a version that uses bytelines without tokio. Drastically reduces compile times and dependency tree. + +### 0.1.14 minimap2 2.26 +* Memory leak fixed by @Adoni5 +* Updated deps + +### 0.1.13 minimap2 2.26 +* Add with_seq to support indexing a single sequence (as per mappy: https://github.com/lh3/minimap2/blob/master/python/mappy.pyx#L115) +* minimap2-rs: update rust-htslib deps +* simdutf8 now optional dependency requiring map-file feature to be enabled +* Support soft-clipping string in CIGAR. WARNING: Does not support hard clipping. Please open an issue if you need this. +* Update minimap to 2.26 +* Not convinced SSE41/SSE2 are working properly. Recommend simde. + +### 0.1.11 +* HTS lib: add support for optional quality scores by @eharr + +### 0.1.10 +* HTS lib support by @eharr +* HTS lib: Output sam/bam files by @eharr +* More tests by @eharr +* Display impl for Strand thanks to @ahcm +* Update minimap2-sys to latest version by @jguhlin +* -sys crate mm2fast added as additional backend by @jguhlin +* zlib dep changes by @jguhlin (hopefully now it is more portable and robust) +* -sys crate now supports SIMDe + +## 0.1.9 +* Thanks for @Adoni5 for switching to builder pattern, and @eharr for adding additional fields to alignment. +* Do not require libclang for normal compilation. +## 0.1.8 +* Multithreading support (use less raw pointers, and treat more like rust Struct's) +## 0.1.7 +* use libc instead of std:ffi::c_int as well +## 0.1.6 +* Support slightly older versions of rustc by using libc:: rather than std::ffi for c_char (Thanks dwpeng!) +* Use fffx module for fasta/q parsing diff --git a/Cargo.toml b/Cargo.toml index 3a8b601..a59f329 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimap2" -version = "0.1.22+minimap2.2.28" +version = "0.1.23+minimap2.2.28" edition = "2021" authors = ["Joseph Guhlin "] license = "MIT OR Apache-2.0" @@ -31,14 +31,14 @@ libc = "0.2" needletail = { version = "0.6", optional = true, default-features = false} # Dep for development -minimap2-sys = { path = "./minimap2-sys" } #, version = "0.1.20+minimap2.2.28" } +minimap2-sys = { path = "./minimap2-sys" , version = "0.1.20+minimap2.2.28" } # minimap2-sys = "0.1.19" -rust-htslib = { version = "0.48", default-features = false, optional = true } +rust-htslib = { version = "0.49", default-features = false, optional = true } [dev-dependencies] rayon = "1.10" crossbeam = "0.8.4" -clap = { version = "4.5.21", features = ["derive"] } +clap = { version = "4.5.23", features = ["derive"] } needletail = { version = "0.6", default-features = false} # The end-user should decide this... diff --git a/README.md b/README.md index 46fec14..5863961 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Tested with rustc 1.82.0 and nightly. So probably a good idea to upgrade before ## Minimap2 Version Table | minimap2-rs | minimap2 | |-------------|----------| +| 0.1.23 | 2.28 | | 0.1.22 | 2.28 | | 0.1.21 | 2.28 | | 0.1.20 | 2.28 | @@ -334,103 +335,7 @@ See [customization](#customization) for how to use these. | `MM_I_NO_NAME` | `4` | Do not store sequence names in the index. | # Changelog -### 0.1.22 minimap2 2.28 -#### Changes -+ Fixed a memory segfault when re-using a thread local buffer. Not sure why it occurs, but this fix seems to solve it. - -### 0.1.21 minimap2 2.28 -Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB -#### Breaking Changes -+ Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs -+ map now takes an additional argument, query_name: Option<&[u8]>, possibly solves [#75](https://github.com/jguhlin/minimap2-rs/issues/75) (@rob-p @mbhall88 @jguhlin) -+ Arc the Index, to prevent double-frees, solves [#71](https://github.com/jguhlin/minimap2-rs/issues/71) -+ Map file now passes in query name, which should help with [#75](https://github.com/jguhlin/minimap2-rs/issues/75) -+ Supplementary flag now better detected (@rob-p) -+ FIX: Cigar string missing softclip operation (@Sam-Sims) - -#### Migration Guide -+ Make all setting changes before calling a with_index, with_seq's function -+ Change all map calls to include a query name, or None if not needed - -### Other Changes -+ Add ergonomic functions n_seq and get_seq. -+ Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84) -+ Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82) -+ Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81) -+ Rayon test, so some support, closes [#5](https://github.com/jguhlin/minimap2-rs/issues/5) -+ Static str's and now static CStr's -+ FIX: memory leak due to sequences allocated by minimap2 not being freed @charlesgregory -+ Add Send + Sync to Aligner, along with unit test @PB-DB -+ Experimental Android support (tested on aarch64 and x86_64), solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) -+ Added flag and option documents -+ Added with_gap_open penalty ergonomic function - -### 0.1.20 minimap2 2.28 -+ Fix htslib errors. No update to -sys crate needed. - -### 0.1.19 minimap2 2.28 -+ Fix memory leak by @charlesgregory - -### 0.1.18 minimap2 2.28 -+ Update to minimap2 v2.28 @jguhlin -+ Support for lrhqae preset @jguhlin - -### 0.1.17 minimap2 2.27 -* Mark bam::Record objects as supplementary. #52 @PB-DB -* Only use rust-htslib/curl when curl feature is enabled. #53 @PB-DB -* Update to minimap2 v2.27 @jguhlin -* Switch to needletail for reading fast files (features map-file) @jguhlin -* Convert functions to take slices of vectors instead of refs to vecs `&[Vec]` instead of `&Vec>` @jguhlin -* _breaking_ Curl is no longer a default option for htslib, please re-enable it as needed with cargo.toml features -* _breaking_ Now using needletail for map-files, enabled by default. However, compression algorithms are disabled. Please enable with cargo.toml features -* Experimental rayon support -* aligner.with_cigar_clipping() to add soft clipping to the CIGAR vec (with_cigar() still adds to only the string, following the minimap2 outputs for PAF) -* _breaking_ .with_threads(_) is now .with_index_threads(_) to make it more clear - -### 0.1.16 minimap2 2.26 -* Much better cross compilation support thanks to @Adoni5 - -### 0.1.15 minimap2 2.26 -* Compilation on aarch64 thanks to @leiste375 -* README corrections thanks to @wdecoster -* Better support for static builds / linking -* Update fffx to a version that uses bytelines without tokio. Drastically reduces compile times and dependency tree. - -### 0.1.14 minimap2 2.26 -* Memory leak fixed by @Adoni5 -* Updated deps - -### 0.1.13 minimap2 2.26 -* Add with_seq to support indexing a single sequence (as per mappy: https://github.com/lh3/minimap2/blob/master/python/mappy.pyx#L115) -* minimap2-rs: update rust-htslib deps -* simdutf8 now optional dependency requiring map-file feature to be enabled -* Support soft-clipping string in CIGAR. WARNING: Does not support hard clipping. Please open an issue if you need this. -* Update minimap to 2.26 -* Not convinced SSE41/SSE2 are working properly. Recommend simde. - -### 0.1.11 -* HTS lib: add support for optional quality scores by @eharr - -### 0.1.10 -* HTS lib support by @eharr -* HTS lib: Output sam/bam files by @eharr -* More tests by @eharr -* Display impl for Strand thanks to @ahcm -* Update minimap2-sys to latest version by @jguhlin -* -sys crate mm2fast added as additional backend by @jguhlin -* zlib dep changes by @jguhlin (hopefully now it is more portable and robust) -* -sys crate now supports SIMDe - -## 0.1.9 -* Thanks for @Adoni5 for switching to builder pattern, and @eharr for adding additional fields to alignment. -* Do not require libclang for normal compilation. -## 0.1.8 -* Multithreading support (use less raw pointers, and treat more like rust Struct's) -## 0.1.7 -* use libc instead of std:ffi::c_int as well -## 0.1.6 -* Support slightly older versions of rustc by using libc:: rather than std::ffi for c_char (Thanks dwpeng!) -* Use fffx module for fasta/q parsing +See [CHANGELOG.md](CHANGELOG.md) # Funding ![Genomics Aotearoa](info/genomics-aotearoa.png) diff --git a/minimap2-sys/Cargo.toml b/minimap2-sys/Cargo.toml index 82e45af..dcb148e 100644 --- a/minimap2-sys/Cargo.toml +++ b/minimap2-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimap2-sys" -version = "0.1.20+minimap2.2.28" +version = "0.1.21+minimap2.2.28" edition = "2021" links = "libminimap2" authors = ["Joseph Guhlin "] diff --git a/minimap2-sys/README.md b/minimap2-sys/README.md index c066a37..f800064 100644 --- a/minimap2-sys/README.md +++ b/minimap2-sys/README.md @@ -19,6 +19,9 @@ mm2-fast and minimap2 have diverged. At this point mm2-fast is no longer support * Can we decouple from pthread? This would allow Windows and (possibly) WASM compilation. ## Changelog +### 0.1.21 minimap2.2.28 +Syntactic sugar for mm_idx_t to support Drop, Deref, and DerefMut + ### 0.1.20 minimap2.2.28 * Move Drop impl to -sys crate From c721f417c87e75fac1bc58a9a41a9fbb653643fe Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Sun, 5 Jan 2025 11:11:05 +1300 Subject: [PATCH 4/4] Update htslib support to new MmIdx --- Cargo.toml | 2 +- src/htslib.rs | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a59f329..a4f58f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,7 @@ needletail = { version = "0.6", default-features = false} [features] default = ["map-file"] -map-file = ["needletail"] #, "simdutf8"] +map-file = ["needletail"] htslib = ['rust-htslib'] simde = ["minimap2-sys/simde"] zlib-ng = ["minimap2-sys/zlib-ng"] diff --git a/src/htslib.rs b/src/htslib.rs index 5f0ad6f..9941065 100644 --- a/src/htslib.rs +++ b/src/htslib.rs @@ -303,9 +303,8 @@ pub struct SeqMetaData { pub is_alt: bool, } -#[derive(Debug)] pub struct MMIndex { - pub inner: Arc<*mut mm_ffi::mm_idx_t>, + pub inner: Arc, } impl MMIndex { @@ -345,8 +344,7 @@ impl MMIndex { impl From<&Aligner> for MMIndex { fn from(aligner: &Aligner) -> Self { MMIndex { - // inner: aligner.idx.unwrap(), - inner: std::sync::Arc::clone(&aligner.idx.as_ref().unwrap()), + inner: std::sync::Arc::clone(aligner.idx.as_ref().unwrap()), } } }