Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow nested rtxn from wtxn for v0.20 #309

Draft
wants to merge 13 commits into
base: release-v0.20.5
Choose a base branch
from
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "lmdb-master-sys/lmdb"]
path = lmdb-master-sys/lmdb
url = https://github.com/LMDB/lmdb
branch = mdb.master
url = https://github.com/meilisearch/lmdb.git
branch = allow-nested-rtxn-from-wtxn
29 changes: 26 additions & 3 deletions heed/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "heed"
version = "0.20.4"
version = "0.20.5"
authors = ["Kerollmops <[email protected]>"]
description = "A fully typed LMDB wrapper with minimum overhead"
license = "MIT"
Expand All @@ -23,8 +23,11 @@ serde = { version = "1.0.203", features = ["derive"], optional = true }
synchronoise = "1.0.1"

[dev-dependencies]
serde = { version = "1.0.203", features = ["derive"] }
tempfile = "3.10.1"
rand = "0.9.0"
rayon = "1.10.0"
roaring = "0.10.10"
serde = { version = "1.0.217", features = ["derive"] }
tempfile = "3.15.0"

[target.'cfg(windows)'.dependencies]
url = "2.5.2"
Expand Down Expand Up @@ -61,6 +64,10 @@ arbitrary_precision = ["heed-types/arbitrary_precision"]
raw_value = ["heed-types/raw_value"]
unbounded_depth = ["heed-types/unbounded_depth"]

asan = ["lmdb-master-sys/asan"]
fuzzer = ["lmdb-master-sys/fuzzer"]
fuzzer-no-link = ["lmdb-master-sys/fuzzer-no-link"]

# Whether to tell LMDB to use POSIX semaphores during compilation
# (instead of the default, which are System V semaphores).
# POSIX semaphores are required for Apple's App Sandbox on iOS & macOS,
Expand All @@ -70,6 +77,18 @@ unbounded_depth = ["heed-types/unbounded_depth"]
# <https://github.com/LMDB/lmdb/blob/3947014aed7ffe39a79991fa7fb5b234da47ad1a/libraries/liblmdb/lmdb.h#L46-L69>
posix-sem = ["lmdb-master-sys/posix-sem"]

# Prints debugging information on stderr.
debug-simple = ["lmdb-master-sys/debug-simple"]
# Add copious tracing. It enables the debug-simple feature.
debug-copious-tracing = ["lmdb-master-sys/debug-copious-tracing"]
# Add dumps of all IDLs read from and written to the database
# (used for free space management). It enables the debug-simple
# and debug-copious-tracing features.
debug-dump-idls = ["lmdb-master-sys/debug-dump-idls"]
# Do an audit after each commit. It enables the debug-simple,
# debug-copious-tracing, and debug-dump-idls features.
debug-audit-commit = ["lmdb-master-sys/debug-audit-commit"]

# These features configure the MDB_IDL_LOGN macro, which determines
# the size of the free and dirty page lists (and thus the amount of memory
# allocated when opening an LMDB environment in read-write mode).
Expand Down Expand Up @@ -110,6 +129,10 @@ mdb_idl_logn_16 = ["lmdb-master-sys/mdb_idl_logn_16"]
# computers then you need to keep your keys within the smaller 1982 byte limit.
longer-keys = ["lmdb-master-sys/longer-keys"]

[[example]]
name = "nested-rtxns"
required-features = ["read-txn-no-tls"]

[[example]]
name = "rmp-serde"
required-features = ["serde-rmp"]
77 changes: 77 additions & 0 deletions heed/examples/nested-rtxns.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use heed::types::*;
use heed::{Database, EnvOpenOptions};
use rand::prelude::*;
use rayon::prelude::*;
use roaring::RoaringBitmap;

/// Writes roaring bitmaps through a single write transaction and checks, from
/// many nested read transactions running in parallel, that the uncommitted
/// entries are visible.
///
/// The check is done twice: once after a first batch of entries and once after
/// a second batch has been added through the same write transaction.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// Number of entries written before the first verification pass.
    const FIRST_BATCH: u32 = 1000;
    /// Total number of entries once the second batch has been written.
    const TOTAL: u32 = 10_000;

    // The environment lives in a fresh temporary directory created right here.
    let dir = tempfile::tempdir()?;
    let env = unsafe {
        EnvOpenOptions::new()
            .map_size(2 * 1024 * 1024 * 1024) // 2 GiB
            .open(dir.path())?
    };

    // opening a write transaction
    let mut wtxn = env.write_txn()?;
    // we will open the default unnamed database
    let db: Database<U32<byteorder::BigEndian>, Bytes> = env.create_database(&mut wtxn, None)?;

    // first batch: keys 0..FIRST_BATCH
    let mut buffer = Vec::new();
    for i in 0..FIRST_BATCH {
        fill_with_bitmap_bytes(u64::from(i), &mut buffer)?;
        db.put(&mut wtxn, &i, &buffer)?;
    }

    // opening one nested read-only transaction per entry
    // to check if those values are now available
    // without committing beforehand
    let rtxns = (0..FIRST_BATCH)
        .map(|_| env.nested_read_txn(&wtxn))
        .collect::<heed::Result<Vec<_>>>()?;

    rtxns.into_par_iter().enumerate().for_each(|(index, rtxn)| {
        let key = index as u32;

        let mut expected = Vec::new();
        fill_with_bitmap_bytes(u64::from(key), &mut expected).unwrap();

        let ret = db.get(&rtxn, &key).unwrap();
        assert_eq!(ret, Some(&expected[..]));
    });

    // second batch: keys FIRST_BATCH..TOTAL, written through the same write transaction
    for i in FIRST_BATCH..TOTAL {
        fill_with_bitmap_bytes(u64::from(i), &mut buffer)?;
        db.put(&mut wtxn, &i, &buffer)?;
    }

    // opening one nested read-only transaction per newly written entry
    // to check if those values are now available
    // without committing beforehand
    let rtxns = (FIRST_BATCH..TOTAL)
        .map(|_| env.nested_read_txn(&wtxn))
        .collect::<heed::Result<Vec<_>>>()?;

    rtxns.into_par_iter().enumerate().for_each(|(index, rtxn)| {
        // `enumerate` restarts at 0, so shift the index back into the
        // FIRST_BATCH..TOTAL key range that was just written.
        let key = FIRST_BATCH + index as u32;

        let mut expected = Vec::new();
        fill_with_bitmap_bytes(u64::from(key), &mut expected).unwrap();

        let ret = db.get(&rtxn, &key).unwrap();
        assert_eq!(ret, Some(&expected[..]));
    });

    Ok(())
}

/// Replaces the content of `buffer` with the serialized roaring bitmap
/// associated with `seed`.
///
/// The bitmap contains every integer in `0..max`, where `max` is drawn from
/// `10_000..=100_000` by a generator seeded with `seed`; the same seed
/// therefore always produces the same bytes, which lets readers rebuild the
/// value they expect to find in the database.
fn fill_with_bitmap_bytes(
    seed: u64,
    buffer: &mut Vec<u8>,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut rng = StdRng::seed_from_u64(seed);
    let max = rng.random_range(10_000..=100_000);
    let roaring = RoaringBitmap::from_sorted_iter(0..max)?;
    buffer.clear();
    roaring.serialize_into(&mut *buffer)?;
    Ok(())
}
46 changes: 46 additions & 0 deletions heed/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,52 @@ impl Env {
RoTxn::new(self)
}

/// Create a nested transaction with read-only access for use with the environment.
///
/// The new transaction is nested inside `parent`, the write transaction given as argument,
/// and borrows both the environment and `parent` for as long as it is alive. As shown in
/// the example below, entries written through `parent` can be read from the nested
/// transaction before `parent` is committed.
///
/// This method is only available when the `read-txn-no-tls` feature is enabled.
///
/// # Errors
///
/// Returns the error produced by [`RoTxn::nested`] when the nested transaction
/// cannot be created.
///
/// # Examples
///
/// ```
/// use heed::{EnvOpenOptions, Database};
/// use heed::types::*;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let dir = tempfile::tempdir()?;
/// let env = unsafe {
/// EnvOpenOptions::new()
/// .map_size(2 * 1024 * 1024) // 2 MiB
/// .open(dir.path())?
/// };
///
/// // opening a write transaction
/// let mut wtxn = env.write_txn()?;
/// // we will open the default unnamed database
/// let db: Database<U32<byteorder::BigEndian>, U32<byteorder::BigEndian>> = env.create_database(&mut wtxn, None)?;
///
/// // writing entries without committing them
/// for i in 0..1000 {
/// db.put(&mut wtxn, &i, &i)?;
/// }
///
/// // opening multiple nested read-only transactions
/// // to check if those values are now available
/// // without committing beforehand
/// let rtxns = (0..1000).map(|_| env.nested_read_txn(&wtxn)).collect::<heed::Result<Vec<_>>>()?;
///
/// for (i, rtxn) in rtxns.iter().enumerate() {
/// let i = i as u32;
/// let ret = db.get(&rtxn, &i)?;
/// assert_eq!(ret, Some(i));
/// }
///
/// # Ok(()) }
/// ```
#[cfg(feature = "read-txn-no-tls")]
pub fn nested_read_txn<'p>(&'p self, parent: &'p RwTxn) -> Result<RoTxn<'p>> {
RoTxn::nested(self, parent)
}

/// Create a transaction with read-only access for use with the environment.
/// Contrary to [`Self::read_txn`], this version **owns** the environment, which
/// means you won't be able to close the environment while this transaction is alive.
Expand Down
16 changes: 16 additions & 0 deletions heed/src/txn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,22 @@ impl<'e> RoTxn<'e> {
Ok(RoTxn { txn, env: Cow::Owned(env) })
}

#[cfg(feature = "read-txn-no-tls")]
pub(crate) fn nested<'p>(env: &'p Env, parent: &'p RwTxn) -> Result<RoTxn<'p>> {
    // Raw LMDB handle of the write transaction that becomes the parent.
    let parent_handle: *mut ffi::MDB_txn = parent.txn.txn;
    // Out-pointer that LMDB fills with the handle of the new child transaction.
    let mut child_handle: *mut ffi::MDB_txn = ptr::null_mut();

    unsafe {
        // The flags are 0, so what LMDB is asked for here is a nested *write*
        // transaction: this is the (current) workaround used to obtain
        // several children under one write transaction.
        // NOTE(review): presumably relies on the LMDB build accepting
        // multiple children of a write transaction; confirm against the
        // vendored LMDB sources.
        mdb_result(ffi::mdb_txn_begin(env.env_mut_ptr(), parent_handle, 0, &mut child_handle))?
    };

    // The child is only ever exposed as a `RoTxn`, so the caller has
    // no way to perform modifications through it.
    Ok(RoTxn { txn: child_handle, env: Cow::Borrowed(env) })
}

pub(crate) fn env_mut_ptr(&self) -> *mut ffi::MDB_env {
self.env.env_mut_ptr()
}
Expand Down
16 changes: 15 additions & 1 deletion lmdb-master-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ doctest = false
libc = "0.2.155"

[build-dependencies]
bindgen = { version = "0.69.4", default-features = false, optional = true, features = ["runtime"] }
bindgen = { version = "0.69.4", default-features = false, optional = true, features = [
"runtime",
] }
cc = "1.0.104"
doxygen-rs = "0.4.2"

Expand All @@ -41,6 +43,18 @@ fuzzer = []
fuzzer-no-link = []
posix-sem = []

# Prints debugging information on stderr.
debug-simple = []
# Add copious tracing. It enables the debug-simple feature.
debug-copious-tracing = []
# Add dumps of all IDLs read from and written to the database
# (used for free space management). It enables the debug-simple
# and debug-copious-tracing features.
debug-dump-idls = []
# Do an audit after each commit. It enables the debug-simple,
# debug-copious-tracing, and debug-dump-idls features.
debug-audit-commit = []

# These features configure the MDB_IDL_LOGN macro, which determines
# the size of the free and dirty page lists (and thus the amount of memory
# allocated when opening an LMDB environment in read-write mode).
Expand Down
31 changes: 31 additions & 0 deletions lmdb-master-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,32 @@ const MDB_IDL_LOGN: u8 = 15;
))]
const MDB_IDL_LOGN: u8 = 16;

/// Value given to the `MDB_DEBUG` define when compiling LMDB, selected from
/// the enabled `debug-*` cargo features.
///
/// `None` when no debug feature is enabled. Otherwise the highest enabled
/// level wins:
///
/// | feature                 | value |
/// |-------------------------|-------|
/// | `debug-simple`          | 0     |
/// | `debug-copious-tracing` | 1     |
/// | `debug-dump-idls`       | 2     |
/// | `debug-audit-commit`    | 3     |
const MDB_DEBUG: Option<u8> = if cfg!(feature = "debug-audit-commit") {
    Some(3)
} else if cfg!(feature = "debug-dump-idls") {
    Some(2)
} else if cfg!(feature = "debug-copious-tracing") {
    Some(1)
} else if cfg!(feature = "debug-simple") {
    Some(0)
} else {
    None
};

macro_rules! warn {
($message:expr) => {
println!("cargo:warning={}", $message);
Expand Down Expand Up @@ -133,12 +159,17 @@ fn main() {
.flag_if_supported("-Wbad-function-cast")
.flag_if_supported("-Wuninitialized");

if let Some(debug) = MDB_DEBUG {
builder.define("MDB_DEBUG", Some(debug.to_string().as_str()));
}

if cfg!(feature = "posix-sem") {
builder.define("MDB_USE_POSIX_SEM", None);
}

if cfg!(feature = "asan") {
builder.flag("-fsanitize=address");
builder.flag("-static-libasan");
}

if cfg!(feature = "fuzzer") {
Expand Down
Loading