Merge branch 'master' into hypergeo_fix

rust-random · Nov 13, 2024 · a42e3c9 · a42e3c9
2 parents d96e219 + ad67294
commit a42e3c9
Show file tree

Hide file tree

Showing 17 changed files with 329 additions and 125 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,14 +21,11 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
 - Rename `rand::distributions` to `rand::distr` (#1470)
 - The `serde1` feature has been renamed `serde` (#1477)
 - The implicit feature `rand_chacha` has been removed. This is enabled by `std_rng`. (#1473)
-- Mark `WeightError`, `PoissonError`, `BinomialError` as `#[non_exhaustive]` (#1480).
+- Mark `WeightError` as `#[non_exhaustive]` (#1480).
 - Add `p()` for `Bernoulli` to access probability (#1481)
 - Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
-- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
 - Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
 - Improve SmallRng initialization performance (#1482)
-- Implement `Distribution<u64>` for `Poisson<f64>` (#1498)
-- Limit the maximal acceptable lambda for `Poisson` to solve (#1312) (#1498)
 - Rename `Rng::gen_iter` to `random_iter` (#1500)
 - Rename `rand::thread_rng()` to `rand::rng()`, and remove from the prelude (#1506)
 - Remove `rand::random()` from the prelude (#1506)

diff --git a/README.md b/README.md
@@ -6,20 +6,20 @@
 [![API](https://img.shields.io/badge/api-master-yellow.svg)](https://rust-random.github.io/rand/rand)
 [![API](https://docs.rs/rand/badge.svg)](https://docs.rs/rand)
 
-Rand is a Rust library supporting random generators:
+Rand is a set of crates supporting (pseudo-)random generators:
 
--   A standard RNG trait: [`rand_core::RngCore`](https://docs.rs/rand_core/latest/rand_core/trait.RngCore.html)
--   Fast implementations of the best-in-class [cryptographic](https://rust-random.github.io/book/guide-rngs.html#cryptographically-secure-pseudo-random-number-generators-csprngs) and
-    [non-cryptographic](https://rust-random.github.io/book/guide-rngs.html#basic-pseudo-random-number-generators-prngs) generators: [`rand::rngs`](https://docs.rs/rand/latest/rand/rngs/index.html), and more RNGs: [`rand_chacha`](https://docs.rs/rand_chacha), [`rand_xoshiro`](https://docs.rs/rand_xoshiro/), [`rand_pcg`](https://docs.rs/rand_pcg/), [rngs repo](https://github.com/rust-random/rngs/)
--   [`rand::rng`](https://docs.rs/rand/latest/rand/fn.rng.html) is an asymtotically-fast, reasonably secure generator available on all `std` targets
--   Secure seeding via the [`getrandom` crate](https://crates.io/crates/getrandom)
+-   Built over a standard RNG trait: [`rand_core::RngCore`](https://docs.rs/rand_core/latest/rand_core/trait.RngCore.html)
+-   With fast implementations of both [strong](https://rust-random.github.io/book/guide-rngs.html#cryptographically-secure-pseudo-random-number-generators-csprngs) and
+    [small](https://rust-random.github.io/book/guide-rngs.html#basic-pseudo-random-number-generators-prngs) generators: [`rand::rngs`](https://docs.rs/rand/latest/rand/rngs/index.html), and more RNGs: [`rand_chacha`](https://docs.rs/rand_chacha), [`rand_xoshiro`](https://docs.rs/rand_xoshiro/), [`rand_pcg`](https://docs.rs/rand_pcg/), [rngs repo](https://github.com/rust-random/rngs/)
+-   [`rand::rng`](https://docs.rs/rand/latest/rand/fn.rng.html) is an asymptotically-fast, automatically-seeded and reasonably strong generator available on all `std` targets
+-   Direct support for seeding generators from the [`getrandom` crate](https://crates.io/crates/getrandom)
 
-Supporting random value generation and random processes:
+With broad support for random value generation and random processes:
 
--   [`Standard`](https://docs.rs/rand/latest/rand/distributions/struct.Standard.html) random value generation
--   Ranged [`Uniform`](https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html) number generation for many types
--   A flexible [`distributions`](https://docs.rs/rand/*/rand/distr/index.html) module
--   Samplers for a large number of random number distributions via our own
+-   [`Standard`](https://docs.rs/rand/latest/rand/distributions/struct.Standard.html) random value sampling,
+    [`Uniform`](https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html)-ranged value sampling
+    and [more](https://docs.rs/rand/latest/rand/distr/index.html)
+-   Samplers for a large number of non-uniform random number distributions via our own
     [`rand_distr`](https://docs.rs/rand_distr) and via
     the [`statrs`](https://docs.rs/statrs/0.13.0/statrs/)
 -   Random processes (mostly choose and shuffle) via [`rand::seq`](https://docs.rs/rand/latest/rand/seq/index.html) traits
@@ -28,19 +28,23 @@ All with:
 
 -   [Portably reproducible output](https://rust-random.github.io/book/portability.html)
 -   `#[no_std]` compatibility (partial)
--   *Many* performance optimisations
+-   *Many* performance optimisations thanks to contributions from the wide
+    user-base
 
-It's also worth pointing out what Rand *is not*:
+Rand **is not**:
 
--   Small. Most low-level crates are small, but the higher-level `rand` and
-    `rand_distr` each contain a lot of functionality.
+-   Small (LOC). Most low-level crates are small, but the higher-level `rand`
+    and `rand_distr` each contain a lot of functionality.
 -   Simple (implementation). We have a strong focus on correctness, speed and flexibility, but
     not simplicity. If you prefer a small-and-simple library, there are
     alternatives including [fastrand](https://crates.io/crates/fastrand)
     and [oorandom](https://crates.io/crates/oorandom).
--   Slow. We take performance seriously, with considerations also for set-up
-    time of new distributions, commonly-used parameters, and parameters of the
-    current sampler.
+-   A cryptography library. Rand provides functionality for generating
+    unpredictable random data (potentially applicable depending on requirements)
+    but does not provide high-level cryptography functionality.
+
+Rand is a community project and cannot provide legally-binding guarantees of
+security.
 
 Documentation:
 

diff --git a/SECURITY.md b/SECURITY.md
@@ -1,31 +1,46 @@
 # Security Policy
 
-## No guarantees
+## Disclaimer
 
-Support is provided on a best-effort bases only.
-No binding guarantees can be provided.
+Rand is a community project and cannot provide legally-binding guarantees of
+security.
 
 ## Security premises
 
-Rand provides the trait `rand_core::CryptoRng` aka `rand::CryptoRng` as a marker
-trait. Generators implementing `RngCore` *and* `CryptoRng`, and given the
-additional constraints that:
+### Marker traits
+
+Rand provides the marker traits `CryptoRng`, `TryCryptoRng` and
+`CryptoBlockRng`. Generators implementing one of these traits and used in a way
+which meets the following additional constraints:
 
 -   Instances of seedable RNGs (those implementing `SeedableRng`) are
     constructed with cryptographically secure seed values
--   The state (memory) of the RNG and its seed value are not be exposed
+-   The state (memory) of the RNG and its seed value are not exposed
 
 are expected to provide the following:
 
--   An attacker can gain no advantage over chance (50% for each bit) in
-    predicting the RNG output, even with full knowledge of all prior outputs.
+-   An attacker cannot predict the output with more accuracy than what would be
+    expected through pure chance since each possible output value of any method
+    under the above traits which generates output bytes (including
+    `RngCore::next_u32`, `RngCore::next_u64`, `RngCore::fill_bytes`,
+    `TryRngCore::try_next_u32`, `TryRngCore::try_next_u64`,
+    `TryRngCore::try_fill_bytes` and `BlockRngCore::generate`) should be equally
+    likely
+-   Knowledge of prior outputs from the generator does not aid an attacker in
+    predicting future outputs
+
+### Specific generators
+
+`OsRng` is a stateless "generator" implemented via [getrandom]. As such, it has
+no possible state to leak and cannot be improperly seeded.
+
+`ThreadRng` will periodically reseed itself, thus placing an upper bound on the
+number of bits of output from an instance before any advantage an attacker may
+have gained through state-compromising side-channel attacks is lost.
 
-For some RNGs, notably `OsRng`, `ThreadRng` and those wrapped by `ReseedingRng`,
-we provide limited mitigations against side-channel attacks:
+[getrandom]: https://crates.io/crates/getrandom
 
--   After the state (memory) of an RNG is leaked, there is an upper-bound on the
-    number of bits of output by the RNG before prediction of output by an
-    observer again becomes computationally-infeasible
+### Distributions
 
 Additionally, derivations from such an RNG (including the `Rng` trait,
 implementations of the `Distribution` trait, and `seq` algorithms) should not

diff --git a/benches/benches/distr.rs b/benches/benches/distr.rs
@@ -159,7 +159,7 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
     g.finish();
 
     let mut g = c.benchmark_group("zipf");
-    distr_float!(g, "zipf", f64, Zipf::new(10, 1.5).unwrap());
+    distr_float!(g, "zipf", f64, Zipf::new(10.0, 1.5).unwrap());
     distr_float!(g, "zeta", f64, Zeta::new(1.5).unwrap());
     g.finish();
 

diff --git a/rand_core/src/os.rs b/rand_core/src/os.rs
@@ -11,10 +11,9 @@
 use crate::{TryCryptoRng, TryRngCore};
 use getrandom::getrandom;
 
-/// A random number generator that retrieves randomness from the
-/// operating system.
+/// An interface over the operating-system's random data source
 ///
-/// This is a zero-sized struct. It can be freely constructed with `OsRng`.
+/// This is a zero-sized struct. It can be freely constructed with just `OsRng`.
 ///
 /// The implementation is provided by the [getrandom] crate. Refer to
 /// [getrandom] documentation for details.
@@ -32,7 +31,8 @@ use getrandom::getrandom;
 ///
 /// After the first successful call, it is highly unlikely that failures or
 /// significant delays will occur (although performance should be expected to
-/// be much slower than a user-space PRNG).
+/// be much slower than a user-space
+/// [PRNG](https://rust-random.github.io/book/guide-gen.html#pseudo-random-number-generators)).
 ///
 /// # Usage example
 /// ```

diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md
@@ -10,7 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Move some of the computations in Binomial from `sample` to `new` (#1484)
 - Add Kolmogorov Smirnov test for sampling of `Normal` and `Binomial` (#1494)
 - Add Kolmogorov Smirnov test for more distributions (#1504)
+- Mark `WeightError`, `PoissonError`, `BinomialError` as `#[non_exhaustive]` (#1480).
+- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
+- Limit the maximal acceptable lambda for `Poisson` to solve (#1312) (#1498)
 - Fix bug in `Hypergeometric`, this is a Value-breaking change (#1510)
+- Change parameter type of `Zipf::new`: `n` is now floating-point (#1518)
 
 ### Added
 - Add plots for `rand_distr` distributions to documentation (#1434)

diff --git a/rand_distr/src/poisson.rs b/rand_distr/src/poisson.rs
@@ -39,6 +39,17 @@ use rand::Rng;
 /// let v: f64 = poi.sample(&mut rand::rng());
 /// println!("{} is from a Poisson(2) distribution", v);
 /// ```
+///
+/// # Integer vs FP return type
+///
+/// This implementation uses floating-point (FP) logic internally.
+///
+/// Due to the parameter limit <code>λ < [Self::MAX_LAMBDA]</code>, it is
+/// statistically impossible to sample a value larger than [`u64::MAX`]. As such, it
+/// is reasonable to cast generated samples to `u64` using `as`:
+/// `distr.sample(&mut rng) as u64` (and memory safe since Rust 1.45).
+/// Similarly, when `λ < 4.2e9` it can be safely assumed that samples are less
+/// than `u32::MAX`.
 #[derive(Clone, Copy, Debug, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Poisson<F>(Method<F>)
@@ -238,14 +249,6 @@ where
     }
 }
 
-impl Distribution<u64> for Poisson<f64> {
-    #[inline]
-    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u64 {
-        // `as` from float to int saturates
-        <Poisson<f64> as Distribution<f64>>::sample(self, rng) as u64
-    }
-}
-
 #[cfg(test)]
 mod test {
     use super::*;

diff --git a/rand_distr/src/zeta.rs b/rand_distr/src/zeta.rs
@@ -40,6 +40,17 @@ use rand::{distr::OpenClosed01, Rng};
 /// println!("{}", val);
 /// ```
 ///
+/// # Integer vs FP return type
+///
+/// This implementation uses floating-point (FP) logic internally, which can
+/// potentially generate very large samples (exceeding e.g. `u64::MAX`).
+///
+/// It is *safe* to cast such results to an integer type using `as`
+/// (e.g. `distr.sample(&mut rng) as u64`), since such casts are saturating
+/// (e.g. `2f64.powi(64) as u64 == u64::MAX`). It is up to the user to
+/// determine whether this potential loss of accuracy is acceptable
+/// (this determination may depend on the distribution's parameters).
+///
 /// # Notes
 ///
 /// The zeta distribution has no upper limit. Sampled values may be infinite.

diff --git a/rand_distr/src/zipf.rs b/rand_distr/src/zipf.rs
@@ -35,10 +35,17 @@ use rand::Rng;
 /// use rand::prelude::*;
 /// use rand_distr::Zipf;
 ///
-/// let val: f64 = rand::rng().sample(Zipf::new(10, 1.5).unwrap());
+/// let val: f64 = rand::rng().sample(Zipf::new(10.0, 1.5).unwrap());
 /// println!("{}", val);
 /// ```
 ///
+/// # Integer vs FP return type
+///
+/// This implementation uses floating-point (FP) logic internally. It may be
+/// expected that the samples are no greater than `n`, thus it is reasonable to
+/// cast generated samples to any integer type which can also represent `n`
+/// (e.g. `distr.sample(&mut rng) as u64`).
+///
 /// # Implementation details
 ///
 /// Implemented via [rejection sampling](https://en.wikipedia.org/wiki/Rejection_sampling),
@@ -85,16 +92,17 @@ where
     /// Construct a new `Zipf` distribution for a set with `n` elements and a
     /// frequency rank exponent `s`.
     ///
-    /// For large `n`, rounding may occur to fit the number into the float type.
+    /// The parameter `n` is typically integral, however we use type
+    /// <code>F: [Float]</code> in order to permit very large values
+    /// and since our implementation requires a floating-point type.
     #[inline]
-    pub fn new(n: u64, s: F) -> Result<Zipf<F>, Error> {
+    pub fn new(n: F, s: F) -> Result<Zipf<F>, Error> {
         if !(s >= F::zero()) {
             return Err(Error::STooSmall);
         }
-        if n < 1 {
+        if n < F::one() {
             return Err(Error::NTooSmall);
         }
-        let n = F::from(n).unwrap(); // This does not fail.
         let q = if s != F::one() {
             // Make sure to calculate the division only once.
             F::one() / (F::one() - s)
@@ -166,24 +174,24 @@ mod tests {
     #[test]
     #[should_panic]
     fn zipf_s_too_small() {
-        Zipf::new(10, -1.).unwrap();
+        Zipf::new(10., -1.).unwrap();
     }
 
     #[test]
     #[should_panic]
     fn zipf_n_too_small() {
-        Zipf::new(0, 1.).unwrap();
+        Zipf::new(0., 1.).unwrap();
     }
 
     #[test]
     #[should_panic]
     fn zipf_nan() {
-        Zipf::new(10, f64::NAN).unwrap();
+        Zipf::new(10., f64::NAN).unwrap();
     }
 
     #[test]
     fn zipf_sample() {
-        let d = Zipf::new(10, 0.5).unwrap();
+        let d = Zipf::new(10., 0.5).unwrap();
         let mut rng = crate::test::rng(2);
         for _ in 0..1000 {
             let r = d.sample(&mut rng);
@@ -193,7 +201,7 @@ mod tests {
 
     #[test]
     fn zipf_sample_s_1() {
-        let d = Zipf::new(10, 1.).unwrap();
+        let d = Zipf::new(10., 1.).unwrap();
         let mut rng = crate::test::rng(2);
         for _ in 0..1000 {
             let r = d.sample(&mut rng);
@@ -203,7 +211,7 @@ mod tests {
 
     #[test]
     fn zipf_sample_s_0() {
-        let d = Zipf::new(10, 0.).unwrap();
+        let d = Zipf::new(10., 0.).unwrap();
         let mut rng = crate::test::rng(2);
         for _ in 0..1000 {
             let r = d.sample(&mut rng);
@@ -214,7 +222,7 @@ mod tests {
 
     #[test]
     fn zipf_sample_large_n() {
-        let d = Zipf::new(u64::MAX, 1.5).unwrap();
+        let d = Zipf::new(f64::MAX, 1.5).unwrap();
         let mut rng = crate::test::rng(2);
         for _ in 0..1000 {
             let r = d.sample(&mut rng);
@@ -225,12 +233,12 @@ mod tests {
 
     #[test]
     fn zipf_value_stability() {
-        test_samples(Zipf::new(10, 0.5).unwrap(), 0f32, &[10.0, 2.0, 6.0, 7.0]);
-        test_samples(Zipf::new(10, 2.0).unwrap(), 0f64, &[1.0, 2.0, 3.0, 2.0]);
+        test_samples(Zipf::new(10., 0.5).unwrap(), 0f32, &[10.0, 2.0, 6.0, 7.0]);
+        test_samples(Zipf::new(10., 2.0).unwrap(), 0f64, &[1.0, 2.0, 3.0, 2.0]);
     }
 
     #[test]
     fn zipf_distributions_can_be_compared() {
-        assert_eq!(Zipf::new(1, 2.0), Zipf::new(1, 2.0));
+        assert_eq!(Zipf::new(1.0, 2.0), Zipf::new(1.0, 2.0));
     }
 }
diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
@@ -385,7 +385,7 @@ fn zipf() {
     let parameters = [(1000, 1.0), (500, 2.0), (1000, 0.5)];
 
     for (seed, (n, x)) in parameters.into_iter().enumerate() {
-        let dist = rand_distr::Zipf::new(n, x).unwrap();
+        let dist = rand_distr::Zipf::new(n as f64, x).unwrap();
         test_discrete(seed as u64, dist, |k| cdf(k, n, x));
     }
 }