diff --git a/c14n/src/rdfc10.rs b/c14n/src/rdfc10.rs index e18d93d3..2fa825e9 100644 --- a/c14n/src/rdfc10.rs +++ b/c14n/src/rdfc10.rs @@ -18,7 +18,7 @@ use crate::_cnq::nq; use crate::_permutations::for_each_permutation_of; use crate::hash::{HashFunction, Sha256, Sha384}; -/// Return a canonical N-quads representation of `d`, where +/// Write into `w` a canonical N-quads representation of `d`, where /// + blank nodes are canonically [relabelled](`relabel`) with /// - the [SHA-256](Sha256) hash function, /// - the [`DEFAULT_DEPTH_FACTOR`], @@ -30,8 +30,8 @@ pub fn normalize(d: &D, w: W) -> Result<(), C14nError< normalize_with::(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT) } -/// Return a canonical N-quads representation of `d`, where -/// + blank nodes are canonically [relabelled](`relabel`) with +/// Write into `w` a canonical N-quads representation of `d`, where +/// + blank nodes are canonically [relabelled](`relabel_sha384`) with /// - the [SHA-384](Sha384) hash function, /// - the [`DEFAULT_DEPTH_FACTOR`], /// - the [`DEFAULT_PERMUTATION_LIMIT`]; @@ -42,8 +42,11 @@ pub fn normalize_sha384(d: &D, w: W) -> Result<(), C14 normalize_with::(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT) } -/// Return a canonical N-quads representation of `d`, where -/// - blank nodes are canonically [relabelled](`relabel_with`) with the given `depth_factor`, +/// Write into `w` a canonical N-quads representation of `d`, where +/// + blank nodes are canonically [relabelled](`relabel_with`) with +/// - the [hash function](HashFunction) `H`, +/// - the given `depth_factor`, +/// - the given `permutation_limit`; /// - quads are sorted in codepoint order. /// /// See also [`normalize`]. 
@@ -111,12 +114,24 @@ pub fn relabel_sha384(d: &D) -> Result<(C14nQuads, C14nIdMap), C1 relabel_with::(d, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT) } -/// Return a [`Dataset`] isomorphic to `d`, with canonical blank node labels, -/// restricting the number of recursion of RDFC-1.0 to `depth_factor` per blank node, -/// and restricting the size of permutations to `permutation_limit`!. +/// Return a [`Dataset`] isomorphic to `d`, with canonical blank node labels. +/// +/// The generic parameter `H` determines which [hash function](HashFunction) +/// the algorithm should use internally +/// (RDFC-1.0 uses [SHA-256](Sha256) by default). +/// +/// The parameters `depth_factor` and `permutation_limit` +/// are used to stop the algorithm if the computation becomes too complex, +/// in order to secure it against [dataset poisoning](https://www.w3.org/TR/rdf-canon/#dataset-poisoning). +/// The default values ([`DEFAULT_DEPTH_FACTOR`] and [`DEFAULT_PERMUTATION_LIMIT`]) +/// are expected to work with any "realistic" dataset. /// -/// These restrictions prevents the algorithm from blocking on pathological graphs with little practical utility -/// (e.g. big cycles or cliques of undistinguishable blank nodes). +/// More precisely: +/// * the algorithm will not recurse more deeply than `depth_factor`*N, +/// where N is the total number of blank nodes in the dataset; +/// * the algorithm will not try to disambiguate more than +/// `permutation_limit` undistinguishable blank nodes +/// (blank nodes with the same immediate neighbourhood). 
/// /// Implements /// @@ -133,6 +148,11 @@ pub fn relabel_with<'a, H: HashFunction, D: Dataset>( let mut state = C14nState::::new(depth_factor, permutation_limit); // Step 2 for quad in &quads { + if quad.p().is_blank_node() { + return Err(C14nError::Unsupported( + "RDFC-1.0 does not support blank node as predicate".to_string(), + )); + } for component in iter_spog(quad.spog()) { if component.is_triple() || component.is_variable() { return Err(C14nError::Unsupported( @@ -148,11 +168,6 @@ pub fn relabel_with<'a, H: HashFunction, D: Dataset>( } } } - if quad.p().is_blank_node() { - return Err(C14nError::Unsupported( - "RDFC-1.0 does not support blank node as predicate".to_string(), - )); - } } // Step 3 for (bnid, quads) in state.b2q.iter() {