-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ZeroTrie feedback and refactoring (#4553)
Follow-ups to #4549
- Loading branch information
Showing
6 changed files
with
163 additions
and
287 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
//! Options for building and reading from a ZeroTrie.
//!
//! These options are internal to the crate. A small selection of options
//! is exported by way of the different public types on this crate.
/// Whether to use the perfect hash function in the ZeroTrie.
///
/// This is a plain, fieldless option flag, so it is cheap to copy and
/// can be compared and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PhfMode {
    /// Use binary search for all branch nodes.
    BinaryOnly,
    /// Use the perfect hash function for large branch nodes.
    UsePhf,
}
|
||
/// Whether to support non-ASCII data in the ZeroTrie.
///
/// This is a plain, fieldless option flag, so it is cheap to copy and
/// can be compared and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum AsciiMode {
    /// Support only ASCII, returning an error if non-ASCII is found.
    AsciiOnly,
    /// Support all data, creating span nodes for non-ASCII bytes.
    BinarySpans,
}
|
||
/// Whether to enforce a limit to the capacity of the ZeroTrie.
///
/// This is a plain, fieldless option flag, so it is cheap to copy and
/// can be compared and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CapacityMode {
    /// Return an error if the trie requires a branch of more than 2^32 bytes.
    Normal,
    /// Construct the trie without returning an error.
    Extended,
}
|
||
/// How to handle strings with mixed ASCII case at a node, such as "abc" and "Abc".
///
/// This is a plain, fieldless option flag, so it is cheap to copy and
/// can be compared and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CaseSensitivity {
    /// Allow all strings and sort them by byte value.
    Sensitive,
    /// Reject strings with different case and sort them as if `to_ascii_lowercase` is called.
    IgnoreCase,
}
|
||
pub(crate) struct ZeroTrieBuilderOptions { | ||
pub phf_mode: PhfMode, | ||
pub ascii_mode: AsciiMode, | ||
pub capacity_mode: CapacityMode, | ||
pub case_sensitivity: CaseSensitivity, | ||
} | ||
|
||
pub(crate) trait ZeroTrieWithOptions { | ||
const OPTIONS: ZeroTrieBuilderOptions; | ||
} | ||
|
||
/// All branch nodes are binary search | ||
/// and there are no span nodes. | ||
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> { | ||
const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { | ||
phf_mode: PhfMode::BinaryOnly, | ||
ascii_mode: AsciiMode::AsciiOnly, | ||
capacity_mode: CapacityMode::Normal, | ||
case_sensitivity: CaseSensitivity::Sensitive, | ||
}; | ||
} | ||
|
||
/// All branch nodes are binary search | ||
/// and nodes use case-insensitive matching. | ||
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> { | ||
const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { | ||
phf_mode: PhfMode::BinaryOnly, | ||
ascii_mode: AsciiMode::AsciiOnly, | ||
capacity_mode: CapacityMode::Normal, | ||
case_sensitivity: CaseSensitivity::IgnoreCase, | ||
}; | ||
} | ||
|
||
/// Branch nodes could be either binary search or PHF. | ||
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> { | ||
const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { | ||
phf_mode: PhfMode::UsePhf, | ||
ascii_mode: AsciiMode::BinarySpans, | ||
capacity_mode: CapacityMode::Normal, | ||
case_sensitivity: CaseSensitivity::Sensitive, | ||
}; | ||
} | ||
|
||
/// No limited capacity assertion. | ||
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> { | ||
const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions { | ||
phf_mode: PhfMode::UsePhf, | ||
ascii_mode: AsciiMode::BinarySpans, | ||
capacity_mode: CapacityMode::Extended, | ||
case_sensitivity: CaseSensitivity::Sensitive, | ||
}; | ||
} |
Oops, something went wrong.