Skip to content

Commit

Permalink
Baked data: use VarULE to store data when specified (#6133)
Browse files Browse the repository at this point in the history
#5230

I implemented this by adding a trait, `MaybeAsVarULE`, which is
implemented on data structs in the same way that `Bake` and `Serialize`
are.

The trait has an associated type that I can leverage in the baked
exporter without having to do `dyn Any` downcasting.

While writing this PR I tried to keep my understanding of
@robertbastian's position in mind (don't add anything hyper-specific to
bake in the provider crate). I think I achieved this, since the new trait
sits on the same level as the existing format-specific traits.

See the hello world data for an example. I have not yet applied it to the
other data markers.

---------

Co-authored-by: Robert Bastian <[email protected]>
  • Loading branch information
sffc and robertbastian authored Feb 25, 2025
1 parent e275e86 commit dabb344
Show file tree
Hide file tree
Showing 30 changed files with 580 additions and 60 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions components/calendar/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ pub struct JapaneseEras<'data> {
pub dates_to_eras: ZeroVec<'data, (EraStartDate, TinyStr16)>,
}

icu_provider::data_struct_new!(
JapaneseEras<'_>,
#[cfg(feature = "datagen")]
);

/// An ICU4X mapping to a subset of CLDR weekData.
/// See CLDR-JSON's weekData.json for more context.
///
Expand All @@ -159,6 +164,11 @@ pub struct WeekData {
pub weekend: WeekdaySet,
}

icu_provider::data_struct_new!(
WeekData,
#[cfg(feature = "datagen")]
);

/// Bitset representing weekdays.
//
// This Bitset uses an [u8] to represent the weekend, thus leaving one bit free.
Expand Down
5 changes: 5 additions & 0 deletions components/calendar/src/provider/chinese_based.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ pub struct ChineseBasedCache<'data> {
pub data: ZeroVec<'data, PackedChineseBasedYearInfo>,
}

icu_provider::data_struct_new!(
ChineseBasedCache<'_>,
#[cfg(feature = "datagen")]
);

impl ChineseBasedCache<'_> {
/// Compute this data for a range of years
#[cfg(feature = "datagen")]
Expand Down
5 changes: 5 additions & 0 deletions components/calendar/src/provider/islamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ pub struct IslamicCache<'data> {
pub data: ZeroVec<'data, PackedIslamicYearInfo>,
}

icu_provider::data_struct_new!(
IslamicCache<'_>,
#[cfg(feature = "datagen")]
);

impl IslamicCache<'_> {
/// Compute this data for a range of years
#[cfg(feature = "datagen")]
Expand Down
5 changes: 5 additions & 0 deletions components/casemap/src/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ pub struct CaseMap<'data> {
pub exceptions: CaseMapExceptions<'data>,
}

icu_provider::data_struct_new!(
CaseMap<'_>,
#[cfg(feature = "datagen")]
);

#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for CaseMap<'de> {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
Expand Down
5 changes: 5 additions & 0 deletions components/casemap/src/provider/unfold.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ pub struct CaseMapUnfold<'data> {
pub map: ZeroMap<'data, PotentialUtf8, str>,
}

icu_provider::data_struct_new!(
CaseMapUnfold<'_>,
#[cfg(feature = "datagen")]
);

impl CaseMapUnfold<'_> {
/// Creates a new CaseMapUnfold using data exported by the `icuexportdata` tool in ICU4C.
///
Expand Down
30 changes: 30 additions & 0 deletions components/collator/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,11 @@ pub struct CollationData<'data> {
pub contexts: ZeroVec<'data, u16>,
}

icu_provider::data_struct_new!(
CollationData<'_>,
#[cfg(feature = "datagen")]
);

impl<'data> CollationData<'data> {
pub(crate) fn ce32_for_char(&self, c: char) -> CollationElement32 {
CollationElement32::new(self.trie.get32(c as u32))
Expand Down Expand Up @@ -303,6 +308,11 @@ pub struct CollationDiacritics<'data> {
pub secondaries: ZeroVec<'data, u16>,
}

icu_provider::data_struct_new!(
CollationDiacritics<'_>,
#[cfg(feature = "datagen")]
);

/// `CollationElement32`s for the Hangul Jamo Unicode Block
///
/// <div class="stab unstable">
Expand All @@ -321,6 +331,11 @@ pub struct CollationJamo<'data> {
pub ce32s: ZeroVec<'data, u32>,
}

icu_provider::data_struct_new!(
CollationJamo<'_>,
#[cfg(feature = "datagen")]
);

/// Script reordering data
///
/// <div class="stab unstable">
Expand Down Expand Up @@ -371,6 +386,11 @@ pub struct CollationReordering<'data> {
pub reorder_ranges: ZeroVec<'data, u32>,
}

icu_provider::data_struct_new!(
CollationReordering<'_>,
#[cfg(feature = "datagen")]
);

impl CollationReordering<'_> {
pub(crate) fn reorder(&self, primary: u32) -> u32 {
if let Some(b) = self.reorder_table.get((primary >> 24) as usize) {
Expand Down Expand Up @@ -429,6 +449,11 @@ pub struct CollationMetadata {
pub bits: u32,
}

icu_provider::data_struct_new!(
CollationMetadata,
#[cfg(feature = "datagen")]
);

impl CollationMetadata {
const MAX_VARIABLE_MASK: u32 = 0b11;
const TAILORED_MASK: u32 = 1 << 3;
Expand Down Expand Up @@ -518,6 +543,11 @@ pub struct CollationSpecialPrimaries<'data> {
pub numeric_primary: u8,
}

icu_provider::data_struct_new!(
CollationSpecialPrimaries<'_>,
#[cfg(feature = "datagen")]
);

impl CollationSpecialPrimaries<'_> {
#[allow(clippy::unwrap_used)]
pub(crate) fn last_primary_for_group(&self, max_variable: MaxVariable) -> u32 {
Expand Down
5 changes: 5 additions & 0 deletions components/decimal/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ pub struct DecimalSymbols<'data> {
pub grouping_sizes: GroupingSizes,
}

icu_provider::data_struct_new!(
DecimalSymbols<'_>,
#[cfg(feature = "datagen")]
);

impl DecimalSymbols<'_> {
/// Return (prefix, suffix) for the minus sign
pub fn minus_sign_affixes(&self) -> (&str, &str) {
Expand Down
5 changes: 5 additions & 0 deletions components/list/src/provider/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ data_marker!(
ListFormatterPatterns<'static>,
);

icu_provider::data_struct_new!(
ListFormatterPatterns<'_>,
#[cfg(feature = "datagen")]
);

/// Symbols and metadata required for [`ListFormatter`](crate::ListFormatter).
///
/// <div class="stab unstable">
Expand Down
35 changes: 35 additions & 0 deletions components/locale/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@ pub struct Aliases<'data> {
pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
}

icu_provider::data_struct_new!(
Aliases<'_>,
#[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
Expand Down Expand Up @@ -325,6 +330,11 @@ pub struct LikelySubtagsForLanguage<'data> {
pub und: (Language, Script, Region),
}

icu_provider::data_struct_new!(
LikelySubtagsForLanguage<'_>,
#[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
Expand Down Expand Up @@ -364,6 +374,11 @@ pub struct LikelySubtagsForScriptRegion<'data> {
pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}

icu_provider::data_struct_new!(
LikelySubtagsForScriptRegion<'_>,
#[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
Expand Down Expand Up @@ -398,6 +413,11 @@ pub struct LikelySubtagsExtended<'data> {
pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}

icu_provider::data_struct_new!(
LikelySubtagsExtended<'_>,
#[cfg(feature = "datagen")]
);

/// Locale fallback rules derived from CLDR parent locales data.
#[derive(Default, Clone, PartialEq, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
Expand All @@ -411,6 +431,11 @@ pub struct Parents<'data> {
pub parents: ZeroMap<'data, PotentialUtf8, (Language, Option<Script>, Option<Region>)>,
}

icu_provider::data_struct_new!(
Parents<'_>,
#[cfg(feature = "datagen")]
);

#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_locale::provider))]
Expand All @@ -432,6 +457,11 @@ pub struct ScriptDirection<'data> {
pub ltr: ZeroVec<'data, UnvalidatedScript>,
}

icu_provider::data_struct_new!(
ScriptDirection<'_>,
#[cfg(feature = "datagen")]
);

/// A set of characters and strings which share a particular property value.
///
/// <div class="stab unstable">
Expand All @@ -449,3 +479,8 @@ pub struct ScriptDirection<'data> {
pub struct ExemplarCharactersData<'data>(
#[cfg_attr(feature = "serde", serde(borrow))] pub CodePointInversionListAndStringList<'data>,
);

icu_provider::data_struct_new!(
ExemplarCharactersData<'_>,
#[cfg(feature = "datagen")]
);
20 changes: 20 additions & 0 deletions components/normalizer/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ pub struct DecompositionData<'data> {
pub passthrough_cap: u16,
}

icu_provider::data_struct_new!(
DecompositionData<'_>,
#[cfg(feature = "datagen")]
);

/// The expansion tables for cases where the decomposition isn't
/// contained in the trie value
///
Expand All @@ -154,6 +159,11 @@ pub struct DecompositionTables<'data> {
pub scalars24: ZeroVec<'data, char>,
}

icu_provider::data_struct_new!(
DecompositionTables<'_>,
#[cfg(feature = "datagen")]
);

/// Non-Hangul canonical compositions
///
/// <div class="stab unstable">
Expand All @@ -173,6 +183,11 @@ pub struct CanonicalCompositions<'data> {
pub canonical_compositions: Char16Trie<'data>,
}

icu_provider::data_struct_new!(
CanonicalCompositions<'_>,
#[cfg(feature = "datagen")]
);

/// Non-recursive canonical decompositions that differ from
/// `DecompositionData`.
///
Expand All @@ -194,3 +209,8 @@ pub struct NonRecursiveDecompositionSupplement<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
pub scalars24: ZeroVec<'data, char>,
}

icu_provider::data_struct_new!(
NonRecursiveDecompositionSupplement<'_>,
#[cfg(feature = "datagen")]
);
10 changes: 10 additions & 0 deletions components/plurals/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ pub struct PluralRulesData<'data> {
pub many: Option<Rule<'data>>,
}

icu_provider::data_struct_new!(
PluralRulesData<'_>,
#[cfg(feature = "datagen")]
);

#[cfg(feature = "experimental")]
pub use ranges::*;

Expand Down Expand Up @@ -345,6 +350,11 @@ mod ranges {
#[cfg_attr(feature = "serde", serde(borrow))]
pub ranges: ZeroMap<'data, UnvalidatedPluralRange, RawPluralCategory>,
}

icu_provider::data_struct_new!(
PluralRanges<'_>,
#[cfg(feature = "datagen")]
);
}

/// A sized packed [`PluralElements`] suitable for use in data structs.
Expand Down
5 changes: 5 additions & 0 deletions components/properties/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,11 @@ pub enum PropertyCodePointMap<'data, T: TrieValue> {
// https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}

icu_provider::data_struct_new!(
<T: TrieValue> PropertyCodePointMap<'_, T>,
#[cfg(feature = "datagen")]
);

macro_rules! data_struct_generic {
($(marker($marker:ident, $ty:ident, $path:literal),)+) => {
$(
Expand Down
10 changes: 10 additions & 0 deletions components/time/src/provider/iana.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ pub struct IanaToBcp47Map<'data> {
pub bcp47_ids: ZeroVec<'data, TimeZone>,
}

icu_provider::data_struct_new!(
IanaToBcp47Map<'_>,
#[cfg(feature = "datagen")]
);

/// A mapping from IANA time zone identifiers to BCP-47 time zone identifiers.
///
/// The BCP-47 time zone ID maps to the default IANA time zone ID according to the CLDR data.
Expand All @@ -105,3 +110,8 @@ pub struct IanaNames<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
pub normalized_iana_ids: VarZeroVec<'data, str>,
}

icu_provider::data_struct_new!(
IanaNames<'_>,
#[cfg(feature = "datagen")]
);
5 changes: 5 additions & 0 deletions components/time/src/provider/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,8 @@ pub struct WindowsZonesToBcp47Map<'data> {
#[cfg_attr(feature = "serde", serde(borrow))]
pub bcp47_ids: ZeroVec<'data, TimeZone>,
}

icu_provider::data_struct_new!(
WindowsZonesToBcp47Map<'_>,
#[cfg(feature = "datagen")]
);
1 change: 1 addition & 0 deletions provider/baked/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ include.workspace = true
icu_provider = { workspace = true }
writeable = { workspace = true }
zerotrie = { workspace = true, features = ["alloc"] }
zerovec = { workspace = true }

crlify = { workspace = true, optional = true }
databake = { workspace = true, optional = true}
Expand Down
2 changes: 2 additions & 0 deletions provider/baked/src/binary_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Data stored as slices, looked up with binary search
//!
//! TODO(#6164): This code is stale; update it before use.
use icu_provider::prelude::*;

Expand Down
Loading

0 comments on commit dabb344

Please sign in to comment.