Skip to content

Commit

Permalink
Adding Percent to DataGen (#4505)
Browse files Browse the repository at this point in the history
  • Loading branch information
blaynem authored Jan 23, 2024
1 parent 3925bbe commit 5a780ea
Show file tree
Hide file tree
Showing 24 changed files with 467 additions and 1 deletion.
57 changes: 56 additions & 1 deletion experimental/dimension/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ use zerovec::{VarZeroVec, ZeroMap};

#[cfg(feature = "datagen")]
/// The latest minimum set of keys required by this component.
pub const KEYS: &[DataKey] = &[CurrencyEssentialsV1Marker::KEY];
pub const KEYS: &[DataKey] = &[
CurrencyEssentialsV1Marker::KEY,
PercentEssentialsV1Marker::KEY,
];

/// This type contains all of the essential data for currency formatting.
///
Expand Down Expand Up @@ -114,3 +117,55 @@ pub struct CurrencyPatterns {
/// If the value is `None`, this means that the narrow pattern does not have a place holder.
pub narrow_place_holder_index: Option<PlaceholderValue>,
}

/// This type contains all of the essential data for percent formatting.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(PercentEssentialsV1Marker = "percent/essentials@1")]
#[derive(Default, Clone, PartialEq, Debug)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_dimension::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct PercentEssentialsV1<'data> {
/// The index of the number placeholder in the standard pattern.
///
/// The datagen transform fills this with the byte offset of the first `0` or `#`
/// found in [`Self::standard`].
pub number_index: u8,

/// Prefix and suffix to apply to a percent sign when needed.
#[cfg_attr(feature = "serde", serde(borrow))]
pub percent_sign_affixes: PercentAffixesV1<'data>,

/// The percent symbol.
#[cfg_attr(feature = "serde", serde(borrow))]
pub percent_sign_symbol: Cow<'data, str>,

/// The index of the percent symbol in the standard pattern.
///
/// The datagen transform fills this with the byte offset of the `%` character
/// found in [`Self::standard`].
pub percent_symbol_index: u8,

/// Represents the standard pattern.
///
/// For example, `#,##0%`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub standard: Cow<'data, str>,
}

/// A collection of strings to affix to the percent sign.
///
/// The prefix is placed before the percent sign and the suffix after it; both may be empty.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Default, Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_dimension::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct PercentAffixesV1<'data> {
/// String to prepend before the percent sign.
#[cfg_attr(feature = "serde", serde(borrow))]
pub prefix: Cow<'data, str>,

/// String to append after the percent sign.
#[cfg_attr(feature = "serde", serde(borrow))]
pub suffix: Cow<'data, str>,
}
1 change: 1 addition & 0 deletions provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ macro_rules! registry {
registry!(
#[cfg(test)]
icu_dimension::provider::CurrencyEssentialsV1Marker = "currency/essentials@1",
icu_dimension::provider::PercentEssentialsV1Marker = "percent/essentials@1",
#[cfg(any(all(), feature = "icu_calendar"))]
icu_calendar::provider::ChineseCacheV1Marker = "calendar/chinesecache@1",
icu_calendar::provider::DangiCacheV1Marker = "calendar/dangicache@1",
Expand Down
14 changes: 14 additions & 0 deletions provider/datagen/src/transform/cldr/cldr_serde/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub struct Symbols {
pub minus_sign: String,
#[serde(rename = "plusSign")]
pub plus_sign: String,
#[serde(rename = "percentSign")]
pub percent_sign: String,
}

#[derive(PartialEq, Debug, Deserialize)]
Expand Down Expand Up @@ -96,6 +98,12 @@ pub struct CurrencyFormattingPatterns {
pub standard_alpha_next_to_number: Option<String>,
}

/// Percent formatting patterns for a single numbering system.
///
/// Deserialized from the value of a `percentFormats` entry in the CLDR numbers data.
#[derive(PartialEq, Debug, Deserialize)]
pub struct PercentFormattingPatterns {
/// Standard pattern, for example `#,##0%`.
pub standard: String,
}

#[derive(PartialEq, Debug, Default)]
pub struct NumberingSystemData {
/// Map from numbering system to symbols
Expand All @@ -104,6 +112,8 @@ pub struct NumberingSystemData {
pub formats: HashMap<TinyStr8, DecimalFormats>,
/// Map from numbering system to patterns
pub currency_patterns: HashMap<TinyStr8, CurrencyFormattingPatterns>,
/// Map from numbering system to percent patterns
pub percent_patterns: HashMap<TinyStr8, PercentFormattingPatterns>,
}

pub struct NumberingSystemDataVisitor;
Expand Down Expand Up @@ -143,6 +153,10 @@ impl<'de> Visitor<'de> for NumberingSystemDataVisitor {
let value: CurrencyFormattingPatterns = access.next_value()?;
result.currency_patterns.insert(numsys, value);
}
"percentFormats" => {
let value: PercentFormattingPatterns = access.next_value()?;
result.percent_patterns.insert(numsys, value);
}
_ => {
// When needed, consume other sections such as "scientificFormats".
// For now, ignore them.
Expand Down
2 changes: 2 additions & 0 deletions provider/datagen/src/transform/cldr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pub mod displaynames;
pub mod fallback;
pub mod list;
pub mod locale_canonicalizer;
#[cfg(test)] // keep as test until bakeddata is needed
pub mod percent;
pub mod plurals;
#[cfg(feature = "icu_relativetime")]
pub mod relativetime;
Expand Down
196 changes: 196 additions & 0 deletions provider/datagen/src/transform/cldr/percent/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use std::collections::HashSet;

use crate::provider::IterableDataProviderInternal;
use crate::transform::cldr::cldr_serde;

use icu_dimension::provider::*;
use icu_provider::prelude::*;
use icu_provider::DataProvider;
use tinystr::tinystr;

impl DataProvider<PercentEssentialsV1Marker> for crate::DatagenProvider {
    /// Loads the percent essentials for the requested locale.
    ///
    /// The request is validated first, then the locale's CLDR `numbers.json` is read and
    /// converted into a [`PercentEssentialsV1`] payload.
    fn load(&self, req: DataRequest) -> Result<DataResponse<PercentEssentialsV1Marker>, DataError> {
        self.check_req::<PercentEssentialsV1Marker>(req)?;

        let langid = req.locale.get_langid();
        let numbers_resource: &cldr_serde::numbers::Resource = self
            .cldr()?
            .numbers()
            .read_and_parse(&langid, "numbers.json")?;

        // Propagate extraction errors before assembling the response.
        let essentials = extract_percent_essentials(numbers_resource)?;
        let payload = Some(DataPayload::from_owned(essentials));

        Ok(DataResponse {
            metadata: Default::default(),
            payload,
        })
    }
}

impl IterableDataProviderInternal<PercentEssentialsV1Marker> for crate::DatagenProvider {
    /// Lists every language available in the CLDR numbers data as a supported locale.
    fn supported_locales_impl(&self) -> Result<HashSet<DataLocale>, DataError> {
        let langs = self.cldr()?.numbers().list_langs()?;
        let locales = langs.map(DataLocale::from).collect();
        Ok(locales)
    }
}

/// Builds the [`PercentEssentialsV1`] data from a parsed CLDR `numbers.json` resource.
///
/// The `latn` numbering system's standard percent pattern is scanned for the percent sign
/// (`%`) and for the digit placeholders (`0` / `#`). From their positions this derives:
///
/// * the byte offset of the percent sign and of the first digit placeholder, and
/// * the affixes around the percent sign, i.e. any text separating it from the number.
///
/// # Errors
///
/// Returns a [`DataError`] if the `latn` patterns or symbols are missing, if the pattern
/// contains no percent sign or no digit placeholder, if the percent sign is located inside
/// the number, or if an offset does not fit in a `u8`.
fn extract_percent_essentials<'data>(
    numbers_resource: &cldr_serde::numbers::Resource,
) -> Result<PercentEssentialsV1<'data>, DataError> {
    let numsys_data = &numbers_resource.main.value.numbers.numsys_data;
    let latn = tinystr!(8, "latn");

    // TODO(#3838): these patterns might be numbering system dependent.
    let percent_patterns = numsys_data
        .percent_patterns
        .get(&latn)
        .ok_or_else(|| DataError::custom("Could not find the standard pattern"))?;

    // TODO(#3838): these symbols might be numbering system dependent.
    let symbols = numsys_data
        .symbols
        .get(&latn)
        .ok_or_else(|| DataError::custom("Could not find the percent symbol"))?;

    let standard_pattern = percent_patterns.standard.as_str();

    let percent_sign = '%';
    let digit_placeholders = ['0', '#'];

    let percent_sign_index = standard_pattern.find(percent_sign).ok_or_else(|| {
        DataError::custom("The standard percent pattern does not contain a percent sign")
    })?;
    let first_num_index = standard_pattern.find(digit_placeholders).ok_or_else(|| {
        DataError::custom("The standard percent pattern does not contain a number placeholder")
    })?;
    let last_num_index = standard_pattern.rfind(digit_placeholders).ok_or_else(|| {
        DataError::custom("The standard percent pattern does not contain a number placeholder")
    })?;

    // Byte offset just past the percent sign.
    let percent_sign_end = percent_sign_index + percent_sign.len_utf8();
    // Byte offset just past the last digit placeholder (`0` and `#` are one byte each).
    let number_end = last_num_index + 1;

    // Percent sign before the number ("%#,##0"):
    //   prefix = everything before the percent sign,
    //   suffix = everything between the percent sign and the first digit placeholder.
    // Percent sign after the number ("#,##0 %"):
    //   prefix = everything between the last digit placeholder and the percent sign,
    //   suffix = everything after the percent sign.
    let (prefix_range, suffix_range) = if percent_sign_index < first_num_index {
        (0..percent_sign_index, percent_sign_end..first_num_index)
    } else {
        (
            number_end..percent_sign_index,
            percent_sign_end..standard_pattern.len(),
        )
    };

    // `str::get` rejects inverted ranges (percent sign inside the number) instead of panicking.
    let percent_prefix = standard_pattern.get(prefix_range).ok_or_else(|| {
        DataError::custom("The percent sign must not be located inside the number pattern")
    })?;
    let percent_suffix = standard_pattern.get(suffix_range).ok_or_else(|| {
        DataError::custom("The percent sign must not be located inside the number pattern")
    })?;

    // The data struct stores the offsets as `u8`; refuse to truncate silently.
    let percent_symbol_index = u8::try_from(percent_sign_index)
        .map_err(|_| DataError::custom("The percent sign index does not fit in a u8"))?;
    let number_index = u8::try_from(first_num_index)
        .map_err(|_| DataError::custom("The number index does not fit in a u8"))?;

    Ok(PercentEssentialsV1 {
        standard: standard_pattern.to_owned().into(),
        percent_sign_symbol: symbols.percent_sign.clone().into(),
        percent_symbol_index,
        number_index,
        percent_sign_affixes: PercentAffixesV1 {
            prefix: percent_prefix.to_owned().into(),
            suffix: percent_suffix.to_owned().into(),
        },
    })
}

/// Loads the percent essentials for a handful of locales from the testing provider and
/// checks the extracted pattern, symbol, indices and affixes.
#[test]
fn test_basic() {
use icu_dimension::provider::*;
use icu_locid::locale;

let provider = crate::DatagenProvider::new_testing();

// English: percent sign directly after the number, no affixes.
let en: DataPayload<PercentEssentialsV1Marker> = provider
.load(DataRequest {
locale: &locale!("en").into(),
metadata: Default::default(),
})
.unwrap()
.take_payload()
.unwrap();

assert_eq!(
en.clone().get().to_owned(),
PercentEssentialsV1 {
standard: "#,##0%".into(),
percent_sign_symbol: "%".into(),
percent_symbol_index: 5,
number_index: 0,
percent_sign_affixes: PercentAffixesV1 {
prefix: "".into(),
suffix: "".into(),
},
}
);

// French: a no-break space (U+00A0, two bytes in UTF-8) separates the number from the
// percent sign, which is why the percent sign sits at byte offset 7.
let fr: DataPayload<PercentEssentialsV1Marker> = provider
.load(DataRequest {
locale: &locale!("fr").into(),
metadata: Default::default(),
})
.unwrap()
.take_payload()
.unwrap();

assert_eq!(
fr.clone().get().to_owned(),
PercentEssentialsV1 {
standard: "#,##0\u{a0}%".into(),
percent_sign_symbol: "%".into(),
percent_symbol_index: 7,
number_index: 0,
percent_sign_affixes: PercentAffixesV1 {
prefix: "\u{a0}".into(),
suffix: "".into(),
},
}
);

// Turkish: percent sign before the number, no affixes.
let tr: DataPayload<PercentEssentialsV1Marker> = provider
.load(DataRequest {
locale: &locale!("tr").into(),
metadata: Default::default(),
})
.unwrap()
.take_payload()
.unwrap();

assert_eq!(
tr.clone().get().to_owned(),
PercentEssentialsV1 {
standard: "%#,##0".into(),
percent_sign_symbol: "%".into(),
percent_symbol_index: 0,
number_index: 1,
percent_sign_affixes: PercentAffixesV1 {
prefix: "".into(),
suffix: "".into(),
},
}
);

// Arabic (Egypt): only the percent symbol is checked.
let ar_eg: DataPayload<PercentEssentialsV1Marker> = provider
.load(DataRequest {
locale: &locale!("ar-EG").into(),
metadata: Default::default(),
})
.unwrap()
.take_payload()
.unwrap();

assert_eq!(
ar_eg.clone().get().to_owned().percent_sign_symbol,
"\u{200e}%\u{200e}" // ASCII '%' with a LEFT-TO-RIGHT MARK (U+200E) on each side
);
}
10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/ar-EG.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/ar.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/bn.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/ccp.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/en-001.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/datagen/tests/data/json/percent/essentials@1/en-ZA.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5a780ea

Please sign in to comment.