Skip to content

Commit

Permalink
Provide APIs to get directionality of locale (#3474)
Browse files Browse the repository at this point in the history
  • Loading branch information
skius authored Jun 8, 2023
1 parent c010e99 commit a8ef673
Show file tree
Hide file tree
Showing 22 changed files with 3,387 additions and 1 deletion.
1 change: 1 addition & 0 deletions components/icu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ experimental = [
"icu_casemapping",
"icu_datetime_experimental",
"icu_displaynames",
"icu_locid_transform/experimental",
"icu_relativetime",
"icu_compactdecimal",
]
Expand Down
1 change: 1 addition & 0 deletions components/locid_transform/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ std = []
bench = ["serde"]
serde = ["dep:serde", "icu_locid/serde", "tinystr/serde", "zerovec/serde", "icu_provider/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake", "icu_locid/databake", "tinystr/databake"]
experimental = []

[[bench]]
name = "locale_canonicalizer"
Expand Down
5 changes: 5 additions & 0 deletions components/locid_transform/src/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ impl LocaleCanonicalizer {
/// Creates a [`LocaleCanonicalizer`] with a custom [`LocaleExpander`] object.
///
/// For example, use this constructor if you wish to support all languages.
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
/// <div class="stab unstable">
/// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
/// </div>
pub fn try_new_with_expander_unstable<P>(
provider: &P,
expander: LocaleExpander,
Expand Down
218 changes: 218 additions & 0 deletions components/locid_transform/src/directionality.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::provider::*;
use crate::{LocaleExpander, LocaleTransformError};
use icu_locid::subtags::Script;
use icu_locid::Locale;
use icu_provider::prelude::*;

/// The writing direction associated with a script.
///
/// Values of this type are produced by [`LocaleDirectionality`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum Direction {
    /// Text in the script runs from left to right.
    LeftToRight,
    /// Text in the script runs from right to left.
    RightToLeft,
}

/// Determines the direction of a locale using [`CLDR`] script-direction data.
///
/// The direction is looked up for the script of the locale. When the locale has no script
/// subtag, the contained [`LocaleExpander`] is used to infer the likely script first.
///
/// # Examples
///
/// ```
/// use icu_locid::locale;
/// use icu_locid_transform::{Direction, LocaleDirectionality};
///
/// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
///     .expect("create failed");
///
/// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight));
/// ```
///
/// [`CLDR`]: http://cldr.unicode.org/
#[derive(Debug)]
pub struct LocaleDirectionality {
    /// Payload holding the lists of right-to-left and left-to-right scripts.
    script_direction: DataPayload<ScriptDirectionV1Marker>,
    /// Expander used to infer the likely script of a locale that does not specify one.
    expander: LocaleExpander,
}

impl LocaleDirectionality {
    /// Builds a [`LocaleDirectionality`] from a [`DataProvider`].
    ///
    /// The [`LocaleExpander`] used for script inference is created from the same provider
    /// via [`LocaleExpander::try_new_unstable`].
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    /// <div class="stab unstable">
    /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
    /// </div>
    pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleDirectionality, LocaleTransformError>
    where
        P: DataProvider<ScriptDirectionV1Marker>
            + DataProvider<LikelySubtagsForLanguageV1Marker>
            + DataProvider<LikelySubtagsForScriptRegionV1Marker>
            + ?Sized,
    {
        Self::try_new_with_expander_unstable(provider, LocaleExpander::try_new_unstable(provider)?)
    }

    // Note: hand-written rather than generated, because the bounds on `try_new_unstable`
    // don't suffice for the `AnyProvider` case.
    #[doc = icu_provider::gen_any_buffer_docs!(ANY, icu_provider, Self::try_new_unstable)]
    pub fn try_new_with_any_provider(
        provider: &(impl AnyProvider + ?Sized),
    ) -> Result<LocaleDirectionality, LocaleTransformError> {
        let expander = LocaleExpander::try_new_with_any_provider(provider)?;
        let downcasting = provider.as_downcasting();
        Self::try_new_with_expander_unstable(&downcasting, expander)
    }

    // Note: hand-written rather than generated, because the bounds on `try_new_unstable`
    // don't suffice for the `BufferProvider` case.
    #[doc = icu_provider::gen_any_buffer_docs!(BUFFER, icu_provider, Self::try_new_unstable)]
    #[cfg(feature = "serde")]
    pub fn try_new_with_buffer_provider(
        provider: &(impl BufferProvider + ?Sized),
    ) -> Result<LocaleDirectionality, LocaleTransformError> {
        let expander = LocaleExpander::try_new_with_buffer_provider(provider)?;
        let deserializing = provider.as_deserializing();
        Self::try_new_with_expander_unstable(&deserializing, expander)
    }

    /// Builds a [`LocaleDirectionality`] that infers scripts with the supplied
    /// [`LocaleExpander`].
    ///
    /// Only the script-direction data is loaded from `provider`; the expander is used as given.
    /// For example, use this constructor if you wish to support all languages.
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    /// <div class="stab unstable">
    /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
    /// </div>
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::locale;
    /// use icu_locid_transform::{Direction, LocaleDirectionality, LocaleExpander};
    ///
    /// let ld_default = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
    ///     .expect("create failed");
    ///
    /// assert_eq!(ld_default.get(&locale!("jbn")), None);
    ///
    /// let expander = LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable())
    ///     .expect("create failed");
    /// let ld_extended = LocaleDirectionality::try_new_with_expander_unstable(
    ///     &icu_testdata::unstable(),
    ///     expander,
    /// ).expect("create failed");
    ///
    /// assert_eq!(ld_extended.get(&locale!("jbn")), Some(Direction::RightToLeft));
    /// ```
    pub fn try_new_with_expander_unstable<P>(
        provider: &P,
        expander: LocaleExpander,
    ) -> Result<LocaleDirectionality, LocaleTransformError>
    where
        P: DataProvider<ScriptDirectionV1Marker> + ?Sized,
    {
        let response = provider.load(Default::default())?;
        let script_direction = response.take_payload()?;

        Ok(Self {
            script_direction,
            expander,
        })
    }

    /// Looks up the script direction of `locale`.
    ///
    /// Direction is a property of a locale's script, not of its language. When the locale has
    /// no script subtag (e.g. `locale!("en")` as opposed to `locale!("en-Latn")`), the script is
    /// first inferred from the language and region, and the direction of that script is returned.
    ///
    /// Returns `None` when no script can be determined, or when the script appears in neither
    /// the left-to-right nor the right-to-left list.
    ///
    /// If you already have a script struct and want to get its direction, you should use
    /// `Locale::from(Some(my_script))` and call this method.
    ///
    /// # Examples
    ///
    /// Using an existing locale:
    ///
    /// ```
    /// use icu_locid::locale;
    /// use icu_locid_transform::{Direction, LocaleDirectionality};
    ///
    /// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
    ///     .expect("create failed");
    ///
    /// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight));
    ///
    /// assert_eq!(ld.get(&locale!("ar")), Some(Direction::RightToLeft));
    ///
    /// assert_eq!(ld.get(&locale!("foo")), None);
    /// ```
    ///
    /// Using a script directly:
    ///
    /// ```
    /// use icu_locid::subtags_script as script;
    /// use icu_locid::Locale;
    /// use icu_locid_transform::{Direction, LocaleDirectionality};
    ///
    /// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
    ///     .expect("create failed");
    ///
    /// assert_eq!(ld.get(&Locale::from(Some(script!("Latn")))), Some(Direction::LeftToRight));
    /// ```
    pub fn get(&self, locale: &Locale) -> Option<Direction> {
        // The left-to-right list is consulted first, then the right-to-left list.
        match self.expander.get_likely_script(&locale.id)? {
            script if self.script_in_ltr(script) => Some(Direction::LeftToRight),
            script if self.script_in_rtl(script) => Some(Direction::RightToLeft),
            _ => None,
        }
    }

    /// Returns `true` if the given locale is right-to-left.
    ///
    /// A result of `false` does not imply that the locale is left-to-right: the script may be
    /// unknown. Use `LocaleDirectionality::get` if you need to differentiate between these cases.
    ///
    /// See [`LocaleDirectionality::get`] for more information.
    pub fn is_right_to_left(&self, locale: &Locale) -> bool {
        match self.expander.get_likely_script(&locale.id) {
            Some(script) => self.script_in_rtl(script),
            None => false,
        }
    }

    /// Returns `true` if the given locale is left-to-right.
    ///
    /// A result of `false` does not imply that the locale is right-to-left: the script may be
    /// unknown. Use `LocaleDirectionality::get` if you need to differentiate between these cases.
    ///
    /// See [`LocaleDirectionality::get`] for more information.
    pub fn is_left_to_right(&self, locale: &Locale) -> bool {
        match self.expander.get_likely_script(&locale.id) {
            Some(script) => self.script_in_ltr(script),
            None => false,
        }
    }

    /// Whether `script` is present in the right-to-left list (binary search).
    fn script_in_rtl(&self, script: Script) -> bool {
        let key = script.into_tinystr().to_unvalidated();
        self.script_direction.get().rtl.binary_search(&key).is_ok()
    }

    /// Whether `script` is present in the left-to-right list (binary search).
    fn script_in_ltr(&self, script: Script) -> bool {
        let key = script.into_tinystr().to_unvalidated();
        self.script_direction.get().ltr.binary_search(&key).is_ok()
    }
}
45 changes: 45 additions & 0 deletions components/locid_transform/src/expander.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,51 @@ impl LocaleExpander {
TransformResult::Unmodified
}
}

// TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
//
/// Returns the script of `langid`: the explicit script subtag when present, otherwise the
/// script inferred from its language and region (if any).
// NOTE(review): `dead_code` is presumably allowed because the caller lives in the
// `directionality` module, which is only compiled with the `experimental` feature.
#[allow(dead_code)]
#[inline]
pub(crate) fn get_likely_script<T: AsRef<LanguageIdentifier>>(
    &self,
    langid: T,
) -> Option<Script> {
    let id = langid.as_ref();
    match id.script {
        Some(script) => Some(script),
        None => self.infer_likely_script(id.language, id.region),
    }
}

/// Infers the most likely script for the given language and optional region.
///
/// Lookups are attempted in order of specificity (mirroring `LocaleExpander::maximize`):
/// 1. language + region
/// 2. language
/// 3. region
///
/// Every case has to be tried, because e.g. for "en-US" the default script is associated
/// with "en" but not with "en-US". Returns `None` when none of the lookups yields a script.
fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
    let data = self.as_borrowed();

    // Cases 1 and 2 only apply when the language is known.
    let from_language = if language != Language::UND {
        region
            // 1. both language and region are known
            .and_then(|region| data.get_lr(language, region))
            // 2. language is known; region is absent or did not help
            .or_else(|| data.get_l(language).map(|(script, _)| script))
    } else {
        None
    };

    // 3. region is known; language is absent or did not help
    from_language.or_else(|| {
        region
            .and_then(|region| data.get_r(region))
            .map(|(_, script)| script)
    })
}
}

#[cfg(feature = "serde")]
Expand Down
4 changes: 4 additions & 0 deletions components/locid_transform/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,15 @@
extern crate alloc;

mod canonicalizer;
#[cfg(feature = "experimental")]
mod directionality;
mod error;
mod expander;
pub mod provider;

pub use canonicalizer::LocaleCanonicalizer;
#[cfg(feature = "experimental")]
pub use directionality::{Direction, LocaleDirectionality};
pub use error::LocaleTransformError;
pub use expander::LocaleExpander;

Expand Down
27 changes: 26 additions & 1 deletion components/locid_transform/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use alloc::borrow::Cow;
use icu_locid::subtags::{Language, Region, Script, Variant};
use icu_provider::prelude::*;
use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice, ZeroVec};

// We use raw TinyAsciiStrs for map keys, as we then don't have to
// validate them as subtags on deserialization. Map lookup can be
Expand Down Expand Up @@ -135,6 +135,31 @@ pub struct AliasesV1<'data> {
pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
}

#[icu_provider::data_struct(ScriptDirectionV1Marker = "locid_transform/script_dir@1")]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_locid_transform::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// Script-direction data: the scripts written right-to-left and the scripts written
/// left-to-right. It is used to determine the script directionality of a locale.
///
/// `LocaleDirectionality` looks scripts up in both lists with a binary search, so each list
/// is expected to be sorted.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct ScriptDirectionV1<'data> {
/// Scripts in right-to-left direction, stored as unvalidated script subtags.
#[cfg_attr(feature = "serde", serde(borrow))]
pub rtl: ZeroVec<'data, UnvalidatedScript>,
/// Scripts in left-to-right direction, stored as unvalidated script subtags.
#[cfg_attr(feature = "serde", serde(borrow))]
pub ltr: ZeroVec<'data, UnvalidatedScript>,
}

#[icu_provider::data_struct(LikelySubtagsV1Marker = "locid_transform/likelysubtags@1")]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
Expand Down
7 changes: 7 additions & 0 deletions ffi/diplomat/tests/missing_apis.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
# Please check in with @Manishearth, @robertbastian, or @sffc if you have questions


icu::locid_transform::Direction#Enum
icu::locid_transform::LocaleDirectionality#Struct
icu::locid_transform::LocaleDirectionality::get#FnInStruct
icu::locid_transform::LocaleDirectionality::is_left_to_right#FnInStruct
icu::locid_transform::LocaleDirectionality::is_right_to_left#FnInStruct
icu::locid_transform::LocaleDirectionality::try_new_unstable#FnInStruct
icu::locid_transform::LocaleDirectionality::try_new_with_expander_unstable#FnInStruct
icu::plurals::PluralRules::try_new#FnInStruct
icu::plurals::PluralRules::try_new_cardinal#FnInStruct
icu::plurals::PluralRules::try_new_ordinal#FnInStruct
Expand Down
1 change: 1 addition & 0 deletions provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ registry!(
RegionDisplayNamesV1Marker,
LanguageDisplayNamesV1Marker,
LocaleDisplayNamesV1Marker,
ScriptDirectionV1Marker,
ScriptDisplayNamesV1Marker,
VariantDisplayNamesV1Marker,
LongSecondRelativeTimeFormatDataV1Marker,
Expand Down
Loading

0 comments on commit a8ef673

Please sign in to comment.