Skip to content

Commit

Permalink
Merge pull request #7 from yeslogic/wezm/unicode-16
Browse files Browse the repository at this point in the history
Upgrade to Unicode 16.0
  • Loading branch information
wezm authored Oct 8, 2024
2 parents fcec08d + 3ed2a21 commit 2bdc736
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 220 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,10 @@ jobs:
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@v1
with:
toolchain: ${{ matrix.rust }}
components: rustfmt
- run: cargo fmt -- --check
- run: cargo test
- run: cargo test --features harfbuzz
8 changes: 1 addition & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ authors = [
"The Servo Project Developers",
"YesLogic Pty. Ltd. <[email protected]>"
]
edition = "2018"

homepage = "https://github.com/yeslogic/unicode-script"
repository = "https://github.com/yeslogic/unicode-script"
Expand All @@ -19,10 +20,3 @@ categories = ["text-processing"]

[lib]
name = "unicode_script"

[features]
harfbuzz = ["harfbuzz-sys"]

[dependencies.harfbuzz-sys]
version = "0.5"
optional = true
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
UCD:=16.0.0

tables:
yeslogic-ucd-generate script --rust-enum --name Script ../ucd-generate/ucd-$(UCD) > src/tables.rs
cargo fmt


.PHONY: tables

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ yeslogic-unicode-script
<a href="https://docs.rs/yeslogic-unicode-script">
<img src="https://docs.rs/yeslogic-unicode-script/badge.svg" alt="Documentation">
</a>
<img src="https://img.shields.io/badge/unicode-15.0-informational" alt="Unicode Version">
<img src="https://img.shields.io/badge/unicode-16.0-informational" alt="Unicode Version">
<a href="https://crates.io/crates/yeslogic-unicode-script">
<img src="https://img.shields.io/crates/v/yeslogic-unicode-script.svg" alt="Version">
</a>
Expand All @@ -16,7 +16,7 @@ yeslogic-unicode-script
<br>

Fast lookup of the Unicode Script property for `char` in Rust using
Unicode 15.0 data.
Unicode 16.0 data.

Usage
-----
Expand Down
188 changes: 1 addition & 187 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,193 +25,7 @@ pub use tables::Script;

/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-script is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (15, 0, 0);

#[cfg(feature = "harfbuzz")]
extern crate harfbuzz_sys;

impl Script {
/// Returns the HarfBuzz script constant (`harfbuzz_sys::hb_script_t`) that
/// corresponds to this `Script`.
///
/// Only compiled when the `harfbuzz` feature is enabled.
///
/// `CyproMinoan`, `Kawi`, `NagMundari`, `OldUyghur`, `Tangsa`, `Toto` and
/// `Vithkuqi` have no constant in the `harfbuzz_sys` version this crate
/// builds against, so they currently map to `HB_SCRIPT_INVALID`.
#[cfg(feature = "harfbuzz")]
pub fn to_hb_script(self) -> harfbuzz_sys::hb_script_t {
// Glob imports keep the arms short: the `HB_SCRIPT_*` constants come from
// `harfbuzz_sys`, the bare variant names from `Script`.
use harfbuzz_sys::*;
use Script::*;
// No wildcard arm: every `Script` variant is listed explicitly.
match self {
Adlam => HB_SCRIPT_ADLAM,
Ahom => HB_SCRIPT_AHOM,
AnatolianHieroglyphs => HB_SCRIPT_ANATOLIAN_HIEROGLYPHS,
Arabic => HB_SCRIPT_ARABIC,
Armenian => HB_SCRIPT_ARMENIAN,
Avestan => HB_SCRIPT_AVESTAN,
Balinese => HB_SCRIPT_BALINESE,
Bamum => HB_SCRIPT_BAMUM,
BassaVah => HB_SCRIPT_BASSA_VAH,
Batak => HB_SCRIPT_BATAK,
Bengali => HB_SCRIPT_BENGALI,
Bhaiksuki => HB_SCRIPT_BHAIKSUKI,
Bopomofo => HB_SCRIPT_BOPOMOFO,
Brahmi => HB_SCRIPT_BRAHMI,
Braille => HB_SCRIPT_BRAILLE,
Buginese => HB_SCRIPT_BUGINESE,
Buhid => HB_SCRIPT_BUHID,
// Name differs: HarfBuzz calls this script "Canadian Syllabics".
CanadianAboriginal => HB_SCRIPT_CANADIAN_SYLLABICS,
Carian => HB_SCRIPT_CARIAN,
CaucasianAlbanian => HB_SCRIPT_CAUCASIAN_ALBANIAN,
Chakma => HB_SCRIPT_CHAKMA,
Cham => HB_SCRIPT_CHAM,
Cherokee => HB_SCRIPT_CHEROKEE,
Chorasmian => HB_SCRIPT_CHORASMIAN,
Common => HB_SCRIPT_COMMON,
Coptic => HB_SCRIPT_COPTIC,
Cuneiform => HB_SCRIPT_CUNEIFORM,
Cypriot => HB_SCRIPT_CYPRIOT,
Cyrillic => HB_SCRIPT_CYRILLIC,
Deseret => HB_SCRIPT_DESERET,
Devanagari => HB_SCRIPT_DEVANAGARI,
DivesAkuru => HB_SCRIPT_DIVES_AKURU,
Dogra => HB_SCRIPT_DOGRA,
Duployan => HB_SCRIPT_DUPLOYAN,
EgyptianHieroglyphs => HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
Elbasan => HB_SCRIPT_ELBASAN,
Elymaic => HB_SCRIPT_ELYMAIC,
Ethiopic => HB_SCRIPT_ETHIOPIC,
Georgian => HB_SCRIPT_GEORGIAN,
Glagolitic => HB_SCRIPT_GLAGOLITIC,
Gothic => HB_SCRIPT_GOTHIC,
Grantha => HB_SCRIPT_GRANTHA,
Greek => HB_SCRIPT_GREEK,
Gujarati => HB_SCRIPT_GUJARATI,
GunjalaGondi => HB_SCRIPT_GUNJALA_GONDI,
Gurmukhi => HB_SCRIPT_GURMUKHI,
Han => HB_SCRIPT_HAN,
Hangul => HB_SCRIPT_HANGUL,
HanifiRohingya => HB_SCRIPT_HANIFI_ROHINGYA,
Hanunoo => HB_SCRIPT_HANUNOO,
Hatran => HB_SCRIPT_HATRAN,
Hebrew => HB_SCRIPT_HEBREW,
Hiragana => HB_SCRIPT_HIRAGANA,
ImperialAramaic => HB_SCRIPT_IMPERIAL_ARAMAIC,
Inherited => HB_SCRIPT_INHERITED,
InscriptionalPahlavi => HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
InscriptionalParthian => HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
Javanese => HB_SCRIPT_JAVANESE,
Kaithi => HB_SCRIPT_KAITHI,
Kannada => HB_SCRIPT_KANNADA,
Katakana => HB_SCRIPT_KATAKANA,
KayahLi => HB_SCRIPT_KAYAH_LI,
Kharoshthi => HB_SCRIPT_KHAROSHTHI,
KhitanSmallScript => HB_SCRIPT_KHITAN_SMALL_SCRIPT,
Khmer => HB_SCRIPT_KHMER,
Khojki => HB_SCRIPT_KHOJKI,
Khudawadi => HB_SCRIPT_KHUDAWADI,
Lao => HB_SCRIPT_LAO,
Latin => HB_SCRIPT_LATIN,
Lepcha => HB_SCRIPT_LEPCHA,
Limbu => HB_SCRIPT_LIMBU,
LinearA => HB_SCRIPT_LINEAR_A,
LinearB => HB_SCRIPT_LINEAR_B,
Lisu => HB_SCRIPT_LISU,
Lycian => HB_SCRIPT_LYCIAN,
Lydian => HB_SCRIPT_LYDIAN,
Mahajani => HB_SCRIPT_MAHAJANI,
Makasar => HB_SCRIPT_MAKASAR,
Malayalam => HB_SCRIPT_MALAYALAM,
Mandaic => HB_SCRIPT_MANDAIC,
Manichaean => HB_SCRIPT_MANICHAEAN,
Marchen => HB_SCRIPT_MARCHEN,
MasaramGondi => HB_SCRIPT_MASARAM_GONDI,
Medefaidrin => HB_SCRIPT_MEDEFAIDRIN,
MeeteiMayek => HB_SCRIPT_MEETEI_MAYEK,
MendeKikakui => HB_SCRIPT_MENDE_KIKAKUI,
MeroiticCursive => HB_SCRIPT_MEROITIC_CURSIVE,
MeroiticHieroglyphs => HB_SCRIPT_MEROITIC_HIEROGLYPHS,
Miao => HB_SCRIPT_MIAO,
Modi => HB_SCRIPT_MODI,
Mongolian => HB_SCRIPT_MONGOLIAN,
Mro => HB_SCRIPT_MRO,
Multani => HB_SCRIPT_MULTANI,
Myanmar => HB_SCRIPT_MYANMAR,
Nabataean => HB_SCRIPT_NABATAEAN,
Nandinagari => HB_SCRIPT_NANDINAGARI,
NewTaiLue => HB_SCRIPT_NEW_TAI_LUE,
Newa => HB_SCRIPT_NEWA,
Nko => HB_SCRIPT_NKO,
Nushu => HB_SCRIPT_NUSHU,
NyiakengPuachueHmong => HB_SCRIPT_NYIAKENG_PUACHUE_HMONG,
Ogham => HB_SCRIPT_OGHAM,
OlChiki => HB_SCRIPT_OL_CHIKI,
OldHungarian => HB_SCRIPT_OLD_HUNGARIAN,
OldItalic => HB_SCRIPT_OLD_ITALIC,
OldNorthArabian => HB_SCRIPT_OLD_NORTH_ARABIAN,
OldPermic => HB_SCRIPT_OLD_PERMIC,
OldPersian => HB_SCRIPT_OLD_PERSIAN,
OldSogdian => HB_SCRIPT_OLD_SOGDIAN,
OldSouthArabian => HB_SCRIPT_OLD_SOUTH_ARABIAN,
OldTurkic => HB_SCRIPT_OLD_TURKIC,
Oriya => HB_SCRIPT_ORIYA,
Osage => HB_SCRIPT_OSAGE,
Osmanya => HB_SCRIPT_OSMANYA,
PahawhHmong => HB_SCRIPT_PAHAWH_HMONG,
Palmyrene => HB_SCRIPT_PALMYRENE,
PauCinHau => HB_SCRIPT_PAU_CIN_HAU,
PhagsPa => HB_SCRIPT_PHAGS_PA,
Phoenician => HB_SCRIPT_PHOENICIAN,
PsalterPahlavi => HB_SCRIPT_PSALTER_PAHLAVI,
Rejang => HB_SCRIPT_REJANG,
Runic => HB_SCRIPT_RUNIC,
Samaritan => HB_SCRIPT_SAMARITAN,
Saurashtra => HB_SCRIPT_SAURASHTRA,
Sharada => HB_SCRIPT_SHARADA,
Shavian => HB_SCRIPT_SHAVIAN,
Siddham => HB_SCRIPT_SIDDHAM,
Signwriting => HB_SCRIPT_SIGNWRITING,
Sinhala => HB_SCRIPT_SINHALA,
Sogdian => HB_SCRIPT_SOGDIAN,
SoraSompeng => HB_SCRIPT_SORA_SOMPENG,
Soyombo => HB_SCRIPT_SOYOMBO,
Sundanese => HB_SCRIPT_SUNDANESE,
SylotiNagri => HB_SCRIPT_SYLOTI_NAGRI,
Syriac => HB_SCRIPT_SYRIAC,
Tagalog => HB_SCRIPT_TAGALOG,
Tagbanwa => HB_SCRIPT_TAGBANWA,
TaiLe => HB_SCRIPT_TAI_LE,
TaiTham => HB_SCRIPT_TAI_THAM,
TaiViet => HB_SCRIPT_TAI_VIET,
Takri => HB_SCRIPT_TAKRI,
Tamil => HB_SCRIPT_TAMIL,
Tangut => HB_SCRIPT_TANGUT,
Telugu => HB_SCRIPT_TELUGU,
Thaana => HB_SCRIPT_THAANA,
Thai => HB_SCRIPT_THAI,
Tibetan => HB_SCRIPT_TIBETAN,
Tifinagh => HB_SCRIPT_TIFINAGH,
Tirhuta => HB_SCRIPT_TIRHUTA,
Ugaritic => HB_SCRIPT_UGARITIC,
Unknown => HB_SCRIPT_UNKNOWN,
Vai => HB_SCRIPT_VAI,
Wancho => HB_SCRIPT_WANCHO,
WarangCiti => HB_SCRIPT_WARANG_CITI,
Yezidi => HB_SCRIPT_YEZIDI,
Yi => HB_SCRIPT_YI,
ZanabazarSquare => HB_SCRIPT_ZANABAZAR_SQUARE,

// Added in HarfBuzz 3.0.0, but harfbuzz_sys has not been updated to that
// version yet. Intended arms once it is:
// CyproMinoan => HB_SCRIPT_CYPRO_MINOAN,
// OldUyghur => HB_SCRIPT_OLD_UYGHUR,
// Tangsa => HB_SCRIPT_TANGSA,
// Toto => HB_SCRIPT_TOTO,
// Vithkuqi => HB_SCRIPT_VITHKUQI,

// Added in Unicode 15.0; likewise not yet available in harfbuzz_sys:
// Kawi => HB_SCRIPT_KAWI,
// NagMundari => HB_SCRIPT_NAG_MUNDARI,

// Until those constants exist, these scripts are reported as
// HB_SCRIPT_INVALID.
CyproMinoan | Kawi | NagMundari | OldUyghur | Tangsa | Toto | Vithkuqi => {
HB_SCRIPT_INVALID
}
}
}
}
pub const UNICODE_VERSION: (u64, u64, u64) = (16, 0, 0);

#[cfg(test)]
mod test {
Expand Down
Loading

0 comments on commit 2bdc736

Please sign in to comment.