Skip to content

Commit

Permalink
Remove obsolete dependency on once_cell
Browse files (browse the repository at this point in the history)
  • Loading branch information
pemistahl committed Dec 23, 2024
1 parent f850643 commit 6a55364
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 61 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ fraction = "0.15.3"
include_dir = "0.7.4"
itertools = "0.13.0"
maplit = "1.0.2"
-once_cell = "1.20.2"
regex = "1.11.1"
serde = { version = "1.0.216", features = ["derive"] }
serde_json = "1.0.133"
Expand Down
40 changes: 20 additions & 20 deletions src/alphabet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
*/

use std::collections::HashMap;
+use std::sync::LazyLock;

use ahash::AHashSet;
-use once_cell::sync::Lazy;
use strum::IntoEnumIterator;
use strum_macros::EnumIter;

Expand Down Expand Up @@ -75,7 +75,7 @@ impl Alphabet {
languages
}

-fn char_set(&self) -> &Lazy<CharSet> {
+fn char_set(&self) -> &LazyLock<CharSet> {
match self {
Alphabet::Arabic => &ARABIC,
Alphabet::Armenian => &ARMENIAN,
Expand Down Expand Up @@ -137,21 +137,21 @@ impl CharSet {
}
}

-static ARABIC: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Arabic"));
-static ARMENIAN: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Armenian"));
-static BENGALI: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Bengali"));
-static CYRILLIC: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Cyrillic"));
-static DEVANAGARI: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Devanagari"));
-static GEORGIAN: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Georgian"));
-static GREEK: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Greek"));
-static GUJARATI: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Gujarati"));
-static GURMUKHI: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Gurmukhi"));
-static HAN: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Han"));
-static HANGUL: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Hangul"));
-static HEBREW: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Hebrew"));
-static HIRAGANA: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Hiragana"));
-static KATAKANA: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Katakana"));
-static LATIN: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Latin"));
-static TAMIL: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Tamil"));
-static TELUGU: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Telugu"));
-static THAI: Lazy<CharSet> = Lazy::new(|| CharSet::from_char_class("Thai"));
+static ARABIC: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Arabic"));
+static ARMENIAN: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Armenian"));
+static BENGALI: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Bengali"));
+static CYRILLIC: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Cyrillic"));
+static DEVANAGARI: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Devanagari"));
+static GEORGIAN: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Georgian"));
+static GREEK: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Greek"));
+static GUJARATI: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Gujarati"));
+static GURMUKHI: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Gurmukhi"));
+static HAN: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Han"));
+static HANGUL: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Hangul"));
+static HEBREW: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Hebrew"));
+static HIRAGANA: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Hiragana"));
+static KATAKANA: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Katakana"));
+static LATIN: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Latin"));
+static TAMIL: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Tamil"));
+static TELUGU: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Telugu"));
+static THAI: LazyLock<CharSet> = LazyLock::new(|| CharSet::from_char_class("Thai"));
20 changes: 10 additions & 10 deletions src/bin/accuracy_reports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ use std::collections::HashMap;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
+use std::sync::LazyLock;
use std::time::Instant;

use cld2::{detect_language as cld2_detect_language, Format, Lang as CLD2Language};
use fraction::{Decimal, Zero};
use include_dir::Dir;
use indoc::formatdoc;
use itertools::Itertools;
-use once_cell::sync::Lazy;
use strum::IntoEnumIterator;
use titlecase::titlecase;
use whatlang::{Detector, Lang as WhatlangLanguage};
Expand Down Expand Up @@ -329,16 +329,16 @@ impl Statistic {
}
}

-static WHATLANG_DETECTOR: Lazy<Detector> = Lazy::new(Detector::new);
+static WHATLANG_DETECTOR: LazyLock<Detector> = LazyLock::new(Detector::new);

-static LINGUA_DETECTOR_WITH_LOW_ACCURACY: Lazy<LanguageDetector> = Lazy::new(|| {
+static LINGUA_DETECTOR_WITH_LOW_ACCURACY: LazyLock<LanguageDetector> = LazyLock::new(|| {
LanguageDetectorBuilder::from_all_languages()
.with_low_accuracy_mode()
.with_preloaded_language_models()
.build()
});

-static LINGUA_DETECTOR_WITH_HIGH_ACCURACY: Lazy<LanguageDetector> = Lazy::new(|| {
+static LINGUA_DETECTOR_WITH_HIGH_ACCURACY: LazyLock<LanguageDetector> = LazyLock::new(|| {
LanguageDetectorBuilder::from_all_languages()
.with_preloaded_language_models()
.build()
Expand Down Expand Up @@ -390,14 +390,14 @@ fn get_file_content(file_name: &str) -> HashMap<Language, Vec<&str>> {
.collect()
}

-static SINGLE_WORDS: Lazy<HashMap<Language, Vec<&str>>> =
-Lazy::new(|| get_file_content("single-words.txt"));
+static SINGLE_WORDS: LazyLock<HashMap<Language, Vec<&str>>> =
+LazyLock::new(|| get_file_content("single-words.txt"));

-static WORD_PAIRS: Lazy<HashMap<Language, Vec<&str>>> =
-Lazy::new(|| get_file_content("word-pairs.txt"));
+static WORD_PAIRS: LazyLock<HashMap<Language, Vec<&str>>> =
+LazyLock::new(|| get_file_content("word-pairs.txt"));

-static SENTENCES: Lazy<HashMap<Language, Vec<&str>>> =
-Lazy::new(|| get_file_content("sentences.txt"));
+static SENTENCES: LazyLock<HashMap<Language, Vec<&str>>> =
+LazyLock::new(|| get_file_content("sentences.txt"));

fn collect_statistics(
detector_name: &str,
Expand Down
21 changes: 11 additions & 10 deletions src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,33 @@

use std::collections::{HashMap, HashSet};
use std::str::FromStr;
+use std::sync::LazyLock;

-use once_cell::sync::Lazy;
use regex::Regex;

use crate::alphabet::CharSet;
use crate::language::Language;

-pub(crate) static JAPANESE_CHARACTER_SET: Lazy<CharSet> =
-Lazy::new(|| CharSet::from_char_classes(&["Hiragana", "Katakana", "Han"]));
-pub(crate) static MULTIPLE_WHITESPACE: Lazy<Regex> = Lazy::new(|| Regex::new("\\s+").unwrap());
-pub(crate) static NUMBERS: Lazy<Regex> = Lazy::new(|| Regex::new("\\p{N}").unwrap());
-pub(crate) static PUNCTUATION: Lazy<Regex> = Lazy::new(|| Regex::new("\\p{P}").unwrap());
-pub(crate) static TOKENS_WITHOUT_WHITESPACE: Lazy<Regex> = Lazy::new(|| {
+pub(crate) static JAPANESE_CHARACTER_SET: LazyLock<CharSet> =
+LazyLock::new(|| CharSet::from_char_classes(&["Hiragana", "Katakana", "Han"]));
+pub(crate) static MULTIPLE_WHITESPACE: LazyLock<Regex> =
+LazyLock::new(|| Regex::new("\\s+").unwrap());
+pub(crate) static NUMBERS: LazyLock<Regex> = LazyLock::new(|| Regex::new("\\p{N}").unwrap());
+pub(crate) static PUNCTUATION: LazyLock<Regex> = LazyLock::new(|| Regex::new("\\p{P}").unwrap());
+pub(crate) static TOKENS_WITHOUT_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
"\\p{Bengali}+|\\p{Devanagari}+|\\p{Gujarati}+|\\p{Gurmukhi}+|\\p{Han}|\\p{Hangul}+|\\p{Hiragana}|\\p{Katakana}|\\p{Tamil}+|\\p{Telugu}+|\\p{Thai}+|\\p{L}+",
)
.unwrap()
});
-pub(crate) static TOKENS_WITH_OPTIONAL_WHITESPACE: Lazy<Regex> = Lazy::new(|| {
+pub(crate) static TOKENS_WITH_OPTIONAL_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
"\\s*(?:\\p{Bengali}+|\\p{Devanagari}+|\\p{Gujarati}+|\\p{Gurmukhi}+|\\p{Han}|\\p{Hangul}+|\\p{Hiragana}|\\p{Katakana}|\\p{Tamil}+|\\p{Telugu}+|\\p{Thai}+|[\\p{L}'-]+)[\\p{N}\\p{P}]*\\s*",
)
.unwrap()
});
-pub(crate) static CHARS_TO_LANGUAGES_MAPPING: Lazy<HashMap<&'static str, HashSet<Language>>> =
-Lazy::new(|| {
+pub(crate) static CHARS_TO_LANGUAGES_MAPPING: LazyLock<HashMap<&'static str, HashSet<Language>>> =
+LazyLock::new(|| {
let mut mapping = hashmap!();

if cfg!(feature = "portuguese") || cfg!(feature = "vietnamese") {
Expand Down
37 changes: 18 additions & 19 deletions src/detector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@ use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
use std::str::FromStr;
-use std::sync::RwLock;
+use std::sync::{LazyLock, RwLock};

use ahash::AHashMap;
use compact_str::CompactString;
use fraction::Zero;
use itertools::Itertools;
-use once_cell::sync::Lazy;
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use strum::IntoEnumIterator;
Expand All @@ -39,15 +38,15 @@ use crate::language::Language;
use crate::model::{TestDataLanguageModel, TrainingDataLanguageModel};
use crate::result::DetectionResult;

-type LazyLanguageModelMap = Lazy<RwLock<HashMap<Language, AHashMap<CompactString, f64>>>>;
+type LazyLanguageModelMap = LazyLock<RwLock<HashMap<Language, AHashMap<CompactString, f64>>>>;
type StaticLanguageModelMap = &'static RwLock<HashMap<Language, AHashMap<CompactString, f64>>>;
type LanguageModelArray<'a> = [Option<&'a HashMap<Language, AHashMap<CompactString, f64>>>; 5];

-static UNIGRAM_MODELS: LazyLanguageModelMap = Lazy::new(|| RwLock::new(HashMap::new()));
-static BIGRAM_MODELS: LazyLanguageModelMap = Lazy::new(|| RwLock::new(HashMap::new()));
-static TRIGRAM_MODELS: LazyLanguageModelMap = Lazy::new(|| RwLock::new(HashMap::new()));
-static QUADRIGRAM_MODELS: LazyLanguageModelMap = Lazy::new(|| RwLock::new(HashMap::new()));
-static FIVEGRAM_MODELS: LazyLanguageModelMap = Lazy::new(|| RwLock::new(HashMap::new()));
+static UNIGRAM_MODELS: LazyLanguageModelMap = LazyLock::new(|| RwLock::new(HashMap::new()));
+static BIGRAM_MODELS: LazyLanguageModelMap = LazyLock::new(|| RwLock::new(HashMap::new()));
+static TRIGRAM_MODELS: LazyLanguageModelMap = LazyLock::new(|| RwLock::new(HashMap::new()));
+static QUADRIGRAM_MODELS: LazyLanguageModelMap = LazyLock::new(|| RwLock::new(HashMap::new()));
+static FIVEGRAM_MODELS: LazyLanguageModelMap = LazyLock::new(|| RwLock::new(HashMap::new()));

/// This struct detects the language of given input text.
#[cfg_attr(feature = "python", pyo3::prelude::pyclass(module = "lingua"))]
Expand Down Expand Up @@ -1343,8 +1342,8 @@ fn merge_adjacent_results(
#[cfg(test)]
mod tests {
use float_cmp::approx_eq;
-use once_cell::sync::OnceCell;
use rstest::*;
+use std::sync::OnceLock;

use crate::builder::LanguageDetectorBuilder;
use crate::language::Language::*;
Expand Down Expand Up @@ -1499,9 +1498,9 @@ mod tests {
unigram_language_model_for_english: AHashMap<CompactString, f64>,
unigram_language_model_for_german: AHashMap<CompactString, f64>,
) -> StaticLanguageModelMap {
-static UNIGRAM_MODELS_FIXTURE: OnceCell<
+static UNIGRAM_MODELS_FIXTURE: OnceLock<
RwLock<HashMap<Language, AHashMap<CompactString, f64>>>,
-> = OnceCell::new();
+> = OnceLock::new();
UNIGRAM_MODELS_FIXTURE.get_or_init(|| {
RwLock::new(hashmap!(
English => unigram_language_model_for_english,
Expand All @@ -1515,9 +1514,9 @@ mod tests {
bigram_language_model_for_english: AHashMap<CompactString, f64>,
bigram_language_model_for_german: AHashMap<CompactString, f64>,
) -> StaticLanguageModelMap {
-static BIGRAM_MODELS_FIXTURE: OnceCell<
+static BIGRAM_MODELS_FIXTURE: OnceLock<
RwLock<HashMap<Language, AHashMap<CompactString, f64>>>,
-> = OnceCell::new();
+> = OnceLock::new();
BIGRAM_MODELS_FIXTURE.get_or_init(|| {
RwLock::new(hashmap!(
English => bigram_language_model_for_english,
Expand All @@ -1531,9 +1530,9 @@ mod tests {
trigram_language_model_for_english: AHashMap<CompactString, f64>,
trigram_language_model_for_german: AHashMap<CompactString, f64>,
) -> StaticLanguageModelMap {
-static TRIGRAM_MODELS_FIXTURE: OnceCell<
+static TRIGRAM_MODELS_FIXTURE: OnceLock<
RwLock<HashMap<Language, AHashMap<CompactString, f64>>>,
-> = OnceCell::new();
+> = OnceLock::new();
TRIGRAM_MODELS_FIXTURE.get_or_init(|| {
RwLock::new(hashmap!(
English => trigram_language_model_for_english,
Expand All @@ -1547,9 +1546,9 @@ mod tests {
quadrigram_language_model_for_english: AHashMap<CompactString, f64>,
quadrigram_language_model_for_german: AHashMap<CompactString, f64>,
) -> StaticLanguageModelMap {
-static QUADRIGRAM_MODELS_FIXTURE: OnceCell<
+static QUADRIGRAM_MODELS_FIXTURE: OnceLock<
RwLock<HashMap<Language, AHashMap<CompactString, f64>>>,
-> = OnceCell::new();
+> = OnceLock::new();
QUADRIGRAM_MODELS_FIXTURE.get_or_init(|| {
RwLock::new(hashmap!(
English => quadrigram_language_model_for_english,
Expand All @@ -1563,9 +1562,9 @@ mod tests {
fivegram_language_model_for_english: AHashMap<CompactString, f64>,
fivegram_language_model_for_german: AHashMap<CompactString, f64>,
) -> StaticLanguageModelMap {
-static FIVEGRAM_MODELS_FIXTURE: OnceCell<
+static FIVEGRAM_MODELS_FIXTURE: OnceLock<
RwLock<HashMap<Language, AHashMap<CompactString, f64>>>,
-> = OnceCell::new();
+> = OnceLock::new();
FIVEGRAM_MODELS_FIXTURE.get_or_init(|| {
RwLock::new(hashmap!(
English => fivegram_language_model_for_english,
Expand Down

0 comments on commit 6a55364

Please sign in to comment.