diff --git a/.lock b/.lock
new file mode 100644
index 0000000..e69de29
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000..e69de29
diff --git a/bleuscore/all.html b/bleuscore/all.html
new file mode 100644
index 0000000..7a8e3a2
--- /dev/null
+++ b/bleuscore/all.html
@@ -0,0 +1,2 @@
+(rustdoc redirect page: Redirecting to ../../bleuscore/fn.compute_score.html)
diff --git a/bleuscore/bleu/struct.BleuScore.html b/bleuscore/bleu/struct.BleuScore.html
new file mode 100644
index 0000000..7a7e490
--- /dev/null
+++ b/bleuscore/bleu/struct.BleuScore.html
@@ -0,0 +1,11 @@
+(rustdoc redirect page: Redirecting to ../../bleuscore/struct.BleuScore.html)
diff --git a/bleuscore/fn.compute_score.html b/bleuscore/fn.compute_score.html
new file mode 100644
index 0000000..5e2723c
--- /dev/null
+++ b/bleuscore/fn.compute_score.html
@@ -0,0 +1,9 @@
+pub fn compute_score(
+ references: &[Vec<String>],
+ predictions: &[String],
+ max_order: usize,
+ smooth: bool
+) -> BleuScore
+compute the BLEU score with `Tokenizer13a` as the default tokenizer.
+The implementation is based on huggingface/nmt.
diff --git a/bleuscore/index.html b/bleuscore/index.html
new file mode 100644
+bleuscore is a BLEU score calculator written in pure rust.
+The crate is called `bleuscore` and you can depend on it via cargo:
+[dependencies]
+bleuscore = "*"
+
+use bleuscore::compute_score;
+
+// get the references and prediction data:
+let references: Vec<Vec<String>> = vec![vec!["Hello, World!".to_string()]];
+let predictions: Vec<String> = vec!["Yellow, World!".to_string()];
+
+// set the parameters:
+let max_order: usize = 4;
+let smooth: bool = true;
+
+// calculate the BLEU score:
+let res = compute_score(&references, &predictions, max_order, smooth);
+println!("result: {:?}", res);
+// result: BleuScore { bleu: 0.668740304976422, precisions: [0.8, 0.75, 0.6666666666666666, 0.5],
+// brevity_penalty: 1.0, length_ratio: 1.0, translation_length: 4, reference_length: 4 }
diff --git a/bleuscore/struct.BleuScore.html b/bleuscore/struct.BleuScore.html
new file mode 100644
+pub struct BleuScore {
+ pub bleu: f64,
+ pub precisions: Vec<f64>,
+ pub brevity_penalty: f64,
+ pub length_ratio: f64,
+ pub translation_length: usize,
+ pub reference_length: usize,
+}
+The BLEU score data struct
+Fields:
+bleu: f64
+precisions: Vec<f64>
+brevity_penalty: f64
+length_ratio: f64
+translation_length: usize
+reference_length: usize
diff --git a/bleuscore/struct.Tokenizer13a.html b/bleuscore/struct.Tokenizer13a.html
new file mode 100644
+pub struct Tokenizer13a {
+ pub signature: String,
+}
+Same implementation as huggingface/sacrebleu.
+signature: String
diff --git a/bleuscore/struct.TokenizerRegex.html b/bleuscore/struct.TokenizerRegex.html
new file mode 100644
+pub struct TokenizerRegex {
+ pub signature: String,
+}
+Same implementation as huggingface/sacrebleu.
+signature: String
diff --git a/bleuscore/tokenizer/struct.Tokenizer13a.html b/bleuscore/tokenizer/struct.Tokenizer13a.html
new file mode 100644
+(rustdoc redirect page: Redirecting to ../../bleuscore/struct.Tokenizer13a.html)
diff --git a/bleuscore/tokenizer/struct.TokenizerRegex.html b/bleuscore/tokenizer/struct.TokenizerRegex.html
new file mode 100644
index 0000000..4dc09b4
+(rustdoc redirect page: Redirecting to ../../bleuscore/struct.TokenizerRegex.html)
diff --git a/bleuscore/tokenizer/trait.Tokenizer.html b/bleuscore/tokenizer/trait.Tokenizer.html
new file mode 100644
index 0000000..6ec6ae2
+(rustdoc redirect page: Redirecting to ../../bleuscore/trait.Tokenizer.html)
diff --git a/bleuscore/trait.Tokenizer.html b/bleuscore/trait.Tokenizer.html
new file mode 100644
index 0000000..7bf5e9d
--- /dev/null
+++ b/bleuscore/trait.Tokenizer.html
@@ -0,0 +1,7 @@
+pub trait Tokenizer {
+ // Required methods
+ fn signature(&self) -> &str;
+ fn tokenize(&self, line: &str) -> Vec<String>;
+}
+The `tokenize` function is used to tokenize input strings.
diff --git a/search-index.js b/search-index.js
new file mode 100644
+(rustdoc search index: machine-generated JavaScript, omitted)
diff --git a/settings.html b/settings.html
new file mode 100644
index 0000000..bef2a08
--- /dev/null
+++ b/settings.html
@@ -0,0 +1,2 @@
+(rustdoc settings page)
diff --git a/src/bleuscore/bleu.rs.html b/src/bleuscore/bleu.rs.html
new file mode 100644
--- /dev/null
+++ b/src/bleuscore/bleu.rs.html
+use crate::ngram::get_token_ngram_counter;
+use crate::tokenizer::{Tokenizer, Tokenizer13a};
+use std::cmp::min;
+use std::collections::HashMap;
+
+/// The BLEU score data struct
+#[derive(Debug, Default)]
+pub struct BleuScore {
+ pub bleu: f64,
+ pub precisions: Vec<f64>,
+ pub brevity_penalty: f64,
+ pub length_ratio: f64,
+ pub translation_length: usize,
+ pub reference_length: usize,
+}
+
+/// compute the BLEU score with `Tokenizer13a` as the default tokenizer.
+/// The implementation is based on [huggingface/nmt](https://github.com/huggingface/evaluate/blob/main/metrics/bleu/bleu.py)
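+///
+/// As a sketch in standard BLEU notation (matching the code below):
+/// `BLEU = BP * exp((1/N) * Σ ln p_n)` with `BP = min(1, exp(1 - r/c))`,
+/// where `p_n` is the modified n-gram precision, `c` the total translation
+/// length, and `r` the total (shortest-per-sample) reference length.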
+pub fn compute_score(
+ references: &[Vec<String>],
+ predictions: &[String],
+ max_order: usize,
+ smooth: bool,
+) -> BleuScore {
+ // init
+ let mut matches_by_order: Vec<usize> = vec![0; max_order];
+ let mut possible_matches_by_order: Vec<usize> = vec![0; max_order];
+ let mut reference_length: usize = 0;
+ let mut translation_length: usize = 0;
+ let tokenizer = Tokenizer13a::new();
+
+ for (references, translation) in references.iter().zip(predictions.iter()) {
+ // tokenize
+ let translation_tokens = tokenizer.tokenize(translation);
+ let references_tokens: Vec<Vec<String>> =
+ references.iter().map(|x| tokenizer.tokenize(x)).collect();
+ // lengths
+ reference_length += references_tokens.iter().map(|x| x.len()).min().unwrap();
+ translation_length += translation_tokens.len();
+
+ // ngram count
+ let translation_ngram_counts = get_token_ngram_counter(&translation_tokens, max_order);
+ let mut merged_ref_ngram_counts = HashMap::new();
+ for reference_tokens in references_tokens.iter() {
+ let reference_ngram_counts = get_token_ngram_counter(reference_tokens, max_order);
+ for (key, value) in reference_ngram_counts {
+ merged_ref_ngram_counts
+ .entry(key)
+ .and_modify(|v| *v += value)
+ .or_insert(value);
+ }
+ }
+
+ // overlap count
+ let mut overlap_counts = HashMap::new();
+        for (k, v) in translation_ngram_counts {
+            if merged_ref_ngram_counts.contains_key(k) {
+                overlap_counts.insert(k, min(merged_ref_ngram_counts[k], v));
+            }
+        }
+ for &key in overlap_counts.keys() {
+ matches_by_order[key.len() - 1] += overlap_counts[key];
+ }
+
+ // possible match
+ for order in 1..=max_order {
+ let possible_matches = translation_tokens.len().saturating_sub(order - 1);
+ if possible_matches > 0 {
+ possible_matches_by_order[order - 1] += possible_matches
+ }
+ }
+ }
+
+ // precisions calculation
+ let mut precisions: Vec<f64> = vec![0.0; max_order];
+ for i in 0..max_order {
+        if smooth {
+            // add-one smoothing keeps a zero-match order from zeroing the score
+            precisions[i] = (matches_by_order[i] as f64 + 1.0)
+                / (possible_matches_by_order[i] as f64 + 1.0);
+        } else if possible_matches_by_order[i] > 0 {
+            precisions[i] =
+                (matches_by_order[i] as f64) / (possible_matches_by_order[i] as f64);
+        } else {
+            precisions[i] = 0.0;
+        }
+ }
+
+ let mut geo_mean = 0.0;
+
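+    // geometric mean of the n-gram precisions; left at 0.0 when any
+    // precision is 0 (possible only with smooth = false) to avoid ln(0)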
+ if precisions.iter().fold(f64::INFINITY, |a, &b| a.min(b)) > 0.0 {
+ let p_log_sum: f64 =
+ (1.0 / max_order as f64) * precisions.iter().map(|&x| x.ln()).sum::<f64>();
+ geo_mean = p_log_sum.exp();
+ }
+
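+    // brevity penalty: 1.0 when the translation is longer than the
+    // reference, otherwise exp(1 - r/c), written here via length_ratio = c/r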
+ let length_ratio: f64 = translation_length as f64 / reference_length as f64;
+ let mut brevity_penalty = 1.0;
+ if length_ratio <= 1.0 {
+ brevity_penalty = (1.0 - 1.0 / length_ratio).exp();
+ }
+ let bleu = geo_mean * brevity_penalty;
+ BleuScore {
+ bleu,
+ precisions,
+ brevity_penalty,
+ length_ratio,
+ translation_length,
+ reference_length,
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use crate::bleu::compute_score;
+ #[test]
+ fn test_bleu() {
+ let references: Vec<Vec<String>> = vec![vec!["Hello, World!".to_string()]];
+ let predictions: Vec<String> = vec!["Yellow, World!".to_string()];
+ let max_order: usize = 4;
+ let smooth: bool = true;
+ let res = compute_score(&references, &predictions, max_order, smooth);
+ // (0.668740304976422, [0.8, 0.75, 0.6666666666666666, 0.5], 1.0, 1.0, 4, 4)
+ println!("result: {:?}", res);
+ assert!((res.bleu - 0.668740304976422).abs() < 1e-10);
+ }
+}
+
diff --git a/src/bleuscore/lib.rs.html b/src/bleuscore/lib.rs.html
new file mode 100644
--- /dev/null
+++ b/src/bleuscore/lib.rs.html
+/*!
+bleuscore is a [BLEU](https://en.wikipedia.org/wiki/BLEU) score calculator written in pure rust.
+
+# Install:
+
+The crate is called `bleuscore` and you can depend on it via cargo:
+
+```ini
+[dependencies]
+bleuscore = "*"
+```
+
+# Features:
+
+- Tokenized BLEU score calculation like
+[huggingface evaluate](https://github.com/huggingface/evaluate/blob/main/metrics/bleu/bleu.py)
+and [sacrebleu](https://github.com/mjpost/sacrebleu)
+
+
+# Basic usage:
+
+```rust
+use bleuscore::compute_score;
+
+// get the references and prediction data:
+let references: Vec<Vec<String>> = vec![vec!["Hello, World!".to_string()]];
+let predictions: Vec<String> = vec!["Yellow, World!".to_string()];
+
+// set the parameters:
+let max_order: usize = 4;
+let smooth: bool = true;
+
+// calculate the BLEU score:
+let res = compute_score(&references, &predictions, max_order, smooth);
+println!("result: {:?}", res);
+// result: BleuScore { bleu: 0.668740304976422, precisions: [0.8, 0.75, 0.6666666666666666, 0.5],
+// brevity_penalty: 1.0, length_ratio: 1.0, translation_length: 4, reference_length: 4 }
+```
+!*/
+mod tokenizer;
+pub use crate::tokenizer::{Tokenizer, Tokenizer13a, TokenizerRegex};
+mod bleu;
+mod ngram;
+pub use crate::bleu::{compute_score, BleuScore};
+
+use pyo3::prelude::*;
+use pyo3::types::IntoPyDict;
+
+#[pyfunction]
+fn tokenizer_regex(line: &str) -> PyResult<Vec<String>> {
+ let tokenizer_regex = tokenizer::TokenizerRegex::new();
+ let res = tokenizer_regex.tokenize(line);
+ Ok(res)
+}
+
+#[pyfunction]
+fn tokenizer_13a(line: &str) -> PyResult<Vec<String>> {
+ let tokenizer_13a_regex = tokenizer::Tokenizer13a::new();
+ let res = tokenizer_13a_regex.tokenize(line);
+ Ok(res)
+}
+
+#[pyfunction]
+#[pyo3(signature = (references, predictions, max_order=4, smooth=false))]
+fn compute(
+ references: Vec<Vec<String>>,
+ predictions: Vec<String>,
+ max_order: usize,
+ smooth: bool,
+) -> PyResult<PyObject> {
+ let bleu = compute_score(&references, &predictions, max_order, smooth);
+ Python::with_gil(|py| {
+ let bleu_dict = [
+ ("bleu", bleu.bleu.to_object(py)),
+ ("precisions", bleu.precisions.to_object(py)),
+ ("brevity_penalty", bleu.brevity_penalty.to_object(py)),
+ ("length_ratio", bleu.length_ratio.to_object(py)),
+ ("translation_length", bleu.translation_length.to_object(py)),
+ ("reference_length", bleu.reference_length.to_object(py)),
+ ]
+ .into_py_dict_bound(py);
+ Ok(bleu_dict.into())
+ })
+}
+
+/// A Python module implemented in Rust.
+#[pymodule]
+fn bleuscore(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+ m.add_function(wrap_pyfunction!(tokenizer_regex, m)?)?;
+ m.add_function(wrap_pyfunction!(tokenizer_13a, m)?)?;
+ m.add_function(wrap_pyfunction!(compute, m)?)?;
+ m.add("__version__", env!("CARGO_PKG_VERSION"))?;
+ Ok(())
+}
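+
+// Illustrative Python-side call, assuming the extension module is built and
+// installed as `bleuscore` (e.g. via maturin); names follow the functions
+// registered above:
+//   import bleuscore
+//   res = bleuscore.compute(references=[["Hello, World!"]],
+//                           predictions=["Yellow, World!"], smooth=True)
+//   print(res["bleu"])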
+
diff --git a/src/bleuscore/ngram.rs.html b/src/bleuscore/ngram.rs.html
new file mode 100644
--- /dev/null
+++ b/src/bleuscore/ngram.rs.html
+use counter::Counter;
+use std::collections::HashMap;
+
+/// Here the tokens' type is `&[String]` rather than `&Vec<String>`
+/// to fix `clippy::not_unsafe_ptr_arg_deref` error.
+pub fn get_token_ngram_counter(tokens: &[String], max_order: usize) -> HashMap<&[String], usize> {
+ let mut count_map: HashMap<&[String], usize> = HashMap::new();
+ for order in 1..=max_order {
+ for start_index in 0..(tokens.len().saturating_sub(order - 1)) {
+ let ngram = &tokens[start_index..(start_index + order)];
+ count_map
+ .entry(ngram)
+ .and_modify(|counter| *counter += 1)
+ .or_insert(1);
+ }
+ }
+ count_map
+}
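+
+// Illustrative counts: for tokens ["a", "a", "b"] and max_order = 2 the map
+// holds ["a"] -> 2, ["b"] -> 1, ["a", "a"] -> 1, ["a", "b"] -> 1; slice keys
+// hash by element content, so equal ngrams from different positions collide
+// as intended (see the tests below).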
+
+/// TODO: change to use Counter to count ngram
+#[allow(dead_code)]
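+// note: this variant slices the &str by bytes, so it assumes ASCII input;
+// a multi-byte UTF-8 character would make the slice panic on a char boundary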
+fn get_ngram_counter(line: &str, max_order: usize) -> Counter<&str> {
+ let mut counts: Counter<&str> = Counter::new();
+ for order in 1..=max_order {
+ for start_index in 0..(line.len().saturating_sub(order - 1)) {
+ // println!("line: {}, start_index: {}, order: {}", line, start_index, order);
+ let ngram = &line[start_index..(start_index + order)];
+ // println!("ngram: {}", ngram);
+ counts[&ngram] += 1;
+ }
+ }
+ counts
+}
+
+#[cfg(test)]
+mod test {
+ use crate::ngram::{get_ngram_counter, get_token_ngram_counter};
+
+ #[test]
+ fn test_get_token_ngram_short() {
+ let tokens = vec!["a".to_string(), "b".to_string()];
+ let counter = get_token_ngram_counter(&tokens, 4);
+ assert_eq!(counter[&tokens[0..=0]], 1);
+ assert_eq!(counter[&tokens[1..=1]], 1);
+ assert_eq!(counter[&tokens[0..=1]], 1);
+ }
+
+ #[test]
+ fn test_get_token_ngram_long() {
+ // aabc
+ let tokens: Vec<String> = vec![
+ "a".to_string(),
+ "a".to_string(),
+ "b".to_string(),
+ "c".to_string(),
+ ];
+ let counter = get_token_ngram_counter(&tokens, 4);
+ assert_eq!(counter[&tokens[0..=0]], 2); // 'a': 2
+ assert_eq!(counter[&tokens[2..=2]], 1); // 'b': 1
+ assert_eq!(counter[&tokens[3..=3]], 1); // 'c': 1
+
+ assert_eq!(counter[&tokens[0..=1]], 1); // 'aa': 1
+ assert_eq!(counter[&tokens[1..=2]], 1); // 'ab': 1
+ assert_eq!(counter[&tokens[2..=3]], 1); // 'bc': 1
+
+ assert_eq!(counter[&tokens[0..=2]], 1); // 'aab': 1
+ assert_eq!(counter[&tokens[1..=3]], 1); // 'abc': 1
+        assert_eq!(counter[&tokens[0..=3]], 1); // 'aabc': 1
+
+ assert_eq!(counter.len(), 9);
+ }
+
+ #[test]
+ fn test_get_ngram_short() {
+ let counter = get_ngram_counter("ab", 4);
+ assert_eq!(counter[&"a"], 1);
+ assert_eq!(counter[&"b"], 1);
+ assert_eq!(counter[&"ab"], 1);
+ }
+
+ #[test]
+ fn test_get_ngram_long() {
+ let counter = get_ngram_counter("aabc", 4);
+ assert_eq!(counter[&"a"], 2);
+ assert_eq!(counter[&"b"], 1);
+ assert_eq!(counter[&"c"], 1);
+ assert_eq!(counter[&"d"], 0);
+
+ assert_eq!(counter[&"aa"], 1);
+ assert_eq!(counter[&"ab"], 1);
+ assert_eq!(counter[&"bc"], 1);
+ assert_eq!(counter[&"ac"], 0);
+
+ assert_eq!(counter[&"aab"], 1);
+ assert_eq!(counter[&"aabc"], 1);
+ }
+}
+
diff --git a/src/bleuscore/tokenizer.rs.html b/src/bleuscore/tokenizer.rs.html
new file mode 100644
--- /dev/null
+++ b/src/bleuscore/tokenizer.rs.html
+use cached::proc_macro::cached;
+use lazy_static::lazy_static;
+use regex::Regex;
+
+lazy_static! {
+ pub static ref REGEX_ARRAY: [(Regex, &'static str); 4] = [
+ (
+ Regex::new(r"([\{-\~\[-\` -\&\(-\+\:-\@\/])").unwrap(),
+ r" $1 "
+ ),
+ (Regex::new(r"([^0-9])([\.,])").unwrap(), r"$1 $2 "),
+ (Regex::new(r"([\.,])([^0-9])").unwrap(), r" $1 $2"),
+ (Regex::new(r"([0-9])(-)").unwrap(), r"$1 $2 "),
+ ];
+}
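+
+// The rules above follow mteval-v13a tokenization: (1) pad ASCII punctuation
+// and symbols with spaces, (2) split '.' or ',' not preceded by a digit,
+// (3) split '.' or ',' not followed by a digit, (4) split a '-' that follows
+// a digit.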
+
+/// The `tokenize` function is used to tokenize input strings.
+pub trait Tokenizer {
+ fn signature(&self) -> &str;
+ fn tokenize(&self, line: &str) -> Vec<String>;
+}
+
+/// Same implementation as [huggingface/sacrebleu](https://github.com/huggingface/evaluate/blob/main/metrics/bleu/tokenizer_13a.py)
+#[derive(Debug)]
+pub struct TokenizerRegex {
+ pub signature: String,
+}
+
+impl Default for TokenizerRegex {
+ fn default() -> Self {
+ Self {
+ signature: "re".to_string(),
+ }
+ }
+}
+
+impl TokenizerRegex {
+ pub fn new() -> Self {
+ Self::default()
+ }
+}
+
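+// memoize tokenization for up to 65536 distinct lines; reference lines can
+// repeat across calls, so caching skips re-running the regex pipeline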
+#[cached(size = 65536)]
+fn regex_tokenize_cache(line: String) -> Vec<String> {
+ let mut res = line;
+ for &(ref re_capture, re_replace) in REGEX_ARRAY.iter() {
+ res = re_capture.replace_all(&res, re_replace).to_string();
+ }
+ res.split_whitespace().map(|x| x.to_string()).collect()
+}
+
+impl Tokenizer for TokenizerRegex {
+ fn signature(&self) -> &str {
+ &self.signature
+ }
+ fn tokenize(&self, line: &str) -> Vec<String> {
+ regex_tokenize_cache(line.to_string())
+ }
+}
+
+/// Same implementation as [huggingface/sacrebleu](https://github.com/huggingface/evaluate/blob/main/metrics/bleu/tokenizer_13a.py)
+#[derive(Debug)]
+pub struct Tokenizer13a {
+ pub signature: String,
+}
+
+impl Default for Tokenizer13a {
+ fn default() -> Self {
+ Self {
+ signature: "13a".to_string(),
+ }
+ }
+}
+
+impl Tokenizer13a {
+ pub fn new() -> Self {
+ Self::default()
+ }
+}
+
+#[cached(size = 65536)]
+fn tokenize_13a_cache(line: String) -> Vec<String> {
+ let mut res = line;
+ res = res
+ .replace("<skipped>", "")
+ .replace("-\n", "")
+ .replace('\n', " ");
+ if res.contains('&') {
+ res = res
+ .replace(""", "\"")
+ .replace("&", "&")
+ .replace("<", "<")
+ .replace(">", ">");
+ }
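+    // pad the line with spaces so leading/trailing punctuation is split the
+    // same way as punctuation in the middle of the line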
+ TokenizerRegex::new().tokenize(&format!(" {res} "))
+}
+
+impl Tokenizer for Tokenizer13a {
+ fn signature(&self) -> &str {
+ &self.signature
+ }
+ fn tokenize(&self, line: &str) -> Vec<String> {
+ tokenize_13a_cache(line.to_string())
+ }
+}
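+
+// Illustrative behaviour (cf. the tests below):
+//   Tokenizer13a::new().tokenize("Hello, &quot;World!<skipped>")
+//   yields ["Hello", ",", "\"", "World", "!"]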
+
+#[cfg(test)]
+mod test {
+ use crate::tokenizer;
+ use crate::tokenizer::Tokenizer;
+
+ #[test]
+ fn test_tokenize_regex() {
+ let tokenizer_regex = tokenizer::TokenizerRegex::new();
+ let mut line = "Hello, World!";
+ let mut res = tokenizer_regex.tokenize(line);
+ assert_eq!(res, vec!["Hello", ",", "World", "!"]);
+
+ line = "/usr/sbin/sendmail - 0 errors, 12 warnings";
+ res = tokenizer_regex.tokenize(line);
+ assert_eq!(
+ res,
+ vec![
+ "/", "usr", "/", "sbin", "/", "sendmail", "-", "0", "errors", ",", "12", "warnings"
+ ]
+ )
+ }
+
+ #[test]
+ fn test_tokenize_13a_regex() {
+ let tokenizer_regex = tokenizer::Tokenizer13a::new();
+ let mut line = "Hello, "World!<skipped>";
+ let mut res = tokenizer_regex.tokenize(line);
+ assert_eq!(res, vec!["Hello", ",", "\"", "World", "!"]);
+
+ line = "/usr/sbin/sendmail - 0 errors, 12 warnings";
+ res = tokenizer_regex.tokenize(line);
+ assert_eq!(
+ res,
+ vec![
+ "/", "usr", "/", "sbin", "/", "sendmail", "-", "0", "errors", ",", "12", "warnings"
+ ]
+ )
+ }
+}
+