From 5026d82d7054f556fccadd8df2e733ab2aa6a730 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 6 Aug 2024 13:45:19 +0200 Subject: [PATCH] Update the tests. --- tokenizers/src/models/bpe/serialization.rs | 6 +++++- tokenizers/src/models/mod.rs | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tokenizers/src/models/bpe/serialization.rs b/tokenizers/src/models/bpe/serialization.rs index a27306f96..24525a551 100644 --- a/tokenizers/src/models/bpe/serialization.rs +++ b/tokenizers/src/models/bpe/serialization.rs @@ -178,10 +178,14 @@ mod test { .build() .unwrap(); + let legacy = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"#; + let legacy = serde_json::from_str(&legacy).unwrap(); + assert_eq!(bpe, legacy); + let data = serde_json::to_string(&bpe).unwrap(); assert_eq!( data, - r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"# + r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":[["a","b"]]}"# ); let reconstructed = serde_json::from_str(&data).unwrap(); diff --git a/tokenizers/src/models/mod.rs b/tokenizers/src/models/mod.rs index e12e70ffc..59e981fac 100644 --- a/tokenizers/src/models/mod.rs +++ b/tokenizers/src/models/mod.rs @@ -312,11 +312,14 @@ mod tests { .unwrap(); let model = ModelWrapper::BPE(bpe); + let legacy = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"#; + let legacy = serde_json::from_str(&legacy).unwrap(); + assert_eq!(model, legacy); let data = serde_json::to_string(&model).unwrap(); assert_eq!( data, - r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"# + r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":[["a","b"]]}"# ); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(model, reconstructed);