From 37bb179a49057d2e77393b8012f600daab0f3f1d Mon Sep 17 00:00:00 2001
From: Connor Boyle
Date: Sat, 5 Aug 2023 11:31:01 -0700
Subject: [PATCH] Add length of second text added tokens check

---
 tokenizers/src/tokenizer/mod.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tokenizers/src/tokenizer/mod.rs b/tokenizers/src/tokenizer/mod.rs
index a0117b6a4..90f472cbb 100644
--- a/tokenizers/src/tokenizer/mod.rs
+++ b/tokenizers/src/tokenizer/mod.rs
@@ -934,7 +934,11 @@ where
                 (encoding, pair_encoding)
             }
         };
-        let original_length = encoding.len();
+        let original_length = if let Some(second_encoding) = &pair_encoding {
+            encoding.len() + second_encoding.len()
+        } else {
+            encoding.len()
+        };
 
         // 2. Then We post process
         let final_encoding = if let Some(processor) = &self.post_processor {