Skip to content

Commit

Permalink
Only check added tokens when add_special_tokens is true
Browse files: browse the repository at this point in the history
  • Loading branch information
boyleconnor committed Aug 5, 2023
1 parent 37bb179 commit 212ac33
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions tokenizers/src/tokenizer/mod.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -956,13 +956,15 @@ where
}
encodings.pop().unwrap()
};
assert_eq!(
final_encoding.len() - self.get_n_added_tokens(is_pair),
original_length,
"Processor should add {} tokens but instead added {}!",
self.get_n_added_tokens(is_pair),
final_encoding.len() - original_length
);
if add_special_tokens {
assert_eq!(
final_encoding.len() - self.get_n_added_tokens(is_pair),
original_length,
"Processor should add {} tokens but instead added {}!",
self.get_n_added_tokens(is_pair),
final_encoding.len() - original_length
)
};

// 3. Then we pad if needed
let [final_encoding] = if let Some(params) = &self.padding {
Expand Down

0 comments on commit 212ac33

Please sign in to comment.