Skip to content

Commit

Permalink
Add length of second text added tokens check
Browse files Browse the repository at this point in the history
  • Loading branch information
boyleconnor committed Aug 5, 2023
1 parent d05bbb7 commit 37bb179
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion tokenizers/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,11 @@ where
(encoding, pair_encoding)
}
};
- let original_length = encoding.len();
+ let original_length = if let Some(second_encoding) = &pair_encoding {
+     encoding.len() + second_encoding.len()
+ } else {
+     encoding.len()
+ };

// 2. Then We post process
let final_encoding = if let Some(processor) = &self.post_processor {
Expand Down

0 comments on commit 37bb179

Please sign in to comment.