Skip to content

Commit

Permalink
Fix: forgot to remove the old token from the added-tokens map when re-assigning it
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Oct 4, 2024
1 parent 545d723 commit ee7ce80
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
3 changes: 3 additions & 0 deletions bindings/python/tests/bindings/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,9 @@ def test_re_assign_tokens(self):
tokenizer = Tokenizer.from_pretrained("t5-base")
tokenizer.assign_tokens({"<extra_id_0>": "my_new_token"})
assert tokenizer.decode([32099]) == "my_new_token"
assert tokenizer.encode("my_new_token").tokens == ["my_new_token", "</s>"]
assert tokenizer.encode("my_new_token").ids == [32099, 1]
assert tokenizer.encode("<extra_id_0>").ids == [0, 1]
assert tokenizer.encode("<extra_id_0>").tokens == ["▁", "<", "extra", "_", "i", "d", "_", "0", ">", "</s>"]
assert "my_new_token" in tokenizer.get_vocab(True).keys()

Expand Down
4 changes: 4 additions & 0 deletions tokenizers/src/tokenizer/added_vocabulary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ impl AddedVocabulary {
.unwrap()
.entry(id)
.and_modify(|t| *t = new_token.clone()); // Replace entire entry with new_token
self.added_tokens_map
.lock()
.unwrap()
.remove(old_token.content.as_str());
self.refresh_added_tokens(model, normalizer);
} else {
error!(
Expand Down

0 comments on commit ee7ce80

Please sign in to comment.