Skip to content

Commit

Permalink
add a small test
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Oct 4, 2024
1 parent b5640a6 commit ed34ffd
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion bindings/python/py_src/tokenizers/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ class Tokenizer:
"""
pass

- def assing_tokens(self, old_tokens, new_tokens):
+ def assign_tokens(self, old_tokens, new_tokens):
"""
Add the given tokens to the vocabulary
Expand Down
7 changes: 7 additions & 0 deletions bindings/python/tests/bindings/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,13 @@ def test_setting_to_none(self):
tokenizer.pre_tokenizer = None
assert tokenizer.pre_tokenizer == None

def test_re_assign_tokens(self):
    """Check the effects of re-assigning an existing token to a new string.

    After mapping "<extra_id_0>" to "my_new_token" via ``assign_tokens``:
    decoding id 32099 yields the new string, encoding the old string
    produces several pieces, and the new string appears in the vocabulary
    returned by ``get_vocab(True)``.
    """
    tok = Tokenizer.from_pretrained("t5-base")
    tok.assign_tokens({"<extra_id_0>": "my_new_token"})

    # NOTE(review): 32099 is presumably the id "<extra_id_0>" held before
    # the re-assignment -- confirm against the t5-base vocabulary.
    decoded = tok.decode([32099])
    assert decoded == "my_new_token"

    # The old surface form is now expected to be split into pieces.
    expected_pieces = ["▁", "<", "extra", "_", "i", "d", "_", "0", ">", "</s>"]
    pieces = tok.encode("<extra_id_0>").tokens
    assert pieces == expected_pieces

    vocab = tok.get_vocab(True)
    assert "my_new_token" in vocab


class TestTokenizerRepr:
def test_repr(self):
Expand Down

0 comments on commit ed34ffd

Please sign in to comment.