Skip to content

Commit

Permalink
make sure <eos> is appended
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucidrains committed Apr 2, 2021
1 parent b6ebceb commit 4aade13
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions dalle_pytorch/simple_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,12 @@ def decode(self, tokens):

tokenizer = SimpleTokenizer()

-def tokenize(texts, context_length = 256, add_start_and_end = False, truncate_text=False):
+def tokenize(texts, context_length = 256, add_start = False, add_end = True, truncate_text = False):
     if isinstance(texts, str):
         texts = [texts]

-    sot_tokens = [tokenizer.encoder["<|startoftext|>"]] if add_start_and_end else []
-    eot_tokens = [tokenizer.encoder["<|endoftext|>"]] if add_start_and_end else []
+    sot_tokens = [tokenizer.encoder["<|startoftext|>"]] if add_start else []
+    eot_tokens = [tokenizer.encoder["<|endoftext|>"]] if add_end else []
     all_tokens = [sot_tokens + tokenizer.encode(text) + eot_tokens for text in texts]
     result = torch.zeros(len(all_tokens), context_length, dtype=torch.long)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
name = 'dalle-pytorch',
packages = find_packages(),
include_package_data = True,
-version = '0.8.0',
+version = '0.8.1',
license='MIT',
description = 'DALL-E - Pytorch',
author = 'Phil Wang',
Expand Down

0 comments on commit 4aade13

Please sign in to comment.