Skip to content

Commit

Permalink
Store local tokenizer for tests to avoid connecting to the HF hub cloud. Disable stack view cache.
Browse files Browse the repository at this point in the history
  • Loading branch information
ollmer committed Sep 24, 2024
1 parent 2dc42c7 commit f0378ce
Show file tree
Hide file tree
Showing 7 changed files with 412,649 additions and 15 deletions.
5 changes: 4 additions & 1 deletion tapeagents/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def log_completion(self, prompt: Prompt, message: LLMMessage, cached: bool = Fal
# Use this variable to force all LLMs to use cache from the sqlite DB
# This is meant to be used for testing purposes only
_REPLAY_SQLITE: str = ""
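# When non-empty, this name/path is passed to AutoTokenizer.from_pretrained instead of
# the LLM's own tokenizer_name/model_name. Meant for testing purposes only.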
_MOCK_TOKENIZER: str = ""


class CachedLLM(LLM):
Expand Down Expand Up @@ -423,7 +425,8 @@ def load_tokenizer(self):
if self.tokenizer is None:
import transformers

self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.tokenizer_name or self.model_name)
name = _MOCK_TOKENIZER if _MOCK_TOKENIZER else (self.tokenizer_name or self.model_name)
self.tokenizer = transformers.AutoTokenizer.from_pretrained(name)

def make_training_text(self, prompt: Prompt, completion: Completion) -> TrainingText:
self.load_tokenizer()
Expand Down
4 changes: 2 additions & 2 deletions tapeagents/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def put_new_view_on_stack(self, step):
@staticmethod
def compute(tape: Tape) -> TapeViewStack[StepType]:
# TODO: retrieve view from a prefix of the tape, recompute from the prefix
if (cached_view_stack := _view_stack_cache.get(id(tape))) is not None:
return cached_view_stack
# if (cached_view_stack := _view_stack_cache.get(id(tape))) is not None:
# return cached_view_stack
stack = TapeViewStack(stack=[TapeView(agent_name="root", agent_full_name="root")])
for step in tape.steps:
stack.update(step)
Expand Down
4 changes: 4 additions & 0 deletions tests/res/meta_llama_3_70b_tokenizer/special_tokens_map.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"bos_token": "<|begin_of_text|>",
"eos_token": "<|end_of_text|>"
}
Loading

0 comments on commit f0378ce

Please sign in to comment.