From 2cff1ef3720fdab0dc5b272dbd239a16ed40a76d Mon Sep 17 00:00:00 2001 From: cpacker Date: Thu, 21 Nov 2024 13:31:53 -0800 Subject: [PATCH] refactor: change the various namings of CHAR_LIMIT to TOKEN_LIMIT --- letta/config.py | 8 ++++---- letta/constants.py | 12 ++++-------- letta/utils.py | 37 +++++++++++++++---------------------- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/letta/config.py b/letta/config.py index 51287e0091..3ddc504947 100644 --- a/letta/config.py +++ b/letta/config.py @@ -8,8 +8,8 @@ import letta import letta.utils as utils from letta.constants import ( - CORE_MEMORY_HUMAN_CHAR_LIMIT, - CORE_MEMORY_PERSONA_CHAR_LIMIT, + CORE_MEMORY_HUMAN_TOKEN_LIMIT, + CORE_MEMORY_PERSONA_TOKEN_LIMIT, DEFAULT_HUMAN, DEFAULT_PERSONA, DEFAULT_PRESET, @@ -88,8 +88,8 @@ class LettaConfig: policies_accepted: bool = False # Default memory limits - core_memory_persona_char_limit: int = CORE_MEMORY_PERSONA_CHAR_LIMIT - core_memory_human_char_limit: int = CORE_MEMORY_HUMAN_CHAR_LIMIT + core_memory_persona_token_limit: int = CORE_MEMORY_PERSONA_TOKEN_LIMIT + core_memory_human_token_limit: int = CORE_MEMORY_HUMAN_TOKEN_LIMIT def __post_init__(self): # ensure types diff --git a/letta/constants.py b/letta/constants.py index fbcb9f3ce8..33e53881f4 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -133,12 +133,8 @@ # These serve as in-context examples of how to use functions / what user messages look like MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST = 3 -# Default memory limits -CORE_MEMORY_PERSONA_CHAR_LIMIT = 2000 -CORE_MEMORY_HUMAN_CHAR_LIMIT = 2000 - # Function return limits -FUNCTION_RETURN_CHAR_LIMIT = 6000 # ~300 words +FUNCTION_RETURN_TOKEN_LIMIT = 1500 # ~1100 words MAX_PAUSE_HEARTBEATS = 360 # in min @@ -155,9 +151,9 @@ RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE = 5 -# TODO Is this config or constant? 
-CORE_MEMORY_PERSONA_CHAR_LIMIT: int = 2000 -CORE_MEMORY_HUMAN_CHAR_LIMIT: int = 2000 +# Default memory limits +CORE_MEMORY_PERSONA_TOKEN_LIMIT: int = 2000 +CORE_MEMORY_HUMAN_TOKEN_LIMIT: int = 2000 MAX_FILENAME_LENGTH = 255 RESERVED_FILENAMES = {"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "LPT1", "LPT2"} diff --git a/letta/utils.py b/letta/utils.py index a2f65111b9..c6862671a4 100644 --- a/letta/utils.py +++ b/letta/utils.py @@ -26,9 +26,10 @@ import letta from letta.constants import ( CLI_WARNING_PREFIX, - CORE_MEMORY_HUMAN_CHAR_LIMIT, - CORE_MEMORY_PERSONA_CHAR_LIMIT, - FUNCTION_RETURN_CHAR_LIMIT, + CORE_MEMORY_HUMAN_TOKEN_LIMIT, + CORE_MEMORY_PERSONA_TOKEN_LIMIT, + DEFAULT_TIKTOKEN_MODEL, + FUNCTION_RETURN_TOKEN_LIMIT, LETTA_DIR, MAX_FILENAME_LENGTH, TOOL_CALL_ID_MAX_LEN, @@ -790,7 +791,7 @@ def find_class(self, module, name): return super().find_class(module, name) -def count_tokens(s: str, model: str = "gpt-4") -> int: +def count_tokens(s: str, model: str = DEFAULT_TIKTOKEN_MODEL) -> int: encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(s)) @@ -927,11 +928,10 @@ def validate_function_response(function_response_string: any, strict: bool = Fal # Now check the length and make sure it doesn't go over the limit # TODO we should change this to a max token limit that's variable based on tokens remaining (or context-window) - if truncate and len(function_response_string) > FUNCTION_RETURN_CHAR_LIMIT: - print( - f"{CLI_WARNING_PREFIX}function return was over limit ({len(function_response_string)} > {FUNCTION_RETURN_CHAR_LIMIT}) and was truncated" - ) - function_response_string = f"{function_response_string[:FUNCTION_RETURN_CHAR_LIMIT]}... 
[NOTE: function output was truncated since it exceeded the character limit ({len(function_response_string)} > {FUNCTION_RETURN_CHAR_LIMIT})]" + token_count = count_tokens(function_response_string) + if truncate and token_count > FUNCTION_RETURN_TOKEN_LIMIT: + print(f"{CLI_WARNING_PREFIX}function return was over limit ({token_count} > {FUNCTION_RETURN_TOKEN_LIMIT}) and was truncated") + function_response_string = f"{function_response_string[:FUNCTION_RETURN_TOKEN_LIMIT]}... [NOTE: function output was truncated since it exceeded the token limit ({token_count} > {FUNCTION_RETURN_TOKEN_LIMIT})]" return function_response_string @@ -994,8 +994,9 @@ def get_human_text(name: str, enforce_limit=True): file = os.path.basename(file_path) if f"{name}.txt" == file or name == file: human_text = open(file_path, "r", encoding="utf-8").read().strip() - if enforce_limit and len(human_text) > CORE_MEMORY_HUMAN_CHAR_LIMIT: - raise ValueError(f"Contents of {name}.txt is over the character limit ({len(human_text)} > {CORE_MEMORY_HUMAN_CHAR_LIMIT})") + token_count = count_tokens(human_text, model=DEFAULT_TIKTOKEN_MODEL) + if enforce_limit and token_count > CORE_MEMORY_HUMAN_TOKEN_LIMIT: + raise ValueError(f"Contents of {name}.txt is over the token limit ({token_count} > {CORE_MEMORY_HUMAN_TOKEN_LIMIT})") return human_text raise ValueError(f"Human {name}.txt not found") @@ -1006,22 +1007,14 @@ def get_persona_text(name: str, enforce_limit=True): file = os.path.basename(file_path) if f"{name}.txt" == file or name == file: persona_text = open(file_path, "r", encoding="utf-8").read().strip() - if enforce_limit and len(persona_text) > CORE_MEMORY_PERSONA_CHAR_LIMIT: - raise ValueError( - f"Contents of {name}.txt is over the character limit ({len(persona_text)} > {CORE_MEMORY_PERSONA_CHAR_LIMIT})" - ) + token_count = count_tokens(persona_text, model=DEFAULT_TIKTOKEN_MODEL) + if enforce_limit and token_count > CORE_MEMORY_PERSONA_TOKEN_LIMIT: + raise ValueError(f"Contents of {name}.txt is over 
the token limit ({token_count} > {CORE_MEMORY_PERSONA_TOKEN_LIMIT})") return persona_text raise ValueError(f"Persona {name}.txt not found") -def get_human_text(name: str): - for file_path in list_human_files(): - file = os.path.basename(file_path) - if f"{name}.txt" == file or name == file: - return open(file_path, "r", encoding="utf-8").read().strip() - - def get_schema_diff(schema_a, schema_b): # Assuming f_schema and linked_function['json_schema'] are your JSON schemas f_schema_json = json_dumps(schema_a)