From f84907c7c77af25972027009837b16c75585de74 Mon Sep 17 00:00:00 2001 From: Mikael Souza Date: Fri, 26 Jan 2024 19:24:18 -0400 Subject: [PATCH] Add `gpt-4-0125` and `gpt-3.5-turbo-1106` context sizes (#59) --- tiktoken-rs/src/model.rs | 6 ++++++ tiktoken-rs/src/tokenizer.rs | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/tiktoken-rs/src/model.rs b/tiktoken-rs/src/model.rs index 9c51eb1..e32a0cb 100644 --- a/tiktoken-rs/src/model.rs +++ b/tiktoken-rs/src/model.rs @@ -32,6 +32,9 @@ macro_rules! starts_with_any { /// /// This function does not panic. It returns a default value of 4096 if the model is not recognized. pub fn get_context_size(model: &str) -> usize { + if starts_with_any!(model, "gpt-4-0125") { + return 128_000; + } if starts_with_any!(model, "gpt-4-1106") { return 128_000; } @@ -41,6 +44,9 @@ pub fn get_context_size(model: &str) -> usize { if starts_with_any!(model, "gpt-4") { return 8192; } + if starts_with_any!(model, "gpt-3.5-turbo-1106") { + return 16_385; + } if starts_with_any!(model, "gpt-3.5-turbo-16k") { return 16_384; } diff --git a/tiktoken-rs/src/tokenizer.rs b/tiktoken-rs/src/tokenizer.rs index d2f0ba4..fc6f4a3 100644 --- a/tiktoken-rs/src/tokenizer.rs +++ b/tiktoken-rs/src/tokenizer.rs @@ -136,11 +136,19 @@ mod tests { #[test] fn test_get_tokenizer() { + assert_eq!( + get_tokenizer("gpt-4-0125-preview"), + Some(Tokenizer::Cl100kBase) + ); assert_eq!(get_tokenizer("gpt-4-32k-0314"), Some(Tokenizer::Cl100kBase)); assert_eq!( get_tokenizer("gpt-4-1106-preview"), Some(Tokenizer::Cl100kBase) ); + assert_eq!( + get_tokenizer("gpt-3.5-turbo-1106"), + Some(Tokenizer::Cl100kBase), + ); assert_eq!(get_tokenizer("gpt-3.5-turbo"), Some(Tokenizer::Cl100kBase)); assert_eq!( get_tokenizer("ft:gpt-3.5-turbo:XXXXXX:2023-11-11"),