From 25e12c299abe729f50241bb0059c5a13897b9df8 Mon Sep 17 00:00:00 2001
From: Michal Moskal
Date: Thu, 19 Dec 2024 12:17:53 +0000
Subject: [PATCH] fix JSON output; allow non-auto tool_choice

fixes https://github.com/guidance-ai/llguidance/issues/81
---
 llgtrt/src/routes/completions.rs | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/llgtrt/src/routes/completions.rs b/llgtrt/src/routes/completions.rs
index 73f6e2f..a56f603 100644
--- a/llgtrt/src/routes/completions.rs
+++ b/llgtrt/src/routes/completions.rs
@@ -137,8 +137,8 @@ fn validate_compl(req: &CompletionCreateParams) -> Result<()> {
 fn validate_chat(req: &ChatCompletionCreateParams) -> Result<()> {
     let _ = req_params_from_openai(&req.params)?;
     ensure!(
-        req.tool_choice == ToolChoice::Simple(ToolChoiceOption::Auto),
-        "only 'auto' option is currently supported for tool_choice"
+        matches!(req.tool_choice, ToolChoice::Simple(_)),
+        "only simple options are currently supported for tool_choice"
     );
     ensure!(
         req.tools.is_empty() || req.params.response_format.is_none(),
@@ -232,11 +232,16 @@ async fn mk_req_info(

     req_params.use_logits_post_processor = true;

-    if is_chat {
-        tokens.extend_from_slice(&llg.process_prompt(vec![]));
-    } else {
-        tokens = llg.process_prompt(tokens);
-    }
+    // If we do that, we need to make sure we return the tokens forced
+    // by the grammar to the user. Currently we don't have infra for that,
+    // so instead we just start the parser without the prompt.
+    //
+    // if is_chat {
+    //     tokens.extend_from_slice(&llg.process_prompt(vec![]));
+    // } else {
+    //     tokens = llg.process_prompt(tokens);
+    // }
+    llg.start_without_prompt();

     let mut r = vec![Box::new(llg)];
     while r.len() < req_params.num_return_sequences as usize {