From 25e12c299abe729f50241bb0059c5a13897b9df8 Mon Sep 17 00:00:00 2001
From: Michal Moskal
Date: Thu, 19 Dec 2024 12:17:53 +0000
Subject: [PATCH] fix JSON output; allow non-auto tool_choice

fixes https://github.com/guidance-ai/llguidance/issues/81
---
 llgtrt/src/routes/completions.rs | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/llgtrt/src/routes/completions.rs b/llgtrt/src/routes/completions.rs
index 73f6e2f..a56f603 100644
--- a/llgtrt/src/routes/completions.rs
+++ b/llgtrt/src/routes/completions.rs
@@ -137,8 +137,8 @@ fn validate_compl(req: &CompletionCreateParams) -> Result<()> {
 fn validate_chat(req: &ChatCompletionCreateParams) -> Result<()> {
     let _ = req_params_from_openai(&req.params)?;
     ensure!(
-        req.tool_choice == ToolChoice::Simple(ToolChoiceOption::Auto),
-        "only 'auto' option is currently supported for tool_choice"
+        matches!(req.tool_choice, ToolChoice::Simple(_)),
+        "only simple options are currently supported for tool_choice"
     );
     ensure!(
         req.tools.is_empty() || req.params.response_format.is_none(),
@@ -232,11 +232,16 @@ async fn mk_req_info(

     req_params.use_logits_post_processor = true;

-    if is_chat {
-        tokens.extend_from_slice(&llg.process_prompt(vec![]));
-    } else {
-        tokens = llg.process_prompt(tokens);
-    }
+    // If we do that, we need to make sure we return the tokens forced
+    // by the grammar to the user. Currently we don't have infra for that,
+    // so instead we just start the parser without the prompt.
+    //
+    // if is_chat {
+    //     tokens.extend_from_slice(&llg.process_prompt(vec![]));
+    // } else {
+    //     tokens = llg.process_prompt(tokens);
+    // }
+    llg.start_without_prompt();

     let mut r = vec![Box::new(llg)];
     while r.len() < req_params.num_return_sequences as usize {