diff --git a/llgtrt/src/routes/health_check.rs b/llgtrt/src/routes/health_check.rs
index 34f9689..5927ee3 100644
--- a/llgtrt/src/routes/health_check.rs
+++ b/llgtrt/src/routes/health_check.rs
@@ -34,11 +34,13 @@ pub async fn model_check(
     headers: HeaderMap,
     State(app_state): State<Arc<AppState>>,
 ) -> Result<Response, AppError> {
-    let req: CompletionCreateParams = serde_json::from_value(json!({
+    let mut req: CompletionCreateParams = serde_json::from_value(json!({
         "model": "model",
         "prompt": "Hi",
         "max_tokens": 2
     }))?;
+    // set very high priority for this request, so that it returns quickly
+    req.params.priority = Some(10.0);
     let resp = completions::route_completions(headers, State(app_state), Json(req)).await?;
     let status = resp.status();
     let body = axum::body::to_bytes(resp.into_body(), 1024 * 1024).await?;
diff --git a/llgtrt/src/routes/openai.rs b/llgtrt/src/routes/openai.rs
index 1a67913..f2630b2 100644
--- a/llgtrt/src/routes/openai.rs
+++ b/llgtrt/src/routes/openai.rs
@@ -173,6 +173,7 @@ pub struct CommonCreateParams {
     pub logprobs: Option<usize>,
 
     /// Defaults to 0.5. We don't allow it in JSON requests, but can be set internally.
+    /// Setting it to a higher value, such as 1.0 or 10.0, makes the request complete faster.
     #[serde(skip)]
     pub priority: Option<f32>,
 }
diff --git a/llguidance b/llguidance
index cfef3df..0ca091a 160000
--- a/llguidance
+++ b/llguidance
@@ -1 +1 @@
-Subproject commit cfef3df97372a7b84d74976ff41cc9cb78bca6cc
+Subproject commit 0ca091a701a50134e0503fa03c5c12b206e182a3
diff --git a/scripts/req.py b/scripts/req.py
old mode 100644
new mode 100755
index 9d8bd15..046a2f5
--- a/scripts/req.py
+++ b/scripts/req.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import requests
 import os
 import threading
@@ -247,7 +249,7 @@ def one_round():
 
 
 def main():
-    random.seed(0)
+    # random.seed(0)
     parser = argparse.ArgumentParser()
     parser.add_argument("--max_threads", type=int, default=0)
     parser.add_argument("--sessions", type=int, default=0)
@@ -259,6 +261,7 @@ def main():
         LLG = True
         NUM_THREADS = args.sessions
         PROMPT_SIZE = 2600
+        PROMPT_SIZE = 40_000
         NUM_REPS = 1
         NUM_JOKES = 100
         MAX_TOKENS = 4000
diff --git a/scripts/test-infer.sh b/scripts/test-infer.sh
index a86b81b..2e10b59 100755
--- a/scripts/test-infer.sh
+++ b/scripts/test-infer.sh
@@ -91,6 +91,10 @@ curl -X POST "${TRT_API_BASE}chat/completions" \
     curl -v "${TRT_API_BASE}health/live"
     ;;
 
+   health)
+    curl -v "${TRT_API_BASE}health/model"
+    ;;
+
    ready)
     curl -v "${TRT_API_BASE}health/ready"
     ;;