Commit

Merge pull request #18 from btc-raspberrypiclub/15-use-chat-api
Migrate from the Ollama `generate` API to the `chat` API
Agent-E11 authored Jun 7, 2024
2 parents 2bcd2c7 + 8282a9c commit d0bf781
Showing 1 changed file with 8 additions and 38 deletions.
app/bot/llm.py: 46 changes (8 additions, 38 deletions)
@@ -17,7 +17,7 @@ async def generate_response(
     system_prompt: str = _botconf.botconfig.system_prompt,
     auto_pull_model: bool = _botconf.botconfig.auto_pull_model # TODO: Use this again
 ) -> str | None:
-    url = f"http://{_globalconf.LLM_HOST}:{_globalconf.LLM_PORT}/api/generate"
+    url = f"http://{_globalconf.LLM_HOST}:{_globalconf.LLM_PORT}/api/chat"
     logger.info(f"url: {url}")

     # While receiving responses, show typing status
@@ -28,10 +28,13 @@
             try:
                 async with cs.post(url, json={
                     "model": _botconf.botconfig.llm_model,
-                    "prompt": message.content,
-                    "system": system_prompt,
-                    "context": [],
                     "stream": False,
+                    "messages": [
+                        {"role": "system",
+                         "content": system_prompt},
+                        {"role": "user",
+                         "content": message.content},
+                    ],
                 }) as res:
                     data = await res.json()
                     if "error" in data:
@@ -42,46 +42,13 @@
                     dur = data["total_duration"] / 1_000_000_000
                     logger.info(f"response took {dur:.3f} seconds")

-                    return data["response"]
+                    return data["message"]["content"]

             except Exception as e:
                 #err_res = e.response
                 #if not isinstance(err_res, requests.models.Response):
                     #raise e

                 logger.error(f"{e}\nType: {type(e)}")

                 #if err_res.status_code == 404 and auto_pull_model:
                     #pull_model(_botconf.botconfig.llm_model)

                 return None
-            #except ConnectionError as e:
-                #log_print(f"Ollama server unavailable at {url}")
-                #return None
-
-            #response = ""
-            ## Loop through tokens as they are streamed in
-            #for line in r.iter_lines():
-                #body = json.loads(line) # Parse response as json
-
-                ## Append token to response
-                #response_part = body.get("response", "")
-                #response += response_part
-
-                ## Raise error if present
-                #if "error" in body:
-                    #raise Exception(body["error"])
-
-                ## Return response if done
-                #if body.get("done", False):
-                    #log_print("Done typing (done)")
-                    #return response
-
-            #log_print("Done typing (no more lines)")
-
-            ## Return response
-            #return None


 #def pull_model(model: str):
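
For reference, the shape of the new chat call can be sketched in isolation. The snippet below is a hypothetical, minimal example rather than code from this repository: it assumes an Ollama server on the default localhost:11434, a placeholder model name, and the aiohttp client that the bot's `cs.post(...)` usage suggests.

# Hypothetical minimal client for Ollama's /api/chat endpoint (assumptions:
# default host/port, a model named "llama3" already pulled, aiohttp installed).
import asyncio

import aiohttp


async def ask(prompt: str, system_prompt: str = "You are a helpful assistant.") -> str:
    url = "http://localhost:11434/api/chat"  # assumed default Ollama address
    async with aiohttp.ClientSession() as cs:
        async with cs.post(url, json={
            "model": "llama3",  # placeholder model name
            "stream": False,    # one JSON object instead of a token stream
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
        }) as res:
            data = await res.json()
            if "error" in data:
                raise RuntimeError(data["error"])
            # The chat API nests the reply under "message", unlike the old
            # generate API's flat "response" field.
            return data["message"]["content"]


if __name__ == "__main__":
    print(asyncio.run(ask("Hello!")))

With `"stream": False` the server returns a single JSON object whose reply sits under `message.content`, and whose `total_duration` field is reported in nanoseconds, which is why the handler above divides by 1_000_000_000 before logging.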