Skip to content

Commit

Permalink
Fix: asynchronous mode not working
Browse files Browse the repository at this point in the history
Turns out that swapping YCloudML for AsyncYCloudML doesn't mean the integration actually uses real asynchronous mode 😬
  • Loading branch information
black-roland committed Dec 14, 2024
1 parent 0d8ea7c commit 2c93902
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 1 deletion.
6 changes: 6 additions & 0 deletions custom_components/yandexgpt_conversation/config_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)

from .const import (
CONF_ASYNCHRONOUS_MODE,
CONF_CHAT_MODEL,
CONF_FOLDER_ID,
CONF_MAX_TOKENS,
Expand Down Expand Up @@ -219,6 +220,11 @@ def yandexgpt_config_option_schema(
description={"suggested_value": options.get(CONF_TEMPERATURE)},
default=RECOMMENDED_TEMPERATURE,
): NumberSelector(NumberSelectorConfig(min=0, max=1, step=0.05)),
vol.Optional(
CONF_ASYNCHRONOUS_MODE,
description={"suggested_value": options.get(CONF_ASYNCHRONOUS_MODE)},
default=options.get(CONF_ASYNCHRONOUS_MODE, False),
): bool,
vol.Optional(
CONF_MAX_TOKENS,
description={"suggested_value": options.get(CONF_MAX_TOKENS)},
Expand Down
1 change: 1 addition & 0 deletions custom_components/yandexgpt_conversation/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Option keys stored in the config entry's options dict.
CONF_TEMPERATURE = "temperature"
CONF_CHAT_MODEL = "chat_model"
CONF_MODEL_VERSION = "model_version"
# When enabled, completions run via the SDK's deferred-operation API
# (run_deferred + polling) instead of a single synchronous run call.
CONF_ASYNCHRONOUS_MODE = "asynchronous_mode"
# Defaults used when the user has not chosen a model explicitly.
DEFAULT_CHAT_MODEL = "yandexgpt-lite"
DEFAULT_MODEL_VERSION = "latest"
RECOMMENDED_MAX_TOKENS = 1024
Expand Down
11 changes: 10 additions & 1 deletion custom_components/yandexgpt_conversation/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from .const import (
BASE_PROMPT_RU,
CONF_ASYNCHRONOUS_MODE,
CONF_CHAT_MODEL,
CONF_MAX_TOKENS,
CONF_MODEL_VERSION,
Expand Down Expand Up @@ -199,7 +200,7 @@ async def async_process(

try:
model = client.models.completions(model_name, model_version=model_ver)
result = await model.configure(**model_conf).run(messages)
result = await self.run_completion(model.configure(**model_conf), messages)
except Exception as err:
LOGGER.exception(err)

Expand All @@ -221,3 +222,11 @@ async def async_process(
return conversation.ConversationResult(
response=intent_response, conversation_id=conversation_id
)

async def run_completion(self, model, messages):
if not self.entry.options.get(CONF_ASYNCHRONOUS_MODE, False):
return await model.run(messages)

operation = await model.run_deferred(messages)
LOGGER.debug("Async operation ID: %s", operation.id)
return await operation.wait(poll_timeout=300, poll_interval=0.5)
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"recommended": "Recommended model settings",
"model_version": "Version",
"temperature": "Temperature",
"asynchronous_mode": "Asynchronous mode",
"max_tokens": "Maximum tokens to return in response"
},
"data_description": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"recommended": "Рекомендуемые настройки модели",
"model_version": "Версия",
"temperature": "Температура",
"asynchronous_mode": "Асинхронный режим",
"max_tokens": "Ограничить количество токенов в ответе"
},
"data_description": {
Expand Down

0 comments on commit 2c93902

Please sign in to comment.