Online chat formatter #8

Merged: 23 commits, Jun 20, 2024
Changes from 18 commits
README.ENG.md: 9 additions & 9 deletions

@@ -93,9 +93,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()

```python
import lazyllm
-from lazyllm import pipeline, parallel, Identity, warp, package
+from lazyllm import pipeline, warp, package, bind
import time
import re, json
+from lazyllm.components.formatter import JsonFormatter

toc_prompt="""
You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.
@@ -134,19 +135,18 @@ This is the expanded content for writing.
Receive as follows:

"""

+writer_prompt = {"system": completion_prompt, "user": '{"title": {title}, "describe": {describe}}'}
```
</details>

```python
-t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
-t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
-
-spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
-writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
-collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
-m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
+with pipeline() as ppl:
+    ppl.outline_writer = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
+    ppl.story_generater = warp(lazyllm.OnlineChatModule(source="openai", stream=False).prompt(writer_prompt))
+    ppl.synthesizer = (lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])) | bind(ppl.outline_writer, ppl.story_generater)

-print(m({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
+print(ppl({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
```

## What can LazyLLM do
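The rewritten example drops the explicit `parallel(Identity, warp(...))` wiring: `bind(ppl.outline_writer, ppl.story_generater)` feeds the outputs of the two earlier stages into the synthesizer as its two arguments. A minimal sketch of that data flow, with plain functions as illustrative stand-ins for the lazyllm modules:

```python
# Plain-function stand-ins for the pipeline stages (names and outputs are illustrative).
def outline_writer(query):
    # JsonFormatter turns the model reply into a list of outline dicts.
    return [{"title": "# AI in Medicine", "describe": "Introduce the topic."},
            {"title": "## Diagnosis", "describe": "Cover diagnostic uses."}]

def story_generater(outlines):
    # warp(...) applies the writer model to every outline entry.
    return [f"Expanded text for: {o['describe']}" for o in outlines]

def synthesizer(dict_tuple, repl_tuple):
    # bind(...) supplies both earlier outputs here as positional arguments;
    # each expanded section is re-merged under its original title.
    return "\n".join(v for i, d in enumerate(dict_tuple)
                     for v in {**d, "describe": repl_tuple[i]}.values())

outlines = outline_writer("an article about AI in medicine")
print(synthesizer(outlines, story_generater(outlines)))
```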
README.md: 9 additions & 9 deletions

@@ -90,9 +90,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()

```python
import lazyllm
-from lazyllm import pipeline, parallel, Identity, warp, package
+from lazyllm import pipeline, warp, package, bind
import time
import re, json
+from lazyllm.components.formatter import JsonFormatter

toc_prompt=""" You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.

@@ -129,19 +130,18 @@ completion_prompt="""
Receive as follows:

"""

+writer_prompt = {"system": completion_prompt, "user": '{"title": {title}, "describe": {describe}}'}
```
</details>

```python
-t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
-t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
-
-spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
-writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
-collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
-m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
+with pipeline() as ppl:
+    ppl.outline_writer = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
+    ppl.story_generater = warp(lazyllm.OnlineChatModule(source="openai", stream=False).prompt(writer_prompt))
+    ppl.synthesizer = (lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])) | bind(ppl.outline_writer, ppl.story_generater)

-print(m({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
+print(ppl({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
```

## 4. Features
docs/source/api/components.rst: 15 additions & 0 deletions

@@ -60,3 +60,18 @@ ModelDownloader
.. autoclass:: lazyllm.components.ModelDownloader
    :members:
    :exclude-members:

+Formatter
+==========
+
+.. autoclass:: lazyllm.components.formatter.LazyLLMFormatterBase
+    :members:
+    :exclude-members:
+
+.. autoclass:: lazyllm.components.JsonFormatter
+    :members:
+    :exclude-members:
+
+.. autoclass:: lazyllm.components.EmptyFormatter
+    :members:
+    :exclude-members:
lazyllm/__init__.py: 3 additions & 1 deletion

@@ -7,7 +7,8 @@
                   Loop as loop, Switch as switch, IFS as ifs, Warp as warp)
from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
                         LazyLLMValidateBase, register as component_register, Prompter,
-                        AlpacaPrompter, ChatPrompter, FastapiApp)
+                        AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)

from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
                     ServerModule, TrialModule, register as module_register,
                     OnlineChatModule, OnlineEmbeddingModule)
@@ -33,6 +34,7 @@
    'AlpacaPrompter',
    'ChatPrompter',
    'FastapiApp',
+    'JsonFormatter',

    # flow
    'LazyLLMFlowsBase',  # pipeline, parallel
lazyllm/common/common.py: 3 additions & 3 deletions

@@ -334,19 +334,19 @@ class LazyLlmRequest(struct):

    def split(self, flag=None):
        if flag is None:
-            assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
+            assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
                f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
            return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
        elif isinstance(flag, int):
-            assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
+            assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
                f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
            assert flag == len(self.input), 'input size mismatch with split number'
            return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
        elif isinstance(flag, list):
            if isinstance(self.input, dict):
                assert len(self.kwargs) == 0, 'Cannot provide input and kwargs at the same time for split'
                d = self.input
-            elif isinstance(self.input, tuple):
+            elif isinstance(self.input, (tuple, list)):
                return self.split(len(flag))
            else:
                assert not self.input, 'Cannot provide input and kwargs at the same time for split'
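This change only relaxes two isinstance checks, but it is what lets JSON-decoded lists (such as JsonFormatter output) flow through warp. A simplified sketch of the before/after behavior; the real LazyLlmRequest also carries kwargs and global_parameters:

```python
# Simplified stand-in for LazyLlmRequest.split with the relaxed check.
def split(data):
    assert isinstance(data, (tuple, list)), (  # was: isinstance(data, tuple)
        f'Only tuple input can be split automatically, your input is {data} <{type(data)}>')
    return list(data)

print(split((1, 2)))              # accepted before and after: [1, 2]
print(split([{"title": "# A"}]))  # raised AssertionError before, now: [{'title': '# A'}]
```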
lazyllm/common/logger.py: 1 addition & 1 deletion

@@ -18,7 +18,7 @@
"log_format",
str,
"{process}: <green>{time:YYYY-MM-DD HH:mm:ss}</green> {extra[name]} "
"<level>{level}</level>: ({name}) <cyan>{message}</cyan>",
"<level>{level}</level>: ({name}:{line}) <cyan>{message}</cyan>",
"LOG_FORMAT",
)
lazyllm.config.add("log_dir", str, "~/.lazyllm", "LOG_DIR")
lazyllm/components/__init__.py: 5 additions & 1 deletion

@@ -6,6 +6,7 @@
from .validate import LazyLLMValidateBase
from .auto import AutoDeploy, AutoFinetune
from .utils import ModelDownloader
+from .formatter import FormatterBase, EmptyFormatter, JsonFormatter

__all__ = [
    'register',
@@ -19,5 +20,8 @@
    'FastapiApp',
    'AutoDeploy',
    'AutoFinetune',
-    'ModelDownloader'
+    'ModelDownloader',
+    'FormatterBase',
+    'EmptyFormatter',
+    'JsonFormatter'
]
lazyllm/components/formatter/__init__.py: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+from .formatterBase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
+from .jsonFormatter import JsonFormatter
+
+
+__all__ = [
+    'LazyLLMFormatterBase',
+    'FormatterBase',
+    'EmptyFormatter',
+    'JsonFormatter'
+]
lazyllm/components/formatter/formatterBase.py: 50 additions & 0 deletions

@@ -0,0 +1,50 @@
+from ...common import LazyLLMRegisterMetaClass
+
+def is_number(s: str):
+    try:
+        int(s)
+        return True
+    except ValueError:
+        if s == "None" or len(s) == 0:
+            return False
+        else:
+            raise ValueError("Invalid number: " + s + ". You can enter an integer, None or an empty string.")
+
+class LazyLLMFormatterBase(metaclass=LazyLLMRegisterMetaClass):
+    def __init__(self, formatter: str = None):
+        self._formatter = formatter
+        if self._formatter:
+            self._parse_formatter()
+        else:
+            self._slices = None
+
+    def _parse_formatter(self):
+        # Remove the surrounding brackets
+        slice_str = self._formatter.strip()[1:-1]
+        dimensions = slice_str.split(",")
+        slices = []
+
+        for dim in dimensions:
+            if ":" in dim:
+                parts = dim.split(":")
+                start = int(parts[0]) if is_number(parts[0]) else None
+                end = int(parts[1]) if len(parts) > 1 and is_number(parts[1]) else None
+                step = int(parts[2]) if len(parts) > 2 and is_number(parts[2]) else None
+                slices.append(slice(start, end, step))
+            else:
+                slices.append(dim.strip())
+        self._slices = slices
+
+    def _load(self, msg: str):
+        raise NotImplementedError("This parse str function is not implemented.")
+
+    def _parse_py_data_by_formatter(self, py_data):
+        raise NotImplementedError("This data parse function is not implemented.")
+
+    def format(self, msg):
+        if isinstance(msg, str): msg = self._load(msg)
+        return self._parse_py_data_by_formatter(msg)
+
+class EmptyFormatter(LazyLLMFormatterBase):
+    def format(self, msg):
+        return msg
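For orientation: the formatter string is a bracketed, comma-separated mix of Python-style slices and key names. A small sketch of what `_parse_formatter` yields, assuming the base class can be instantiated directly:

```python
from lazyllm.components.formatter import LazyLLMFormatterBase

# "[0:2, title]" means: slice the top-level list to its first two elements,
# then take the "title" field of each element.
fmt = LazyLLMFormatterBase("[0:2, title]")
print(fmt._slices)  # [slice(0, 2, None), 'title']
```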
lazyllm/components/formatter/jsonFormatter.py: 57 additions & 0 deletions

@@ -0,0 +1,57 @@
+import json
+from .formatterBase import LazyLLMFormatterBase as FormatterBase
+import lazyllm
+
+class JsonFormatter(FormatterBase):
+    def _extract_json_from_string(self, mixed_str: str):
+        json_objects = []
+        brace_level = 0
+        current_json = ""
+        in_string = False
+
+        for char in mixed_str:
+            if char == '"' and (len(current_json) == 0 or current_json[-1] != '\\'):
+                in_string = not in_string
+
+            if not in_string:
+                if char == '{':
+                    if brace_level == 0:
+                        current_json = ""
+                    brace_level += 1
+                elif char == '}':
+                    brace_level -= 1
+
+            if brace_level > 0 or (brace_level == 0 and char == '}'):
+                current_json += char
+
+            if brace_level == 0 and current_json:
+                try:
+                    json.loads(current_json)
+                    json_objects.append(current_json)
+                    current_json = ""
+                except json.JSONDecodeError:
+                    continue
+
+        return json_objects
+
+    def _load(self, msg: str):
+        # Convert str to json format
+        assert msg.count("{") == msg.count("}"), f"{msg} is not a valid json string."
+        try:
+            json_strs = self._extract_json_from_string(msg)
+            if len(json_strs) == 0:
+                raise TypeError(f"{msg} is not a valid json string.")
+            res = []
+            for json_str in json_strs:
+                res.append(json.loads(json_str))
+            return res if len(res) > 1 else res[0]
+        except Exception as e:
+            lazyllm.LOG.info(f"Error: {e}")
+            return ""
+
+    def _parse_py_data_by_formatter(self, data, *, slices=None):
+        if slices is None: slices = self._slices
+        if not slices: return data
+        if isinstance(slices[0], slice): return [self._parse_py_data_by_formatter(d, slices=slices[1:])
+                                                 for d in data[slices[0]]]
+        else: return self._parse_py_data_by_formatter(data[slices[0]], slices=slices[1:])
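Putting the two files together, a quick usage sketch of the new formatter (import path taken from the `__init__.py` above; expected outputs are my reading of the code):

```python
from lazyllm.components.formatter import JsonFormatter

# _load extracts JSON objects out of surrounding prose, as in a typical model reply:
reply = 'Sure, here is the outline: {"title": "# AI in Medicine", "describe": "Overview."}'
print(JsonFormatter().format(reply))
# -> {'title': '# AI in Medicine', 'describe': 'Overview.'}

# With a slice string, fields can be projected out of already-parsed data:
outline = [{"title": "# A", "describe": "x"}, {"title": "## B", "describe": "y"}]
print(JsonFormatter("[:, title]").format(outline))
# -> ['# A', '## B']
```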
lazyllm/components/prompter/alpacaPrompter.py: 9 additions & 3 deletions

@@ -1,15 +1,21 @@
-from typing import List, Union, Optional
+from typing import List, Union, Optional, Dict
from .builtinPrompt import LazyLLMPrompterBase

class AlpacaPrompter(LazyLLMPrompterBase):
-    def __init__(self, instruction: Union[None, str] = None,
+    def __init__(self, instruction: Union[None, str, Dict[str, str]] = None,
                 extro_keys: Union[None, List[str]] = None, show: bool = False, tools: Optional[List] = None):
        super(__class__, self).__init__(show, tools=tools)
+        if isinstance(instruction, dict):
+            splice_instruction = instruction.get("system", "") + \
+                AlpacaPrompter.ISA + instruction.get("user", "") + AlpacaPrompter.ISE
+            instruction = splice_instruction
        instruction_template = ("Below is an instruction that describes a task, paired with extra messages such as "
                                "input that provides further context if possible. Write a response that "
                                f"appropriately completes the request.\n\n ### Instruction:\n{instruction}"
                                "\n\n" + LazyLLMPrompterBase._get_extro_key_template(extro_keys))
-        self._init_prompt("{system}\n{instruction}\n{tools}### Response:\n", instruction_template, "### Response:")
+        self._init_prompt("{system}\n{instruction}\n{tools}\n{user}### Response:\n",
+                          instruction_template,
+                          "### Response:")

    def _check_values(self, instruction, input, history, tools):
        assert not history, f"Chat history is not supported in {__class__}."
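The dict-valued `instruction` is the user-facing half of this change: one prompter now carries a system instruction plus a user-side template, spliced together with the ISA/ISE markers and separated again at generate time. A hedged usage sketch; the prompt text is illustrative and the rendered output is not shown here:

```python
from lazyllm import AlpacaPrompter

# "system" fills the instruction template; "user" travels between the
# ISA/ISE markers and is re-attached to the user input in generate_prompt.
prompter = AlpacaPrompter({"system": "Answer the question concisely.",
                           "user": "Question: "})
print(prompter.generate_prompt("What is LazyLLM?"))
```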
lazyllm/components/prompter/builtinPrompt.py: 30 additions & 4 deletions

@@ -1,10 +1,14 @@
from typing import Dict, Union, Any, List, Callable, Optional
from ...common import LazyLLMRegisterMetaClass
from lazyllm import LOG
+from functools import reduce
import json
import re

class LazyLLMPrompterBase(metaclass=LazyLLMRegisterMetaClass):
+    ISA = "<!lazyllm-spliter!>"
+    ISE = "</!lazyllm-spliter!>"
+
    def __init__(self, show=False, tools=None):
        self._set_model_configs(system='You are an AI-Agent developed by LazyLLM.', sos='<|start_system|>',
                                soh='<|Human|>:', soa='<|Assistant|>:', eos='<|end_system|>', eoh='', eoa='')
@@ -74,15 +78,23 @@ def _get_instruction_and_input(self, input):
            return self._instruction_template, input
        assert isinstance(input, dict)
        kwargs = {k: input.pop(k) for k in prompt_keys}
-        assert len(input) <= 1, f'Unexpected keys found in input: {list(input.keys())}'
-        return (self._instruction_template.format(**kwargs) if len(kwargs) > 0 else self._instruction_template,
-                list(input.values())[0] if input else '')
+        assert len(input) <= 1, f"Unexpected keys found in input: {list(input.keys())}"
+        return (reduce(lambda s, kv: s.replace(f"{{{kv[0]}}}", kv[1]),
+                       kwargs.items(),
+                       self._instruction_template)
+                if len(kwargs) > 0 else self._instruction_template,
+                list(input.values())[0] if input else "")

    def _check_values(self, instruction, input, history, tools): pass

    # Used for TrainableModule(local deployed)
    def _generate_prompt_impl(self, instruction, input, history, tools, label):
-        params = dict(system=self._system, instruction=instruction, input=input, history=history, tools=tools,
+        params = dict(system=self._system, instruction=instruction, user=input, history=history, tools=tools,
                      sos=self._sos, eos=self._eos, soh=self._soh, eoh=self._eoh, soa=self._soa, eoa=self._eoa)
        return self._template.format(**params) + (label if label else '')

@@ -105,6 +117,18 @@ def pre_hook(self, func: Optional[Callable] = None):
        self._pre_hook = func
        return self

+    def _split_instruction(self, instruction: str):
+        system_instruction = instruction
+        user_instruction = ""
+        if LazyLLMPrompterBase.ISA in instruction and LazyLLMPrompterBase.ISE in instruction:
+            # The instruction includes system prompts and/or user prompts
+            pattern = re.compile(r"%s(.*)%s" % (LazyLLMPrompterBase.ISA, LazyLLMPrompterBase.ISE))
+            ret = re.split(pattern, instruction)
+            system_instruction = ret[0]
+            user_instruction = ret[1]
+
+        return system_instruction, user_instruction
+
    def generate_prompt(self, input: Union[str, Dict[str, str], None] = None,
                        history: List[Union[List[str], Dict[str, Any]]] = None,
                        tools: Union[List[Dict[str, Any]], None] = None,
@@ -116,6 +140,8 @@ def generate_prompt(self, input: Union[str, Dict[str, str], None] = None,
        history = self._get_histories(history, return_dict=return_dict)
        tools = self._get_tools(tools, return_dict=return_dict)
        self._check_values(instruction, input, history, tools)
+        instruction, user_instruction = self._split_instruction(instruction)
+        input = user_instruction + input
        func = self._generate_prompt_dict_impl if return_dict else self._generate_prompt_impl
        result = func(instruction, input, history, tools, label)
        if self._show or show: LOG.info(result)
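The ISA/ISE round-trip is easiest to see in isolation. A self-contained sketch of the core of `_split_instruction`, with the marker values copied from the class above:

```python
import re

ISA = "<!lazyllm-spliter!>"   # instruction-splitter start marker
ISE = "</!lazyllm-spliter!>"  # instruction-splitter end marker

# The prompters splice {"system": ..., "user": ...} into one string ...
instruction = "You are a helpful writer." + ISA + '{"title": {title}}' + ISE

# ... and generate_prompt splits it back apart:
pattern = re.compile(r"%s(.*)%s" % (ISA, ISE))
ret = re.split(pattern, instruction)
print(ret[0])  # system part: 'You are a helpful writer.'
print(ret[1])  # user part:   '{"title": {title}}'
```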
lazyllm/components/prompter/chatPrompter.py: 6 additions & 2 deletions

@@ -1,10 +1,14 @@
-from typing import List, Union, Optional
+from typing import List, Union, Optional, Dict
from .builtinPrompt import LazyLLMPrompterBase

class ChatPrompter(LazyLLMPrompterBase):
-    def __init__(self, instruction: Union[None, str] = None,
+    def __init__(self, instruction: Union[None, str, Dict[str, str]] = None,
                 extro_keys: Union[None, List[str]] = None, show: bool = False, tools: Optional[List] = None):
        super(__class__, self).__init__(show, tools=tools)
+        if isinstance(instruction, dict):
+            splice_instruction = instruction.get("system", "") + \
+                ChatPrompter.ISA + instruction.get("user", "") + ChatPrompter.ISE
+            instruction = splice_instruction
        instruction_template = f'{instruction}\n{{extro_keys}}\n'.replace(
            '{extro_keys}', LazyLLMPrompterBase._get_extro_key_template(extro_keys))
        self._init_prompt("{sos}{system}{instruction}{tools}{eos}\n\n{history}\n{soh}\n{input}\n{eoh}{soa}\n",