Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Online chat formatter #8

Merged
merged 23 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions docs/source/api/components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,18 @@ ModelDownloader
.. autoclass:: lazyllm.components.ModelDownloader
:members:
:exclude-members:

Formatter
==========

.. autoclass:: lazyllm.components.formatter.LazyLLMFormatterBase
:members:
:exclude-members:

.. autoclass:: lazyllm.components.JsonFormatter
:members:
:exclude-members:

.. autoclass:: lazyllm.components.EmptyFormatter
:members:
:exclude-members:
4 changes: 3 additions & 1 deletion lazyllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
Loop as loop, Switch as switch, IFS as ifs, Warp as warp)
from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
LazyLLMValidateBase, register as component_register, Prompter,
AlpacaPrompter, ChatPrompter, FastapiApp)
AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)

from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
ServerModule, TrialModule, register as module_register,
OnlineChatModule, OnlineEmbeddingModule)
Expand All @@ -33,6 +34,7 @@
'AlpacaPrompter',
'ChatPrompter',
'FastapiApp',
'JsonFormatter',

# flow
'LazyLLMFlowsBase', # pipeline, parallel
Expand Down
6 changes: 5 additions & 1 deletion lazyllm/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .validate import LazyLLMValidateBase
from .auto import AutoDeploy, AutoFinetune
from .utils import ModelDownloader
from .formatter import FormatterBase, EmptyFormatter, JsonFormatter

__all__ = [
'register',
Expand All @@ -19,5 +20,8 @@
'FastapiApp',
'AutoDeploy',
'AutoFinetune',
'ModelDownloader'
'ModelDownloader',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
10 changes: 10 additions & 0 deletions lazyllm/components/formatter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .formatterBase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
from .jsonFormatter import JsonFormatter


__all__ = [
'LazyLLMFormatterBase',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
44 changes: 44 additions & 0 deletions lazyllm/components/formatter/formatterBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from ...common import LazyLLMRegisterMetaClass

class LazyLLMFormatterBase(metaclass=LazyLLMRegisterMetaClass):
    """Base class for model-output formatters.

    A formatter optionally takes a slice-style expression such as
    ``"[0:2, title]"`` describing which part of the (JSON-like) model output
    to extract.  Subclasses implement ``_str_to_python`` and
    ``_parse_py_data_by_formatter``; callers use :meth:`format`.
    """

    def __init__(self, formatter: str = None):
        # Raw formatter expression, e.g. "[0:2, name]"; None disables slicing.
        self._formatter = formatter
        if self._formatter:
            self._parse_formatter()
        else:
            self._slices = None

    def _parse_formatter(self):
        """Parse ``self._formatter`` into ``self._slices``: a list whose
        entries are either ``slice`` objects (numeric ranges) or plain
        strings (dict keys)."""
        # Remove the surrounding brackets: "[0:2, title]" -> "0:2, title".
        slice_str = self._formatter.strip()[1:-1]
        dimensions = slice_str.split(",")
        slices = []

        for dim in dimensions:
            if ":" in dim:
                # Numeric slice "start:end:step"; any part may be omitted.
                parts = dim.split(":")
                start = int(parts[0]) if parts[0] else None
                end = int(parts[1]) if len(parts) > 1 and parts[1] else None
                step = int(parts[2]) if len(parts) > 2 and parts[2] else None
                slices.append(slice(start, end, step))
            else:
                # Plain key: kept as a (stripped) string, e.g. a dict field name.
                slices.append(dim.strip())
        self._slices = slices

    def _str_to_python(self, msg: str):
        """Convert a raw string message into a Python object (subclass hook)."""
        raise NotImplementedError("This str to python convert function is not implemented.")

    def _parse_py_data_by_formatter(self, py_data):
        """Extract data from *py_data* according to the parsed slices (subclass hook)."""
        raise NotImplementedError("This data parse function is not implemented.")

    def format(self, msg):
        """Format *msg*: strings are first converted to Python objects via
        ``_str_to_python``, then filtered through the formatter expression."""
        if isinstance(msg, str):
            py_data = self._str_to_python(msg)
        else:
            py_data = msg
        res = self._parse_py_data_by_formatter(py_data)
        return res

class EmptyFormatter(LazyLLMFormatterBase):
    """Pass-through formatter: returns the model output unchanged."""

    def format(self, msg):
        # No conversion or slicing is applied; msg is returned as-is.
        return msg
76 changes: 76 additions & 0 deletions lazyllm/components/formatter/jsonFormatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
from .formatterBase import LazyLLMFormatterBase as FormatterBase
import lazyllm
from typing import List, Dict

class JsonFormatter(FormatterBase):
    """Formatter that extracts JSON objects embedded in a model's mixed text
    output and optionally selects parts of them using the base-class slice
    syntax (e.g. ``"[:, title]"``)."""

    def _extract_json_from_string(self, mixed_str: str):
        """Return the substrings of *mixed_str* that parse as JSON objects.

        Tracks brace depth while skipping braces that occur inside JSON
        string literals; each brace-balanced candidate is validated with
        ``json.loads`` before being accepted.
        """
        json_objects = []
        brace_level = 0
        current_json = ""
        in_string = False

        for char in mixed_str:
            # Toggle in/out of a JSON string on an unescaped double quote.
            # NOTE(review): a string literal ending in an escaped backslash
            # (\\") defeats this check — confirm whether such inputs occur.
            if char == '"' and (len(current_json) == 0 or current_json[-1] != '\\'):
                in_string = not in_string

            if not in_string:
                if char == '{':
                    if brace_level == 0:
                        current_json = ""  # start a fresh candidate object
                    brace_level += 1
                elif char == '}':
                    brace_level -= 1

            # Accumulate characters while inside an object (and the final '}').
            if brace_level > 0 or (brace_level == 0 and char == '}'):
                current_json += char

            # Back at depth 0 with accumulated text: validate the candidate.
            if brace_level == 0 and current_json:
                try:
                    json.loads(current_json)
                    json_objects.append(current_json)
                    current_json = ""
                except json.JSONDecodeError:
                    continue

        return json_objects

    def _str_to_python(self, msg: str):
        """Convert *msg* to parsed JSON.

        Returns a single object when exactly one JSON object is found, a list
        of objects when several are found, and ``""`` on any failure (the
        error is logged, not raised).
        """
        # A string with mismatched braces cannot consist of complete objects.
        assert msg.count("{") == msg.count("}"), f"{msg} is not a valid json string."
        try:
            json_strs = self._extract_json_from_string(msg)
            if len(json_strs) == 0:
                raise TypeError(f"{msg} is not a valid json string.")
            res = []
            for json_str in json_strs:
                res.append(json.loads(json_str))
            return res if len(res) > 1 else res[0]
        except Exception as e:
            lazyllm.LOG.info(f"Error: {e}")
            return ""

    def _parse_py_data_by_formatter(self, data):
        """Apply the parsed formatter slices to *data*.

        ``slice`` entries index sequences; string entries index dicts, or are
        mapped over each element of a list.  Returns ``""`` on any indexing
        error (logged, not raised).
        """
        if self._slices is None:
            return data
        else:
            result = data
            try:
                for s in self._slices:
                    if isinstance(s, slice):
                        result = result[s]
                    elif isinstance(s, str):
                        if isinstance(result, List):
                            # Map the key over every element of the list.
                            res = [val[s] for val in result]
                            result = res if len(res) > 1 else res[0]
                        elif isinstance(result, Dict):
                            result = result[s]
                        else:
                            raise TypeError(f"{result} is not support {s} index.")
                    else:
                        raise TypeError(f"This class is not support {s} index.")
            except Exception as e:
                lazyllm.LOG.error(f"{e}")
                return ""

            # Unwrap a single-element list for caller convenience.
            return result[0] if len(result) == 1 and isinstance(result, list) else result
56 changes: 56 additions & 0 deletions lazyllm/docs/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,62 @@ def test_prompter():
>>> downloader.download('GLM3-6B')
''')

# ============= Formatter

# FormatterBase
add_chinese_doc('formatter.FormatterBase', '''\
此类是格式化器的基类,格式化器是模型输出结果的格式化器,用户可以自定义格式化器,也可以使用LazyLLM提供的格式化器。
主要方法:_parse_formatter:解析索引内容。_str_to_python:把字符串转化为python对象。_parse_py_data_by_formatter:根据自定义的格式化器和索引对python对象进行格式化。format:对传入的内容进行格式化,如果内容是字符串类型,先将字符串转化为python对象,再进行格式化。如果内容是python对象,直接进行格式化。
''')

add_english_doc('formatter.FormatterBase', '''\
This class is the base class of the formatter. The formatter is the formatter of the model output result. Users can customize the formatter or use the formatter provided by LazyLLM.
Main methods: _parse_formatter: parse the index content. _str_to_python: convert the string into a python object. _parse_py_data_by_formatter: format the python object according to the custom formatter and index. format: format the passed content. If the content is a string type, convert the string into a python object first, and then format it. If the content is a python object, format it directly.
''')

# The example subclasses the same name it imports (FormatterBase), and the
# doctest body is indented so it is syntactically valid.
add_example('formatter.FormatterBase', '''\
>>> from lazyllm.components.formatter import FormatterBase
>>> class MyFormatter(FormatterBase):
...     def _str_to_python(self, data):
...         return custom(data)
...
...     def _parse_py_data_by_formatter(self, data, formatter):
...         return custom(data, formatter)
...
>>> fmt = MyFormatter("[:]")
>>> fmt.format("[1,2,3]")
[1,2,3]
''')

# JsonFormatter
add_chinese_doc('JsonFormatter', '''\
此类是JSON格式化器,即用户希望模型输出的内容格式为JSON,还可以通过索引方式对输出内容中的某个字段进行选择。
''')

add_english_doc('JsonFormatter', '''\
This class is a JSON formatter, that is, the user wants the model to output content is JSON format, and can also select a field in the output content by indexing.
''')

add_example('JsonFormatter', '''\
>>> from lazyllm.components import JsonFormatter
>>> jsonFormatter=JsonFormatter("[:, title]") # ":" represents all elements in a list. "title" represents the "title" field in the json data.
>>> model.formatter(jsonFormatter)
''')

# EmptyFormatter
add_chinese_doc('EmptyFormatter', '''\
此类是空的格式化器,即用户希望对模型的输出不做格式化,用户可以对模型指定该格式化器,也可以不指定(模型默认的格式化器就是空格式化器)
''')

add_english_doc('EmptyFormatter', '''\
This type is the system default formatter. When the user does not specify anything or does not want to format the model output, this type is selected. The model output will be in the same format.
''')

add_example('EmptyFormatter', '''\
>>> from lazyllm.components import EmptyFormatter
>>> emptyFormatter = EmptyFormatter()
>>> model.formatter(emptyFormatter)
''')

# ============= Prompter

Expand Down
6 changes: 1 addition & 5 deletions lazyllm/module/onlineChatModule/onlineChatModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class OnlineChatModule(metaclass=_ChatModuleMeta):
@staticmethod
def _encapsulate_parameters(base_url: str,
model: str,
system_prompt: str,
stream: bool,
return_trace: bool,
**kwargs) -> Dict[str, Any]:
Expand All @@ -29,8 +28,6 @@ def _encapsulate_parameters(base_url: str,
params['base_url'] = base_url
if model is not None:
params['model'] = model
if system_prompt is not None:
params['system_prompt'] = system_prompt
params.update(kwargs)

return params
Expand All @@ -39,11 +36,10 @@ def __new__(self,
source: str,
base_url: str = None,
model: str = None,
system_prompt: str = None,
stream: bool = True,
return_trace: bool = False,
**kwargs):
params = OnlineChatModule._encapsulate_parameters(base_url, model, system_prompt, stream, return_trace, **kwargs)
params = OnlineChatModule._encapsulate_parameters(base_url, model, stream, return_trace, **kwargs)

if source.lower() == "openai":
return OpenAIModule(**params)
Expand Down
Loading