Online chat formatter #8

Merged
merged 23 commits on Jun 20, 2024
Changes from 10 commits
10 changes: 6 additions & 4 deletions README.ENG.md
@@ -92,6 +92,7 @@ import lazyllm
from lazyllm import pipeline, parallel, Identity, warp, package
import time
import re, json
from lazyllm.components.formatter import JsonFormatter

toc_prompt="""
You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.
@@ -134,11 +135,12 @@ Receive as follows:
</details>

```python
t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
jsonFormatter = JsonFormatter("[:]")
t1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(jsonFormatter).prompt(ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False).prompt(ChatPrompter(instruction=completion_prompt))

spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
warper = lambda s: tuple(s)
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d)
collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)

10 changes: 6 additions & 4 deletions README.md
@@ -89,6 +89,7 @@ import lazyllm
from lazyllm import pipeline, parallel, Identity, warp, package
import time
import re, json
from lazyllm.components.formatter import JsonFormatter

toc_prompt=""" 你现在是一个智能助手。你的任务是理解用户的输入,将大纲以列表嵌套字典的列表。每个字典包含一个 `title` 和 `describe`,其中 `title` 中需要用Markdown格式标清层级,`describe` `describe` 是对该段的描述和写作指导。

@@ -129,11 +130,12 @@ completion_prompt="""
</details>

```python
t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
jsonFormatter = JsonFormatter("[:]")
t1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(jsonFormatter).prompt(ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False).prompt(ChatPrompter(instruction=completion_prompt))

spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
warper = lambda s: tuple(s)
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d)
collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)

15 changes: 15 additions & 0 deletions docs/source/api/components.rst
@@ -60,3 +60,18 @@ ModelDownloader
.. autoclass:: lazyllm.components.ModelDownloader
:members:
:exclude-members:

Formatter
==========

.. autoclass:: lazyllm.components.formatter.LazyLLMFormatterBase
:members:
:exclude-members:

.. autoclass:: lazyllm.components.JsonFormatter
:members:
:exclude-members:

.. autoclass:: lazyllm.components.EmptyFormatter
:members:
:exclude-members:
4 changes: 3 additions & 1 deletion lazyllm/__init__.py
@@ -7,7 +7,8 @@
Loop as loop, Switch as switch, IFS as ifs, Warp as warp)
from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
LazyLLMValidateBase, register as component_register, Prompter,
AlpacaPrompter, ChatPrompter, FastapiApp)
AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)

from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
ServerModule, TrialModule, register as module_register,
OnlineChatModule, OnlineEmbeddingModule)
@@ -33,6 +34,7 @@
'AlpacaPrompter',
'ChatPrompter',
'FastapiApp',
'JsonFormatter',

# flow
'LazyLLMFlowsBase', # pipeline, parallel
6 changes: 5 additions & 1 deletion lazyllm/components/__init__.py
@@ -6,6 +6,7 @@
from .validate import LazyLLMValidateBase
from .auto import AutoDeploy, AutoFinetune
from .utils import ModelDownloader
from .formatter import FormatterBase, EmptyFormatter, JsonFormatter

__all__ = [
'register',
@@ -19,5 +20,8 @@
'FastapiApp',
'AutoDeploy',
'AutoFinetune',
'ModelDownloader'
'ModelDownloader',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
10 changes: 10 additions & 0 deletions lazyllm/components/formatter/__init__.py
@@ -0,0 +1,10 @@
from .formatterBase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
from .jsonFormatter import JsonFormatter


__all__ = [
'LazyLLMFormatterBase',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
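
The `__init__.py` above exports the base class under two names. A quick sanity check of that aliasing, assuming the package is installed and importable (this snippet is illustrative, not part of the PR):

```python
# FormatterBase is just an import alias for LazyLLMFormatterBase (see the
# "as FormatterBase" import above), so both names resolve to the same class.
from lazyllm.components.formatter import FormatterBase, LazyLLMFormatterBase, JsonFormatter

assert FormatterBase is LazyLLMFormatterBase
assert issubclass(JsonFormatter, FormatterBase)
```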
55 changes: 55 additions & 0 deletions lazyllm/components/formatter/formatterBase.py
@@ -0,0 +1,55 @@
from ...common import LazyLLMRegisterMetaClass

def is_number(s):
try:
float(s)
return True
except ValueError:
pass

try:
import unicodedata
unicodedata.numeric(s)
return True
except (TypeError, ValueError):
pass
return False

class LazyLLMFormatterBase(metaclass=LazyLLMRegisterMetaClass):
def __init__(self, formatter: str = None):
self._formatter = formatter
if self._formatter:
self._parse_formatter()
else:
self._slices = None

def _parse_formatter(self):
# Remove the surrounding brackets
slice_str = self._formatter.strip()[1:-1]
dimensions = slice_str.split(",")
slices = []

for dim in dimensions:
if ":" in dim:
parts = dim.split(":")
start = int(parts[0]) if is_number(parts[0]) else None
end = int(parts[1]) if len(parts) > 1 and is_number(parts[1]) else None
step = int(parts[2]) if len(parts) > 2 and is_number(parts[2]) else None
slices.append(slice(start, end, step))
else:
slices.append(dim.strip())
self._slices = slices

def _load_str(self, msg: str):
raise NotImplementedError("This parse str function is not implemented.")

def _parse_py_data_by_formatter(self, py_data):
raise NotImplementedError("This data parse function is not implemented.")

def format(self, msg):
if isinstance(msg, str): msg = self._load_str(msg)
return self._parse_py_data_by_formatter(msg)

class EmptyFormatter(LazyLLMFormatterBase):
def format(self, msg):
return msg
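
A minimal sketch of how a concrete formatter plugs into `LazyLLMFormatterBase`, based only on the hooks shown above (`_load_str`, `_parse_py_data_by_formatter`, and the slice list that `_parse_formatter` builds); the `ListFormatter` class and the sample input are hypothetical, not part of this PR:

```python
import json
from lazyllm.components.formatter import LazyLLMFormatterBase

class ListFormatter(LazyLLMFormatterBase):
    # Hypothetical subclass: load plain JSON, then apply the parsed slices.
    def _load_str(self, msg: str):
        # format() calls this when it receives a str; assume the str is valid JSON.
        return json.loads(msg)

    def _parse_py_data_by_formatter(self, py_data):
        # self._slices comes from _parse_formatter, e.g. "[0:2]" -> [slice(0, 2, None)].
        if self._slices is None:
            return py_data
        result = py_data
        for key in self._slices:
            result = result[key]  # slice objects index lists, str keys index dicts
        return result

fmt = ListFormatter("[0:2]")
print(fmt.format("[10, 20, 30, 40]"))  # expected: [10, 20]
```

`EmptyFormatter`, by contrast, overrides `format` to return the message unchanged, so it can serve as a no-op default.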
82 changes: 82 additions & 0 deletions lazyllm/components/formatter/jsonFormatter.py
@@ -0,0 +1,82 @@
import copy
import json
from .formatterBase import LazyLLMFormatterBase as FormatterBase
import lazyllm
from typing import List, Dict, Union, Any

class JsonFormatter(FormatterBase):
def _extract_json_from_string(self, mixed_str: str):
json_objects = []
brace_level = 0
current_json = ""
in_string = False

for char in mixed_str:
if char == '"' and (len(current_json) == 0 or current_json[-1] != '\\'):
in_string = not in_string

if not in_string:
if char == '{':
if brace_level == 0:
current_json = ""
brace_level += 1
elif char == '}':
brace_level -= 1

if brace_level > 0 or (brace_level == 0 and char == '}'):
current_json += char

if brace_level == 0 and current_json:
try:
json.loads(current_json)
json_objects.append(current_json)
current_json = ""
except json.JSONDecodeError:
continue

return json_objects

def _load_str(self, msg: str):
# Convert str to json format
assert msg.count("{") == msg.count("}"), f"{msg} is not a valid json string."
try:
json_strs = self._extract_json_from_string(msg)
if len(json_strs) == 0:
raise TypeError(f"{msg} is not a valid json string.")
res = []
for json_str in json_strs:
res.append(json.loads(json_str))
return res if len(res) > 1 else res[0]
except Exception as e:
lazyllm.LOG.info(f"Error: {e}")
return ""

def _parsing_format_output(self, keys: List, data: Union[List[Dict[str, Any]], Dict[str, Any]]):
if not keys:
return data
key = keys.pop(0)
try:
if isinstance(key, slice):
return self._parsing_format_output(keys, data[key])
elif isinstance(key, str):
if isinstance(data, List):
res = [val[key] for val in data]
return self._parsing_format_output(keys, res if len(res) > 1 else res[0])
elif isinstance(data, Dict):
return self._parsing_format_output(keys, data.get(key, {}))
else:
return data
else:
raise TypeError(f"This class does not support {key} as an index.")
except Exception as e:
lazyllm.LOG.error(f"{e}")
return ""

def _parse_py_data_by_formatter(self, data):
if self._slices is None:
return data
else:
keys = copy.deepcopy(self._slices)
result = self._parsing_format_output(keys, data)

return result[0] if len(result) == 1 and isinstance(result, List) else result
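
A usage sketch for `JsonFormatter`, following the code above. The reply string is invented for illustration, and the `"[:, title]"` form is an assumption drawn from how `_parse_formatter` splits comma-separated dimensions; only `"[:]"` appears in the README change itself:

```python
from lazyllm.components.formatter import JsonFormatter

# A chat reply that mixes prose with JSON objects (invented example).
reply = ('Here is the outline: '
         '[{"title": "# Introduction", "describe": "..."}, '
         '{"title": "## Usage", "describe": "..."}]')

# "[:]" keeps every JSON object extracted from the string, as configured for t1
# in the README diff above.
print(JsonFormatter("[:]").format(reply))
# expected: [{'title': '# Introduction', 'describe': '...'},
#            {'title': '## Usage', 'describe': '...'}]

# A second, comma-separated dimension indexes each extracted object by key
# (assumed syntax, not shown in the PR).
print(JsonFormatter("[:, title]").format(reply))
# expected: ['# Introduction', '## Usage']
```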