From 71224d737088d21ac6d994a77a5b7b280e97da06 Mon Sep 17 00:00:00 2001 From: Drew Date: Tue, 22 Oct 2024 00:04:24 -0400 Subject: [PATCH] feat(memory/main.py, pyproject.toml): add json_repair to handle invalid JSON and extract content from code blocks In `memory/main.py`, the update introduces `json_repair` for robust JSON handling, ensuring the application can process and correct invalid JSON formats. This is particularly useful in environments where JSON data might not be well-formed. The code now also extracts JSON strings embedded within code blocks using regular expressions, enhancing the ability to process diverse response formats. Additionally, `json-repair` is added to the dependencies in `pyproject.toml`, ensuring the necessary library is available for handling these JSON parsing improvements. This facilitates more resilient data processing capabilities in the application. --- mem0/memory/main.py | 21 +++++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/mem0/memory/main.py b/mem0/memory/main.py index c8c3c229bf..3edfd709f5 100644 --- a/mem0/memory/main.py +++ b/mem0/memory/main.py @@ -1,3 +1,5 @@ +import re +import json_repair import concurrent import hashlib import json @@ -152,8 +154,14 @@ def _add_to_vector_store(self, messages, metadata, filters): response_format={"type": "json_object"}, ) + # Extract JSON content from code blocks in the response using a regular expression + search_result = re.search("(```json)((.*\n)+)(```)", response) + if search_result: + response = search_result.group(2).strip() + try: - new_retrieved_facts = json.loads(response)["facts"] + # Attempt to load the JSON response using json_repair to fix any invalid JSON syntax + new_retrieved_facts = json_repair.loads(response)["facts"] except Exception as e: logging.error(f"Error in new_retrieved_facts: {e}") new_retrieved_facts = [] @@ -178,8 +186,17 @@ def _add_to_vector_store(self, messages, metadata, filters): messages=[{"role": "user", "content": function_calling_prompt}], response_format={"type": "json_object"}, ) - new_memories_with_actions = json.loads(new_memories_with_actions) + # Extract JSON content from possible code blocks in response using re + search_result = re.search("(```json)((.*\n)+)(```)", new_memories_with_actions) + if search_result: + new_memories_with_actions = search_result.group(2).strip() + try: + # Attempt to load the JSON response using json_repair to fix any invalid JSON syntax + new_memories_with_actions = json_repair.loads(new_memories_with_actions) + except Exception as e: + logging.error(f"Could not load JSON from new_memories_with_actions: {e}") + new_memories_with_actions = [] returned_memories = [] try: for resp in new_memories_with_actions["memory"]: diff --git a/pyproject.toml b/pyproject.toml index 5e5537a1fb..92a4d5c42f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ openai = "^1.33.0" posthog = "^3.5.0" pytz = "^2024.1" sqlalchemy = "^2.0.31" +json-repair = "^0.30.0" [tool.poetry.group.graph.dependencies] langchain-community = "^0.3.1"