From db8cc1cbba2f77e86342b67143a3d852faf5a434 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Sun, 7 Jan 2024 00:24:13 +0900 Subject: [PATCH 1/2] Use the chunk function to chunk the data into 25 pieces for put and delete in dynamo_linker.py --- RAGchain/utils/linker/dynamo_linker.py | 42 +++++++++++-------- .../RAGchain/utils/linker/test_base_linker.py | 10 +++++ .../utils/linker/test_dynamo_linker.py | 4 ++ .../RAGchain/utils/linker/test_json_linker.py | 4 ++ .../utils/linker/test_redis_linker.py | 4 ++ 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/RAGchain/utils/linker/dynamo_linker.py b/RAGchain/utils/linker/dynamo_linker.py index e8991f23..1aecd2ea 100644 --- a/RAGchain/utils/linker/dynamo_linker.py +++ b/RAGchain/utils/linker/dynamo_linker.py @@ -128,25 +128,31 @@ def flush_db(self): def put_json(self, ids: List[Union[UUID, str]], json_data_list: List[dict]): assert len(ids) == len(json_data_list), "ids and json_data_list must have the same length" - items = [{ - 'PutRequest': { - 'Item': { - 'id': str(_id), - 'data': json_data + for id_chunk, data_chunk in zip(self.chunk(ids, 25), self.chunk(json_data_list, 25)): + items = [{ + 'PutRequest': { + 'Item': { + 'id': str(_id), + 'data': json_data + } } - } - } for _id, json_data in zip(ids, json_data_list)] - request_items = {self.table_name: items} - self.dynamodb.batch_write_item(RequestItems=request_items) + } for _id, json_data in zip(id_chunk, data_chunk)] + request_items = {self.table_name: items} + self.dynamodb.batch_write_item(RequestItems=request_items) def delete_json(self, ids: List[Union[UUID, str]]): - str_ids = [str(_id) for _id in ids] - items = [{ - 'DeleteRequest': { - 'Key': { - 'id': _id + for id_chunk in self.chunk(ids, 25): + items = [{ + 'DeleteRequest': { + 'Key': { + 'id': str(_id) + } } - } - } for _id in str_ids] - request_items = {self.table_name: items} - self.dynamodb.batch_write_item(RequestItems=request_items) + } for _id in id_chunk] + request_items = {self.table_name: items} + self.dynamodb.batch_write_item(RequestItems=request_items) + + @staticmethod + def chunk(lst, n): + for i in range(0, len(lst), n): + yield lst[i:i + n] diff --git a/tests/RAGchain/utils/linker/test_base_linker.py b/tests/RAGchain/utils/linker/test_base_linker.py index 218971d6..b741001f 100644 --- a/tests/RAGchain/utils/linker/test_base_linker.py +++ b/tests/RAGchain/utils/linker/test_base_linker.py @@ -36,6 +36,9 @@ LONG_DB_ORIGIN = [TEST_DB_ORIGIN[0], TEST_DB_ORIGIN[1], TEST_DB_ORIGIN[2], TEST_DB_ORIGIN[0], TEST_DB_ORIGIN[1], TEST_DB_ORIGIN[2], None, TEST_DB_ORIGIN[1]] +LONG_26_TEST_IDS = [uuid4() for _ in range(26)] +LONG_26_DB_ORIGIN = [TEST_DB_ORIGIN[0] for _ in range(26)] + def test_singleton_same_child(): with pytest.raises(SingletonCreationError) as e: @@ -159,3 +162,10 @@ def delete_test(linker): linker.delete_json(['test_id2', 'test_id4']) new_data = linker.get_json(test_id_list) assert new_data == [db_origin_list[0], None, db_origin_list[2], None] + + +def long_26_test(linker): + linker.put_json(LONG_26_TEST_IDS, LONG_26_DB_ORIGIN) + assert linker.get_json(LONG_26_TEST_IDS) == LONG_26_DB_ORIGIN + linker.delete_json(LONG_26_TEST_IDS) + assert linker.get_json(LONG_26_TEST_IDS) == [None for _ in range(26)] diff --git a/tests/RAGchain/utils/linker/test_dynamo_linker.py b/tests/RAGchain/utils/linker/test_dynamo_linker.py index 7a0c0b03..29076ba8 100644 --- a/tests/RAGchain/utils/linker/test_dynamo_linker.py +++ b/tests/RAGchain/utils/linker/test_dynamo_linker.py @@ -40,3 +40,7 @@ def test_delete(dynamo_db): def test_long(dynamo_db): test_base_linker.long_test(dynamo_db) + + +def test_long_26(dynamo_db): + test_base_linker.long_26_test(dynamo_db) diff --git a/tests/RAGchain/utils/linker/test_json_linker.py b/tests/RAGchain/utils/linker/test_json_linker.py index ac4d4581..1c591071 100644 --- a/tests/RAGchain/utils/linker/test_json_linker.py +++ b/tests/RAGchain/utils/linker/test_json_linker.py @@ -38,3 +38,7 @@ def test_delete(json_linker): def test_long(json_linker): test_base_linker.long_test(json_linker) + + +def test_long_26(json_linker): + test_base_linker.long_26_test(json_linker) diff --git a/tests/RAGchain/utils/linker/test_redis_linker.py b/tests/RAGchain/utils/linker/test_redis_linker.py index 9cb0dc41..ce24c9b2 100644 --- a/tests/RAGchain/utils/linker/test_redis_linker.py +++ b/tests/RAGchain/utils/linker/test_redis_linker.py @@ -39,3 +39,7 @@ def test_delete(redis_db): def test_long(redis_db): test_base_linker.long_test(redis_db) + + +def test_long_26(redis_db): + test_base_linker.long_26_test(redis_db) From 87ccae91406bd8e1e2a449c305d1b01c6351a11e Mon Sep 17 00:00:00 2001 From: jeffrey Date: Sun, 7 Jan 2024 13:45:23 +0900 Subject: [PATCH 2/2] add pytest.warns at long_26_test --- tests/RAGchain/utils/linker/test_base_linker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/RAGchain/utils/linker/test_base_linker.py b/tests/RAGchain/utils/linker/test_base_linker.py index b741001f..4b89d139 100644 --- a/tests/RAGchain/utils/linker/test_base_linker.py +++ b/tests/RAGchain/utils/linker/test_base_linker.py @@ -168,4 +168,5 @@ def long_26_test(linker): linker.put_json(LONG_26_TEST_IDS, LONG_26_DB_ORIGIN) assert linker.get_json(LONG_26_TEST_IDS) == LONG_26_DB_ORIGIN linker.delete_json(LONG_26_TEST_IDS) - assert linker.get_json(LONG_26_TEST_IDS) == [None for _ in range(26)] + with pytest.warns(NoIdWarning) as record: + assert linker.get_json(LONG_26_TEST_IDS) == [None for _ in range(26)]