From a74a270505922b3b68827f5df5db4f0b5390846e Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Sat, 1 Jun 2024 21:17:32 +0100 Subject: [PATCH] chore: remove unused function and test We used to have this when we were caring about id order in the dbs but we don't anymore cause cba, so i'm not sure why this is still here. --- dataimporter/lib/dbs.py | 38 -------------------------------------- tests/test_dbs.py | 20 -------------------- 2 files changed, 58 deletions(-) diff --git a/dataimporter/lib/dbs.py b/dataimporter/lib/dbs.py index f70c689..04e922e 100644 --- a/dataimporter/lib/dbs.py +++ b/dataimporter/lib/dbs.py @@ -4,48 +4,10 @@ import msgpack import plyvel -from fastnumbers import check_int from splitgill.utils import parse_to_timestamp, now, partition from dataimporter.lib.model import SourceRecord -# the maximum integer we can represent as a sortable string is 78 digits -MAX_INT = int("9" * 78) - - -def int_to_sortable_str(number: int) -> str: - """ - Encodes the given number and returns a string that when compared to other strings is - alphanumerically orderable. This fixes the standard 1, 2, 20, 21, 3 problem without - using zero padding which wastes space and requires a much lower maximum input value. - The algorithm used is based on the one presented here: - https://www.arangodb.com/2017/09/sorting-number-strings-numerically/ with a couple - of tweaks. - - Essentially, we encode the length of the number before the number itself using a - single ASCII character. This allows sorting to be done properly as the ASCII - character is compared first and then the number next. For example, the number 1 gets - the character 1 so is encoded as "1_1", whereas 10 gets the character 2 and is - encoded "2_10". Because we are restricted to not use . in keys and for low number - convenience, we start at character point 49 which is the character 1 and therefore - all numbers less than 1,000,000,000 are encoded with the numbers 1 to 9 which is - convenient for users. - - This encoding structure can support a number with a maximum length of 78 digits - (ASCII char 1 (49) to ~ (126)). - - This function only works on positive integers. If the input isn't valid, a - ValueError is raised. - - :param number: the number to encode, must be positive - :return: the encoded number as a str object - """ - if not check_int(number): - raise ValueError("Number must be a valid integer") - if number < 0 or number > MAX_INT: - raise ValueError(f"Number must be positive and no more than {MAX_INT}") - return f"{chr(48 + len(str(number)))}_{number}" - class DB: """ diff --git a/tests/test_dbs.py b/tests/test_dbs.py index fc3ed8c..2b5db60 100644 --- a/tests/test_dbs.py +++ b/tests/test_dbs.py @@ -15,31 +15,11 @@ Index, ChangeQueue, EmbargoQueue, - int_to_sortable_str, - MAX_INT, RedactionDB, ) from dataimporter.lib.model import SourceRecord -def test_int_to_sortable_str(): - with pytest.raises(ValueError): - assert int_to_sortable_str(20.5) - - with pytest.raises(ValueError): - assert int_to_sortable_str(20.0) - - with pytest.raises(ValueError): - assert int_to_sortable_str(-1) - - with pytest.raises(ValueError): - assert int_to_sortable_str(MAX_INT + 1) - - assert int_to_sortable_str(10) == "2_10" - assert int_to_sortable_str(0) == "1_0" - assert int_to_sortable_str(MAX_INT - 1) == f"~_{MAX_INT - 1}" - - class TestDB: def test_name(self, tmp_path: Path): db = DB(tmp_path / "database")