Skip to content

Commit

Permalink
chore: remove unused function and test
Browse files Browse the repository at this point in the history
We used to need this when we cared about ID ordering in the databases, but we no longer do (it wasn't worth the effort), so there is no reason for it to still be here.
  • Loading branch information
jrdh committed Jun 1, 2024
1 parent 709bdd5 commit a74a270
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 58 deletions.
38 changes: 0 additions & 38 deletions dataimporter/lib/dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,48 +4,10 @@

import msgpack
import plyvel
from fastnumbers import check_int
from splitgill.utils import parse_to_timestamp, now, partition

from dataimporter.lib.model import SourceRecord

# the length prefix is a single ASCII character running from "1" (code point 49, one
# digit) to "~" (code point 126, 78 digits), so the largest integer we can represent
# as a sortable string has 78 digits
MAX_INT = int("9" * 78)


def int_to_sortable_str(number: int) -> str:
    """
    Encodes the given number as a string which sorts in numeric order when compared
    with other strings produced by this function. This avoids the standard 1, 2, 20,
    21, 3 problem without zero padding, which wastes space and requires a much lower
    maximum input value.

    The algorithm is based on the one presented here:
    https://www.arangodb.com/2017/09/sorting-number-strings-numerically/ with a couple
    of tweaks.

    The number of digits in the number is encoded as a single ASCII character and
    placed before the number itself, separated by an underscore. String comparison
    therefore looks at the length character first and the digits second. For example,
    1 has one digit so it is encoded as "1_1", whereas 10 has two digits and is encoded
    as "2_10".

    The length character is chr(48 + digit count), i.e. it starts at code point 49
    which is the character "1". The original author notes that "." cannot be used in
    keys; starting at 49 stays above it (code point 46) and means that all numbers
    below 1,000,000,000 get one of the familiar prefixes "1" to "9". The last usable
    character is "~" (126), which gives a maximum length of 78 digits (see MAX_INT).

    Only non-negative plain integers are accepted (0 is valid and encodes to "1_0").
    Floats, numeric strings, None and bools are rejected.

    :param number: the number to encode, must be an int between 0 and MAX_INT
                   inclusive
    :return: the encoded number as a str object
    :raises ValueError: if number is not an int, is negative, or exceeds MAX_INT
    """
    # bool is a subclass of int but would be rendered as "True"/"False", and anything
    # else that isn't an int (floats, numeric strings, None) can't be compared to the
    # bounds below, so reject them all up front with the documented ValueError
    if isinstance(number, bool) or not isinstance(number, int):
        raise ValueError("Number must be a valid integer")
    if number < 0 or number > MAX_INT:
        raise ValueError(f"Number must be positive and no more than {MAX_INT}")
    digits = str(number)
    # 48 + 1 == ord("1") for a single digit number, 48 + 78 == ord("~") for MAX_INT
    return f"{chr(48 + len(digits))}_{digits}"


class DB:
"""
Expand Down
20 changes: 0 additions & 20 deletions tests/test_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,11 @@
Index,
ChangeQueue,
EmbargoQueue,
int_to_sortable_str,
MAX_INT,
RedactionDB,
)
from dataimporter.lib.model import SourceRecord


def test_int_to_sortable_str():
    """
    Checks that int_to_sortable_str rejects invalid input with a ValueError and
    produces the expected length-prefixed encoding for valid input.
    """
    # non-integers, negatives, and values beyond the maximum must all be rejected
    rejected_inputs = (20.5, 20.0, -1, MAX_INT + 1)
    for rejected in rejected_inputs:
        with pytest.raises(ValueError):
            int_to_sortable_str(rejected)

    # valid values are prefixed with a character encoding their digit count
    largest_tested = MAX_INT - 1
    expected_by_input = {
        10: "2_10",
        0: "1_0",
        largest_tested: f"~_{MAX_INT - 1}",
    }
    for value, expected in expected_by_input.items():
        assert int_to_sortable_str(value) == expected


class TestDB:
def test_name(self, tmp_path: Path):
db = DB(tmp_path / "database")
Expand Down

0 comments on commit a74a270

Please sign in to comment.