Skip to content

Commit

Permalink
Add UUID generator (#8)
Browse files Browse the repository at this point in the history
* feat: Add UUID Generator

* test: Add uuidgen unit test

* Update pre-commit configuration to max length 79 and fix style

* Add constraints for parameters

* Add uuid bulk generate

* Add uuid collision test

* move uuid to utils directory

* Fix polytope.core.uuidgen -> polytope.utils.uuidgen

* Fix some typos in comments

* Add uuid bulk large method

* style: Auto-fixed by black

* fix: Fix critical bugs

* style: Use index wrapper to make code clear

* test: Add test for large cases

* fix: Force length of alphabet to 2 or more

* feat: Use faster method to append string

* fix: Make UUID process more proper

* test: Add more various tests

* feat: Use uuid() when bulk-generating a single one

---------

Co-authored-by: Gyojun Youn <[email protected]>
  • Loading branch information
justiceHui and youngyojun authored Sep 10, 2023
1 parent 782a09c commit a97e592
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 6 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ repos:
hooks:
- id: black
exclude: ^test/
args: ['--line-length=79']
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
hooks:
Expand Down
19 changes: 13 additions & 6 deletions src/polytope/github/repository/Repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@


# alphanumeric or hyphen, starts & ends with alphanumeric
GITHUB_USERNAME_REGEX = r"^[a-zA-Z\d](?:[a-zA-Z\d]|-(?=[a-zA-Z\d])){0,37}[a-zA-Z\d]$"
GITHUB_USERNAME_REGEX = (
r"^[a-zA-Z\d](?:[a-zA-Z\d]|-(?=[a-zA-Z\d])){0,37}[a-zA-Z\d]$"
)
# alphanumeric, hyphen, underscore. starts & ends with alphanumeric.
GITHUB_REPONAME_REGEX = r"^[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?$"

Expand Down Expand Up @@ -156,7 +158,9 @@ def get_url(self) -> str:

def get(self) -> GithubRepositoryResponse:
"""! Get a repository named {owner}/{repo}."""
result = self._requester.request(verb=RequestVerb.GET, api_url=self.get_url)
result = self._requester.request(
verb=RequestVerb.GET, api_url=self.get_url
)

# @todo define content to read

Expand Down Expand Up @@ -208,7 +212,9 @@ def update(
data = asdict(config)

result = self._requester.request(
verb=RequestVerb.PATCH, api_url=self.update_url, data=json.dumps(data)
verb=RequestVerb.PATCH,
api_url=self.update_url,
data=json.dumps(data),
)

# succeeded to update.
Expand Down Expand Up @@ -282,9 +288,10 @@ def fetch_polytope_config_file(
)

if result.status_code == 200:
self._has_polytope_config_file, reason = parse_polytope_config_file(
result.content
)
(
self._has_polytope_config_file,
reason,
) = parse_polytope_config_file(result.content)
return self._has_polytope_config_file, reason

else:
Expand Down
7 changes: 7 additions & 0 deletions src/polytope/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Public names exported by this package; all of them are imported from
# the sibling .uuidgen module.
__all__ = [
"PolytopeUUID",
"uuid",
"uuid_bulk",
]

from .uuidgen import PolytopeUUID, uuid, uuid_bulk
95 changes: 95 additions & 0 deletions src/polytope/utils/uuidgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import random
import math
from typing import List

# Default alphabet: lowercase letters and digits with l, 1, o and 0 left
# out (presumably because they are easy to confuse) -- 32 symbols in total.
# With the default length of 8 this gives 32 ** 8 possible uuids, so the
# collision probability among 150,000 entries is ~ 1%.
DEFAULT_ALPHABET = "abcdefghijkmnpqrstuvwxyz23456789"
DEFAULT_LENGTH = 8


class PolytopeUUID:
    """Generator of random fixed-length ids over a configurable alphabet."""

    def __init__(
        self, alphabet: str = DEFAULT_ALPHABET, length: int = DEFAULT_LENGTH
    ) -> None:
        """! Build a generator.

        Both values are assigned through the property setters, so an
        invalid alphabet or length raises ValueError here.
        @param alphabet characters a uuid may be made of
        @param length number of characters in every uuid
        """
        self.alphabet = alphabet
        self.length = length

    @property
    def alphabet(self) -> str:
        """! The characters uuids are drawn from."""
        return self._alphabet

    @alphabet.setter
    def alphabet(self, value: str) -> None:
        """! Validate and store the alphabet.
        @param value at least 10 pairwise distinct characters
        """
        size = len(value)
        if size < 10:
            raise ValueError("alphabet must be long enough.")
        # A set drops repeats, so a size mismatch means duplicates exist.
        if len(set(value)) != size:
            raise ValueError("alphabet must consist of distinct characters.")

        self._alphabet = value

    @property
    def length(self) -> int:
        """! The number of characters in each generated uuid."""
        return self._length

    @length.setter
    def length(self, value: int) -> None:
        """! Validate and store the uuid length.
        @param value uuid length, at least 5
        """
        if value < 5:
            raise ValueError("length must be large enough.")

        self._length = value

    def uuid(self) -> str:
        """! Generate one uuid: `length` characters, each picked
        independently with random.choice from the alphabet.
        """
        symbols = self.alphabet
        return "".join(random.choice(symbols) for _ in range(self.length))

    def uuid_bulk(self, count: int) -> List[str]:
        """! Generate `count` pairwise distinct uuids.

        The result is built from a set, so its order is the set's
        iteration order, not the order of generation.
        @param count number of uuids to generate
        @return list of `count` distinct uuids
        """
        if count < 0:
            raise ValueError("count must be non-negative.")
        if count == 0:
            return []
        if count == 1:
            return [self.uuid()]

        # Refuse when count > 0.01 * (|alphabet| ** length). The test is
        # done on logarithms: log(count) - length * log(|alphabet|) is
        # compared against log(0.01) == -log(100).
        log_capacity = self.length * math.log(len(self.alphabet))
        if math.log(count) - log_capacity > -math.log(100):
            raise ValueError("count is too large to generate distinct uuids")

        # Adding an already present uuid leaves the set unchanged, so the
        # loop simply keeps drawing until enough distinct ones exist.
        generated: set = set()
        while len(generated) < count:
            generated.add(self.uuid())
        return list(generated)


def uuid(
    alphabet: str = DEFAULT_ALPHABET, length: int = DEFAULT_LENGTH
) -> str:
    """! Generate a single uuid using a temporary PolytopeUUID.
    @param alphabet characters the uuid may be made of
    @param length number of characters in the uuid
    @return the generated uuid
    """
    return PolytopeUUID(alphabet, length).uuid()


def uuid_bulk(
    count: int, alphabet: str = DEFAULT_ALPHABET, length: int = DEFAULT_LENGTH
) -> List[str]:
    """! Generate `count` distinct uuids using a temporary PolytopeUUID.
    @param count number of uuids to generate
    @param alphabet characters the uuids may be made of
    @param length number of characters in each uuid
    @return list of distinct uuids
    """
    return PolytopeUUID(alphabet, length).uuid_bulk(count)
99 changes: 99 additions & 0 deletions test/utils/uuidgen_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import pytest

import polytope.utils.uuidgen as uuidgen


def test_uuid_collision():
    """10,000 uuids generated with the defaults must all be distinct."""
    # fail probability ~ 5 * 10^-5
    generated = [uuidgen.uuid() for _ in range(10000)]
    assert len(set(generated)) == len(generated)

def test_default():
    """A generator built without arguments uses the default settings."""
    default_gen = uuidgen.PolytopeUUID()
    assert default_gen.length == 8
    assert len(default_gen.alphabet) == 32

def test_gen_uuid():
    """PolytopeUUID.uuid() honours the configured alphabet and length."""
    digits = '0123456789'
    gen = uuidgen.PolytopeUUID(alphabet=digits, length=15)
    assert gen.alphabet == digits
    assert gen.length == 15

    # Re-assigning the same length goes through the property setter.
    gen.length = 15
    sample = gen.uuid()
    assert len(sample) == 15
    assert all(ch in gen.alphabet for ch in sample)

    # Default alphabet with a long uuid.
    gen = uuidgen.PolytopeUUID(length=300)
    assert len(gen.alphabet) == 32
    assert gen.length == 300

    for _ in range(300):
        sample = gen.uuid()
        assert len(sample) == 300
        assert all(ch in gen.alphabet for ch in sample)

def test_gen_bulk():
    """PolytopeUUID.uuid_bulk() returns distinct, well-formed uuids."""
    gen = uuidgen.PolytopeUUID(length=300)

    batch = gen.uuid_bulk(3000)
    assert len(batch) == 3000
    assert len(set(batch)) == len(batch)

    for item in batch:
        assert len(item) == 300
        assert all(ch in gen.alphabet for ch in item)

def test_uuid():
    """The module-level uuid() honours the given alphabet and length."""
    alphabet = '0123456789'

    short = uuidgen.uuid(alphabet, 12)
    assert len(short) == 12
    assert all(ch in alphabet for ch in short)

    for _ in range(300):
        long_one = uuidgen.uuid(alphabet, 300)
        assert len(long_one) == 300
        assert all(ch in alphabet for ch in long_one)

def test_bulk():
    """The module-level uuid_bulk() returns distinct, well-formed uuids."""
    alphabet = '0123456789'

    batch = uuidgen.uuid_bulk(count=3000, alphabet=alphabet, length=300)
    assert len(batch) == 3000
    assert len(set(batch)) == len(batch)

    for item in batch:
        assert len(item) == 300
        assert all(ch in alphabet for ch in item)

def test_value_error():
    """Every invalid parameter must be rejected with ValueError.

    The results of the failing calls are deliberately not bound to a
    name: the call raises, so the assignment would never happen and would
    only leave an unused local behind.
    """
    # Too few alphabets
    with pytest.raises(ValueError):
        uuidgen.PolytopeUUID(alphabet="")
    with pytest.raises(ValueError):
        uuidgen.PolytopeUUID(alphabet="abc")
    with pytest.raises(ValueError):
        uuidgen.PolytopeUUID(alphabet="012345678")

    # Duplicated alphabets
    with pytest.raises(ValueError):
        uuidgen.PolytopeUUID(alphabet='aaabbbcccdddeee')
    with pytest.raises(ValueError):
        uuidgen.PolytopeUUID(alphabet='bcdAefghijklmAno')

    # Too small length
    with pytest.raises(ValueError):
        uuidgen.uuid('0123456789', -1)
    with pytest.raises(ValueError):
        uuidgen.uuid('0123456789', 0)
    with pytest.raises(ValueError):
        uuidgen.uuid('0123456789', 4)

    # Negative count
    with pytest.raises(ValueError):
        uuidgen.uuid_bulk(-1, '0123456789', 5)

    # Too large count: 1100 exceeds 1% of the 10 ** 5 possible uuids
    with pytest.raises(ValueError):
        uuidgen.uuid_bulk(1100, '0123456789', 5)

0 comments on commit a97e592

Please sign in to comment.