Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVRO-1737 Implement Hashable for Schema #2367

Merged
merged 1 commit into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions lang/py/avro/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from functools import reduce
from pathlib import Path
from typing import (
Callable,
FrozenSet,
List,
Mapping,
Expand Down Expand Up @@ -197,23 +198,41 @@ def other_props(self) -> Mapping[str, object]:
return get_other_props(self.props, self._reserved_properties)


class EqualByJsonMixin:
class EqualByJsonMixin(collections.abc.Hashable):
    """Mixin providing JSON-based equality and fingerprint-based hashing.

    Two objects compare equal when parsing their ``str()`` forms as JSON
    yields equal values. The hash is derived from ``fingerprint()``.
    """

    # Only declared here; presumably supplied by the class this mixin is
    # combined with.
    fingerprint: Callable[..., bytes]

    def __eq__(self, that: object) -> bool:
        """Return True when ``that`` and ``self`` parse to equal JSON values.

        Anything whose string form is not valid JSON compares unequal.
        """
        try:
            other_json = json.loads(str(that))
        except json.decoder.JSONDecodeError:
            return False
        own_json = json.loads(str(self))
        return cast(bool, own_json == other_json)

    def __hash__(self) -> int:
        """Hash the fingerprint so instances work in sets and as dict keys.

        NB: a class that defines __eq__ without also defining __hash__ has
        its __hash__ implicitly set to None, so both live in this class.
        """
        digest = self.fingerprint()
        return hash(digest)


class EqualByPropsMixin(PropertiesMixin):
class EqualByPropsMixin(collections.abc.Hashable, PropertiesMixin):
    """Mixin providing props-based equality and fingerprint-based hashing.

    Two objects compare equal when the other one exposes a ``props``
    attribute that equals this object's ``props``.
    """

    # Only declared here; presumably supplied by the class this mixin is
    # combined with.
    fingerprint: Callable[..., bytes]

    def __eq__(self, that: object) -> bool:
        """Return whether ``that`` has ``props`` equal to ``self.props``."""
        if not hasattr(that, "props"):
            return False
        return self.props == getattr(that, "props")

    def __hash__(self) -> int:
        """Hash the fingerprint so instances work in sets and as dict keys.

        NB: a class that defines __eq__ without also defining __hash__ has
        its __hash__ implicitly set to None, so both live in this class.
        """
        digest = self.fingerprint()
        return hash(digest)


class CanonicalPropertiesMixin(PropertiesMixin):
"""A Mixin that provides canonical properties to Schema and Field types."""
Expand Down
26 changes: 26 additions & 0 deletions lang/py/avro/test/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,31 @@ def parse_invalid(self):
self.test_schema.parse()


class HashableTestCase(unittest.TestCase):
    """Check that each parsed schema can be passed to ``hash()``.

    Hashability is implemented with parsing canonical form fingerprinting,
    but this test is kept separate from the fingerprint tests to avoid
    coupling the two.
    """

    def __init__(self, test_schema):
        """Bind one example schema to this test case.

        The standard unittest.TestCase signature is deliberately not used,
        because many cases are generated from this single class. That is
        safe as long as the autoloader skips this class, which it does as
        long as no method name starts with `test_`.
        """
        super().__init__("parse_and_hash")
        self.test_schema = test_schema

    def parse_and_hash(self):
        """Parse the example schema and make sure the result is hashable."""
        try:
            hash(self.test_schema.parse())
        except TypeError as err:
            # Only the "unhashable type" TypeError is reported as a test
            # failure; any other TypeError propagates unchanged.
            if "unhashable type" not in str(err):
                raise
            self.fail(f"{self.test_schema} is not hashable")


class RoundTripParseTestCase(unittest.TestCase):
"""Enable generating round-trip parse test cases over all the valid test schema."""

Expand Down Expand Up @@ -1434,6 +1459,7 @@ def load_tests(loader, default_tests, pattern):
suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES)
suite.addTests(loader.loadTestsFromTestCase(CanonicalFormTestCase))
suite.addTests(FingerprintTestCase(ex[0], ex[1]) for ex in FINGERPRINT_EXAMPLES)
suite.addTests(HashableTestCase(ex) for ex in VALID_EXAMPLES)
return suite


Expand Down