Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fr optimize #32

Merged
merged 10 commits into from
Jun 14, 2024
19 changes: 16 additions & 3 deletions src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from typing import Any

from rest_framework import serializers

from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image
from hope_dedup_engine.apps.api.models.deduplication import (
Duplicate,
IgnoredKeyPair,
Image,
)


class DeduplicationSetSerializer(serializers.ModelSerializer):
Expand All @@ -10,7 +16,14 @@ class DeduplicationSetSerializer(serializers.ModelSerializer):
class Meta:
model = DeduplicationSet
exclude = ("deleted",)
read_only_fields = "external_system", "created_at", "created_by", "deleted", "updated_at", "updated_by"
read_only_fields = (
"external_system",
"created_at",
"created_by",
"deleted",
"updated_at",
"updated_by",
)


class ImageSerializer(serializers.ModelSerializer):
Expand All @@ -24,7 +37,7 @@ class EntrySerializer(serializers.Serializer):
reference_pk = serializers.SerializerMethodField()
filename = serializers.SerializerMethodField()

def __init__(self, prefix: str, *args, **kwargs) -> None:
def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None:
self._prefix = prefix
super().__init__(*args, **kwargs)

Expand Down
88 changes: 69 additions & 19 deletions src/hope_dedup_engine/apps/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,16 @@
HDETokenAuthentication,
UserAndDeduplicationSetAreOfTheSameSystem,
)
from hope_dedup_engine.apps.api.const import DEDUPLICATION_SET_FILTER, DEDUPLICATION_SET_PARAM
from hope_dedup_engine.apps.api.const import (
DEDUPLICATION_SET_FILTER,
DEDUPLICATION_SET_PARAM,
)
from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image
from hope_dedup_engine.apps.api.models.deduplication import (
Duplicate,
IgnoredKeyPair,
Image,
)
from hope_dedup_engine.apps.api.serializers import (
DeduplicationSetSerializer,
DuplicateSerializer,
Expand All @@ -36,17 +43,29 @@


class DeduplicationSetViewSet(
mixins.ListModelMixin, mixins.CreateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet
mixins.ListModelMixin,
mixins.CreateModelMixin,
mixins.DestroyModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = DeduplicationSetSerializer

def get_queryset(self) -> QuerySet:
return DeduplicationSet.objects.filter(external_system=self.request.user.external_system, deleted=False)
return DeduplicationSet.objects.filter(
external_system=self.request.user.external_system, deleted=False
)

def perform_create(self, serializer: Serializer) -> None:
serializer.save(created_by=self.request.user, external_system=self.request.user.external_system)
serializer.save(
created_by=self.request.user,
external_system=self.request.user.external_system,
)

def perform_destroy(self, instance: DeduplicationSet) -> None:
instance.updated_by = self.request.user
Expand All @@ -70,18 +89,24 @@ def process(self, request: Request, pk: UUID | None = None) -> Response:
self._start_processing(deduplication_set)
return Response({MESSAGE: STARTED})
case DeduplicationSet.State.PROCESSING:
return Response({MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST)
return Response(
{MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST
)


class ImageViewSet(
nested_viewsets.NestedViewSetMixin,
nested_viewsets.NestedViewSetMixin[Image],
mixins.ListModelMixin,
mixins.CreateModelMixin,
mixins.DestroyModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = ImageSerializer
queryset = Image.objects.all()
parent_lookup_kwargs = {
Expand All @@ -105,22 +130,26 @@ def perform_destroy(self, instance: Image) -> None:

@dataclass
class ListDataWrapper:
data: list[dict]
data: list[dict[str, Any]]

def __setitem__(self, key: str, value: Any) -> None:
for item in self.data:
item[key] = value


class WrapRequestDataMixin:
def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request:
def initialize_request(
self, request: Request, *args: Any, **kwargs: Any
) -> Request:
request = super().initialize_request(request, *args, **kwargs)
request._full_data = ListDataWrapper(request.data)
return request


class UnwrapRequestDataMixin:
def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request:
def initialize_request(
self, request: Request, *args: Any, **kwargs: Any
) -> Request:
request = super().initialize_request(request, *args, **kwargs)
request._full_data = request._full_data.data
return request
Expand All @@ -130,13 +159,17 @@ def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Req
# UnwrapRequestDataMixin, and ListDataWrapper to make it work with list of objects
class BulkImageViewSet(
UnwrapRequestDataMixin,
nested_viewsets.NestedViewSetMixin,
nested_viewsets.NestedViewSetMixin[Image],
WrapRequestDataMixin,
mixins.CreateModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = ImageSerializer
queryset = Image.objects.all()
parent_lookup_kwargs = {
Expand All @@ -148,7 +181,9 @@ def get_serializer(self, *args: Any, **kwargs: Any) -> Serializer:

def perform_create(self, serializer: Serializer) -> None:
super().perform_create(serializer)
if deduplication_set := serializer.instance[0].deduplication_set if serializer.instance else None:
if deduplication_set := (
serializer.instance[0].deduplication_set if serializer.instance else None
):
deduplication_set.updated_by = self.request.user
deduplication_set.save()

Expand All @@ -161,9 +196,17 @@ def clear(self, request: Request, deduplication_set_pk: str) -> Response:
return Response(status=status.HTTP_204_NO_CONTENT)


class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet):
class DuplicateViewSet(
nested_viewsets.NestedViewSetMixin[Duplicate],
mixins.ListModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = DuplicateSerializer
queryset = Duplicate.objects.all()
parent_lookup_kwargs = {
Expand All @@ -172,10 +215,17 @@ class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin


class IgnoredKeyPairViewSet(
nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, mixins.CreateModelMixin, viewsets.GenericViewSet
nested_viewsets.NestedViewSetMixin[IgnoredKeyPair],
mixins.ListModelMixin,
mixins.CreateModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = IgnoredKeyPairSerializer
queryset = IgnoredKeyPair.objects.all()
parent_lookup_kwargs = {
Expand Down
16 changes: 9 additions & 7 deletions src/hope_dedup_engine/apps/core/storage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from django.conf import settings
from django.core.files.storage import FileSystemStorage

Expand All @@ -16,7 +18,7 @@ class CV2DNNStorage(UniqueStorageMixin, FileSystemStorage):


class HDEAzureStorage(UniqueStorageMixin, AzureStorage):
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
self.account_name = settings.AZURE_ACCOUNT_NAME
self.account_key = settings.AZURE_ACCOUNT_KEY
self.custom_domain = settings.AZURE_CUSTOM_DOMAIN
Expand All @@ -26,20 +28,20 @@ def __init__(self, *args, **kwargs):


class HOPEAzureStorage(HDEAzureStorage):
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.azure_container = settings.AZURE_CONTAINER_HOPE

def delete(self, name):
def delete(self, name: str) -> None:
raise RuntimeError("This storage cannot delete files")

def open(self, name, mode="rb"):
def open(self, name: str, mode: str = "rb") -> Any:
if "w" in mode:
raise RuntimeError("This storage cannot open files in write mode")
return super().open(name, mode="rb")

def save(self, name, content, max_length=None):
def save(self, name: str, content: Any, max_length: int | None = None) -> None:
raise RuntimeError("This storage cannot save files")

def listdir(self, path=""):
return []
def listdir(self, path: str = "") -> tuple[list[str], list[str]]:
return ([], [])
17 changes: 13 additions & 4 deletions src/hope_dedup_engine/apps/faces/celery_tasks.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import traceback

from celery import shared_task, states
from celery import Task, shared_task, states

from hope_dedup_engine.apps.faces.services.duplication_detector import (
DuplicationDetector,
)
from hope_dedup_engine.apps.faces.utils.celery_utils import task_lifecycle
from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector


@shared_task(bind=True, soft_time_limit=0.5 * 60 * 60, time_limit=1 * 60 * 60)
@task_lifecycle(name="Deduplicate", ttl=1 * 60 * 60)
# TODO: Use DeduplicationSet objects as input to deduplication pipeline
def deduplicate(self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]] = tuple()) -> tuple[tuple[str]]:
def deduplicate(
self: Task,
filenames: tuple[str],
ignore_pairs: tuple[tuple[str, str], ...] = tuple(),
) -> tuple[tuple[str, ...], ...]:
"""
Deduplicate a set of filenames, ignoring any specified pairs of filenames.

Expand All @@ -25,5 +31,8 @@ def deduplicate(self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]
dd = DuplicationDetector(filenames, ignore_pairs)
return dd.find_duplicates()
except Exception as e:
self.update_state(state=states.FAILURE, meta={"exc_message": str(e), "traceback": traceback.format_exc()})
self.update_state(
state=states.FAILURE,
meta={"exc_message": str(e), "traceback": traceback.format_exc()},
)
raise e
8 changes: 8 additions & 0 deletions src/hope_dedup_engine/apps/faces/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class StorageKeyError(Exception):
    """
    Error signalling that a requested storage key does not exist.

    Attributes:
        key: The storage key that could not be found.
    """

    def __init__(self, key: str) -> None:
        """
        Build the error for the given missing key.

        Args:
            key (str): The storage key that does not exist; kept on the instance as ``key``
                and embedded in the exception message.
        """
        message = f"Storage key '{key}' does not exist."
        super().__init__(message)
        self.key = key
26 changes: 26 additions & 0 deletions src/hope_dedup_engine/apps/faces/forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from django.forms import CharField, ValidationError


class MeanValuesTupleField(CharField):
    """
    Character form field that represents a tuple of three floats as comma-separated text.

    ``to_python`` parses the submitted text into a ``(float, float, float)`` tuple whose members
    must each lie between -255 and 255; ``prepare_value`` renders such a tuple back into text.
    """

    def to_python(self, value: str) -> tuple[float, float, float]:
        """
        Parse comma-separated text into a tuple of three floats.

        Args:
            value (str): Text such as ``'0.0, 0.0, 0.0'``. Whitespace around each number is
                optional, so ``'0,0,0'`` is accepted as well.

        Returns:
            tuple[float, float, float]: The parsed values.

        Raises:
            ValidationError: If the text cannot be parsed into exactly three floats, or any
                value falls outside the range -255..255.
        """
        try:
            # Split on the bare comma: float() already ignores surrounding whitespace, so this
            # accepts the canonical ", " form as well as "1,2,3" or unevenly spaced input.
            values = tuple(map(float, value.split(",")))
            if len(values) != 3:
                raise ValueError("The tuple must have exactly three elements.")
            # NaN fails this chained comparison and is therefore rejected too.
            if not all(-255 <= v <= 255 for v in values):
                raise ValueError(
                    "Each value in the tuple must be between -255 and 255."
                )
            return values
        except (AttributeError, TypeError, ValueError) as e:
            # AttributeError/TypeError cover non-string input (e.g. None or bytes); ValueError
            # covers unparsable numbers and the explicit checks above.
            # The message literal below is reproduced verbatim, including its line layout.
            raise ValidationError(
                """
Enter a valid tuple of three float values separated by commas and spaces, e.g. '0.0, 0.0, 0.0'.
Each value must be between -255 and 255.
"""
            ) from e

    def prepare_value(self, value: tuple[float, float, float]) -> str:
        """
        Render a value for display in the form widget.

        Args:
            value: A tuple of numbers, or any other value (for instance already-formatted text).

        Returns:
            str: The tuple members joined with ``', '`` when ``value`` is a tuple; otherwise
            whatever the parent ``prepare_value`` returns for it.
        """
        if isinstance(value, tuple):
            return ", ".join(map(str, value))
        return super().prepare_value(value)
Empty file.
38 changes: 38 additions & 0 deletions src/hope_dedup_engine/apps/faces/managers/net.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from django.conf import settings

from constance import config
from cv2 import dnn, dnn_Net

from hope_dedup_engine.apps.core.storage import CV2DNNStorage


class DNNInferenceManager:
    """
    Loads and configures a neural network model through OpenCV's DNN module.

    The model is read from Caffe files (prototxt + caffemodel) located via the given storage, and
    is then configured with the preferred backend and target taken from the constance config.
    """

    def __init__(self, storage: CV2DNNStorage) -> None:
        """
        Read the Caffe model from storage and apply backend/target preferences.

        Args:
            storage (CV2DNNStorage): The storage object used to resolve the paths of the model files.
        """
        # Resolve both file paths first, in the same order the files are handed to OpenCV.
        prototxt_path = storage.path(settings.PROTOTXT_FILE)
        caffemodel_path = storage.path(settings.CAFFEMODEL_FILE)

        self.net = dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

        # The config values are converted with int() before being passed to OpenCV.
        network = self.net
        network.setPreferableBackend(int(config.DNN_BACKEND))
        network.setPreferableTarget(int(config.DNN_TARGET))

    def get_model(self) -> dnn_Net:
        """
        Return the network that was loaded and configured in ``__init__``.

        Returns:
            cv2.dnn_Net: The neural network model held by this manager.
        """
        return self.net
Loading
Loading