Merge branch 'develop' into kl/hide-mask
klakhov authored Oct 18, 2024
2 parents c6aecf3 + 5be5d59 commit 60005d0
Showing 12 changed files with 171 additions and 45 deletions.
@@ -0,0 +1,4 @@
### Fixed

- Task creation with cloud storage data and GT_POOL validation mode
(<https://github.com/cvat-ai/cvat/pull/8539>)
4 changes: 4 additions & 0 deletions changelog.d/20241016_180804_sekachev.bs.md
@@ -0,0 +1,4 @@
### Fixed

- Incorrect quality reports and immediate feedback with a non-default start frame or frame step
(<https://github.com/cvat-ai/cvat/pull/8551>)
4 changes: 4 additions & 0 deletions changelog.d/20241017_155815_andrey_fix_task_creating.md
@@ -0,0 +1,4 @@
### Fixed

- av context closing issue when using AUTO thread_type
(<https://github.com/cvat-ai/cvat/pull/8555>)
7 changes: 2 additions & 5 deletions cvat/apps/dataset_manager/bindings.py
@@ -812,11 +812,8 @@ def _init_frame_info(self):
self._excluded_frames.update(self.db_data.validation_layout.disabled_frames)

if self._required_frames:
abs_range = self.abs_range
self._required_frames = set(
self.abs_frame_id(frame) for frame in self._required_frames
if frame in abs_range
)
rel_range = self.rel_range
self._required_frames = set(frame for frame in self._required_frames if frame in rel_range)

def __len__(self):
segment = self._db_job.segment
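The bindings fix stops converting `_required_frames` to absolute frame IDs and instead filters them directly against the job's relative frame range, which matters when a task uses a non-default start frame or frame step. A toy sketch of the relative/absolute mapping involved, assuming the usual `start_frame`/`frame_step` convention (the helper names below are illustrative, not CVAT's actual API):

def abs_frame_id(rel_frame: int, start_frame: int, frame_step: int) -> int:
    # Assumed convention: relative index 0 maps to start_frame, stepping afterwards
    return start_frame + rel_frame * frame_step

def rel_frame_id(abs_frame: int, start_frame: int, frame_step: int) -> int:
    return (abs_frame - start_frame) // frame_step

start_frame, frame_step = 5, 3
rel_range = range(6)          # relative frames 0..5 of a hypothetical segment
required_frames = {1, 4, 99}  # requested validation frames, relative numbering

# With the fix, filtering happens directly in relative numbering:
required_frames = {frame for frame in required_frames if frame in rel_range}
assert required_frames == {1, 4}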
8 changes: 7 additions & 1 deletion cvat/apps/engine/media_extractors.py
@@ -552,6 +552,8 @@ def read_av_container(self, source: Union[str, io.BytesIO]) -> av.container.Inpu
for stream in container.streams:
context = stream.codec_context
if context and context.is_open:
# Currently, context closing may get stuck on some videos for an unknown reason,
# so the thread_type == 'AUTO' setting is disabled pending further investigation
context.close()

if container.open_files:
@@ -583,7 +585,7 @@ def __init__(
stop: Optional[int] = None,
dimension: DimensionType = DimensionType.DIM_2D,
*,
allow_threading: bool = True,
allow_threading: bool = False,
):
super().__init__(
source_path=source_path,
@@ -635,6 +637,8 @@ def iterate_frames(

if self.allow_threading:
video_stream.thread_type = 'AUTO'
else:
video_stream.thread_type = 'NONE'

frame_counter = itertools.count()
with closing(self._decode_stream(container, video_stream)) as stream_decoder:
@@ -795,6 +799,8 @@ def iterate_frames(self, *, frame_filter: Iterable[int]) -> Iterable[av.VideoFra
video_stream = container.streams.video[0]
if self.allow_threading:
video_stream.thread_type = 'AUTO'
else:
video_stream.thread_type = 'NONE'

container.seek(offset=start_decode_timestamp, stream=video_stream)

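The extractor now defaults to single-threaded decoding (`allow_threading=False`, i.e. `thread_type = 'NONE'`), since closing a codec context opened with `'AUTO'` threading could hang on some videos. A minimal sketch of the difference, assuming PyAV and a locally available video file:

import av
from contextlib import closing

def iterate_frames(path, allow_threading=False):
    # allow_threading=False mirrors the new default: decode in the calling
    # thread and avoid the hang observed when closing an 'AUTO'-threaded
    # codec context.
    with closing(av.open(path)) as container:
        video_stream = container.streams.video[0]
        video_stream.thread_type = 'AUTO' if allow_threading else 'NONE'
        for frame in container.decode(video_stream):
            yield frame

# Usage with a hypothetical sample file:
# for frame in iterate_frames('video.mp4'):
#     print(frame.pts)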
28 changes: 28 additions & 0 deletions cvat/apps/engine/migrations/0084_honeypot_support.py
@@ -1,9 +1,11 @@
# Generated by Django 4.2.15 on 2024-09-23 13:11

from typing import Collection
from collections import defaultdict

import django.db.models.deletion
from django.db import migrations, models
from django.db.models import Count, Q

import cvat.apps.engine.models

@@ -39,6 +41,28 @@ def get_segment_rel_frame_set(db_segment) -> Collection[int]:

return sorted(get_rel_frame(abs_frame, db_data) for abs_frame in frame_set)

def delete_duplicate_ground_truth_jobs(apps, schema_editor):
Task = apps.get_model("engine", "Task")
Job = apps.get_model("engine", "Job")

broken_tasks = Task.objects.annotate(
ground_truth_jobs_count=Count(
'segment__job', filter=Q(segment__job__type='ground_truth')
)
).filter(ground_truth_jobs_count__gt=1)

gt_jobs = Job.objects.filter(
segment__task__in=broken_tasks
).filter(type='ground_truth').order_by('-updated_date').iterator(1000)

groups = defaultdict(list)
for gt_job in gt_jobs:
assert gt_job.type == 'ground_truth'
groups[gt_job.segment.task.id].append(gt_job)

for gt_jobs in groups.values():
for gt_job in gt_jobs[1:]:
gt_job.delete()

def init_validation_layout_in_tasks_with_gt_job(apps, schema_editor):
Job = apps.get_model("engine", "Job")
@@ -220,6 +244,10 @@ class Migration(migrations.Migration):
),
],
),
migrations.RunPython(
delete_duplicate_ground_truth_jobs,
reverse_code=migrations.RunPython.noop,
),
migrations.RunPython(
init_validation_layout_in_tasks_with_gt_job,
reverse_code=migrations.RunPython.noop,
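Because the duplicate ground truth jobs are fetched ordered by `-updated_date`, the first job collected per task is the most recently updated one; the migration keeps it and deletes everything else in the group. A toy illustration of that keep-first-per-group pattern (data values are made up):

from collections import defaultdict

# (task_id, job_id) pairs, already sorted newest-first by updated_date
gt_jobs = [(7, "job-c"), (7, "job-a"), (9, "job-d")]

groups = defaultdict(list)
for task_id, job_id in gt_jobs:
    groups[task_id].append(job_id)

# Keep the first (newest) job per task, delete the rest
to_delete = [job_id for jobs in groups.values() for job_id in jobs[1:]]
assert to_delete == ["job-a"]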
28 changes: 3 additions & 25 deletions cvat/apps/engine/task.py
@@ -1157,19 +1157,6 @@ def _update_status(msg: str) -> None:
assert job_file_mapping[-1] == validation_params['frames']
job_file_mapping.pop(-1)

# Update manifest
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.link(
sources=[extractor.get_path(image.frame) for image in images],
meta={
k: {'related_images': related_images[k] }
for k in related_images
},
data_dir=upload_dir,
DIM_3D=(db_task.dimension == models.DimensionType.DIM_3D),
)
manifest.create()

db_data.update_validation_layout(models.ValidationLayout(
mode=models.ValidationMode.GT_POOL,
frames=list(frame_idx_map.values()),
@@ -1324,24 +1311,15 @@ def _update_status(msg: str) -> None:
assert image.is_placeholder
image.real_frame = frame_id_map[image.real_frame]

# Update manifest
manifest.reorder([images[frame_idx_map[image.frame]].path for image in new_db_images])

images = new_db_images
db_data.size = len(images)
db_data.start_frame = 0
db_data.stop_frame = 0
db_data.frame_filter = ''

# Update manifest
manifest = ImageManifestManager(db_data.get_manifest_path())
manifest.link(
sources=[extractor.get_path(frame_idx_map[image.frame]) for image in images],
meta={
k: {'related_images': related_images[k] }
for k in related_images
},
data_dir=upload_dir,
DIM_3D=(db_task.dimension == models.DimensionType.DIM_3D),
)
manifest.create()

db_data.update_validation_layout(models.ValidationLayout(
mode=models.ValidationMode.GT_POOL,
9 changes: 5 additions & 4 deletions cvat/apps/quality_control/quality_reports.py
@@ -2303,21 +2303,22 @@ def _compute_reports(self, task_id: int) -> int:
if validation_layout.mode == ValidationMode.GT_POOL:
task_frame_provider = TaskFrameProvider(task)
active_validation_frames = set(
task_frame_provider.get_rel_frame_number(frame)
for frame, real_frame in (
task_frame_provider.get_rel_frame_number(abs_frame)
for abs_frame, abs_real_frame in (
Image.objects.filter(data=task.data, is_placeholder=True)
.values_list("frame", "real_frame")
.iterator(chunk_size=10000)
)
if real_frame in active_validation_frames
if task_frame_provider.get_rel_frame_number(abs_real_frame)
in active_validation_frames
)

jobs: List[Job] = [j for j in job_queryset if j.type == JobType.ANNOTATION]
job_data_providers = {
job.id: JobDataProvider(
job.id,
queryset=job_queryset,
included_frames=set(job.segment.frame_set) & active_validation_frames,
included_frames=active_validation_frames,
)
for job in jobs
}
95 changes: 89 additions & 6 deletions tests/python/rest_api/test_tasks.py
@@ -32,6 +32,7 @@
ClassVar,
Dict,
Generator,
Iterable,
List,
Optional,
Sequence,
@@ -1529,12 +1530,13 @@ def _create_task_with_cloud_data(
server_files: List[str],
use_cache: bool = True,
sorting_method: str = "lexicographical",
spec: Optional[Dict[str, Any]] = None,
data_type: str = "image",
video_frame_count: int = 10,
server_files_exclude: Optional[List[str]] = None,
org: Optional[str] = None,
org: str = "",
filenames: Optional[List[str]] = None,
task_spec_kwargs: Optional[Dict[str, Any]] = None,
data_spec_kwargs: Optional[Dict[str, Any]] = None,
) -> Tuple[int, Any]:
s3_client = s3.make_client(bucket=cloud_storage["resource"])
if data_type == "video":
@@ -1551,7 +1553,9 @@
)
else:
images = generate_image_files(
3, **({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames})
3,
sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)],
**({"prefixes": ["img_"] * 3} if not filenames else {"filenames": filenames}),
)

for image in images:
@@ -1598,6 +1602,7 @@ def _create_task_with_cloud_data(
"name": "car",
}
],
**(task_spec_kwargs or {}),
}

data_spec = {
@@ -1608,9 +1613,8 @@ def _create_task_with_cloud_data(
server_files if not use_manifest else server_files + ["test/manifest.jsonl"]
),
"sorting_method": sorting_method,
**(data_spec_kwargs or {}),
}
if spec is not None:
data_spec.update(spec)

if server_files_exclude:
data_spec["server_files_exclude"] = server_files_exclude
@@ -1984,7 +1988,7 @@ def test_create_task_with_cloud_storage_and_check_retrieve_data_meta(
use_cache=False,
server_files=["test/video/video.avi"],
org=org,
spec=data_spec,
data_spec_kwargs=data_spec,
data_type="video",
)

@@ -2550,6 +2554,85 @@ def test_can_create_task_with_gt_job_from_video(
else:
assert len(validation_frames) == validation_frames_count

@pytest.mark.with_external_services
@pytest.mark.parametrize("cloud_storage_id", [2])
@pytest.mark.parametrize(
"validation_mode",
[
models.ValidationMode("gt"),
models.ValidationMode("gt_pool"),
],
)
def test_can_create_task_with_validation_and_cloud_data(
self,
cloud_storage_id: int,
validation_mode: models.ValidationMode,
request: pytest.FixtureRequest,
admin_user: str,
cloud_storages: Iterable,
):
cloud_storage = cloud_storages[cloud_storage_id]
server_files = [f"test/sub_0/img_{i}.jpeg" for i in range(3)]
validation_frames = ["test/sub_0/img_1.jpeg"]

(task_id, _) = self._create_task_with_cloud_data(
request,
cloud_storage,
use_manifest=False,
server_files=server_files,
sorting_method=models.SortingMethod(
"random"
), # only random sorting can be used with gt_pool
data_spec_kwargs={
"validation_params": models.DataRequestValidationParams._from_openapi_data(
mode=validation_mode,
frames=validation_frames,
frame_selection_method=models.FrameSelectionMethod("manual"),
frames_per_job_count=1,
)
},
task_spec_kwargs={
# in case of gt_pool: each regular job will contain 1 regular and 1 validation frame
# (the number of validation frames is not included in segment_size)
"segment_size": 1,
},
)

with make_api_client(admin_user) as api_client:
# check that GT job was created
(paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="ground_truth")
assert 1 == len(paginated_jobs["results"])

(paginated_jobs, _) = api_client.jobs_api.list(task_id=task_id, type="annotation")
jobs_count = (
len(server_files) - len(validation_frames)
if validation_mode == models.ValidationMode("gt_pool")
else len(server_files)
)
assert jobs_count == len(paginated_jobs["results"])
# check that the returned meta of images corresponds to the chunk data
# Note: meta is based on the order of images in the database,
# while the chunk with CS data is based on the order of images in the manifest
for job in paginated_jobs["results"]:
(job_meta, _) = api_client.jobs_api.retrieve_data_meta(job["id"])
(_, response) = api_client.jobs_api.retrieve_data(
job["id"], type="chunk", quality="compressed", index=0
)
chunk_file = io.BytesIO(response.data)
assert zipfile.is_zipfile(chunk_file)

with zipfile.ZipFile(chunk_file, "r") as chunk_archive:
chunk_images = {
int(os.path.splitext(name)[0]): np.array(
Image.open(io.BytesIO(chunk_archive.read(name)))
)
for name in chunk_archive.namelist()
}
chunk_images = dict(sorted(chunk_images.items(), key=lambda e: e[0]))

for img, img_meta in zip(chunk_images.values(), job_meta.frames):
assert (img.shape[0], img.shape[1]) == (img_meta.height, img_meta.width)


class _SourceDataType(str, Enum):
images = "images"
12 changes: 9 additions & 3 deletions tests/python/shared/utils/helpers.py
@@ -5,7 +5,7 @@
import subprocess
from contextlib import closing
from io import BytesIO
from typing import Generator, List, Optional
from typing import Generator, List, Optional, Tuple

import av
import av.video.reformatter
@@ -25,7 +25,11 @@ def generate_image_file(filename="image.png", size=(100, 50), color=(0, 0, 0)):


def generate_image_files(
count, prefixes=None, *, filenames: Optional[List[str]] = None
count: int,
*,
prefixes: Optional[List[str]] = None,
filenames: Optional[List[str]] = None,
sizes: Optional[List[Tuple[int, int]]] = None,
) -> List[BytesIO]:
assert not (prefixes and filenames), "prefixes cannot be used together with filenames"
assert not prefixes or len(prefixes) == count
@@ -35,7 +39,9 @@ def generate_image_files(
for i in range(count):
prefix = prefixes[i] if prefixes else ""
filename = f"{prefix}{i}.jpeg" if not filenames else filenames[i]
image = generate_image_file(filename, color=(i, i, i))
image = generate_image_file(
filename, color=(i, i, i), **({"size": sizes[i]} if sizes else {})
)
images.append(image)

return images
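With `prefixes`, `filenames`, and `sizes` now keyword-only, callers can control per-image dimensions explicitly. A small usage sketch (the import path follows the tests' layout and is assumed):

from shared.utils.helpers import generate_image_files

# Three in-memory JPEGs alternating portrait and landscape, mirroring the
# sizes used for the cloud storage test data above.
images = generate_image_files(
    3,
    sizes=[(100, 50) if i % 2 else (50, 100) for i in range(3)],
)
assert len(images) == 3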
2 changes: 1 addition & 1 deletion utils/README.md
@@ -5,4 +5,4 @@
This folder contains some useful utilities for Computer Vision Annotation Tool (CVAT).
To read about a certain utility please choose a link:

- [Command line interface for working with CVAT tasks](https://docs.cvat.ai/docs/manual/advanced/cli/)
- [Command line interface for working with CVAT tasks](https://docs.cvat.ai/docs/api_sdk/cli/)