add regression task dataset format (#44)

microsoft · Sep 22, 2022 · c7ced3a · c7ced3a
1 parent 264eb47
commit c7ced3a
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 11 deletions.
diff --git a/COCO_DATA_FORMAT.md b/COCO_DATA_FORMAT.md
@@ -94,3 +94,18 @@ Specifically, **only** image files are supported for the label files. The ground
     ]
 }
 ```
+
+## Image regression
+
+Here is an example of the JSON file for the image regression task, where the "target" in each entry of the "annotations" field is a real-valued number (e.g. a score or an age). Note that each image must have exactly one regression target, i.e. there must be exactly one annotation per image.
+
+``` {json}
+{
+    "images": [{"id": 1, "width": 224.0, "height": 224.0, "file_name": "train_images/image_1.jpg", "zip_file": "train_images.zip"},
+              {"id": 2, "width": 224.0, "height": 224.0, "file_name": "train_images/image_2.jpg", "zip_file": "train_images.zip"}],
+    "annotations": [
+        {"id": 1, "image_id": 1, "target": 102.0},
+        {"id": 2, "image_id": 2, "target": 28.5}
+    ]
+}
+```
diff --git a/README.md b/README.md
@@ -8,13 +8,14 @@ This repo
 - provides many commonly used dataset operations, such as sampling a dataset by categories, sampling a few-shot sub-dataset, sampling a dataset by ratios, train-test splitting, merging datasets, etc. (See here [Link](vision_datasets/common/data_manifest.py) for available utilities)
 - provides API for organizing and accessing datasets, via `DatasetHub`
 
-Currently, six `basic` types of data are supported: 
+Currently, seven `basic` types of data are supported: 
 - `classification_multiclass`: each image is labeled with exactly one label.
 - `classification_multilabel`: each image is labeled with one or more labels (e.g., 'cat', 'animal', 'pet').
 - `object_detection`: each image is labeled with bounding boxes surrounding the objects of interest.
 - `image_caption`: each image is labeled with a few texts describing the images.
 - `image_text_matching`: each image is associated with a collection of texts describing the image, and whether each text description matches the image or not.
 - `image_matting`: each image has a pixel-wise annotation, where each pixel is labeled as 'foreground' or 'background'.
+- `image_regression`: each image is labeled with a real-valued numeric regression target.
 
 `multitask` type is a composition type, where one set of images has multiple sets of annotations available for different tasks, where each task can be of any basic type.
 

diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 import setuptools
 from os import path
 
-VERSION = '0.2.17'
+VERSION = '0.2.18'
 
 # Get the long description from the README file
 here = path.abspath(path.dirname(__file__))

diff --git a/tests/test_coco_format_manifest.py b/tests/test_coco_format_manifest.py
@@ -223,3 +223,19 @@ def test_multitask_ic_multilabel_and_image_matting(self):
         self.assertTrue(np.array_equal(dataset_manifest.images[0].labels['task2'][0], img_0_matting, equal_nan=True))
         self.assertEqual(dataset_manifest.images[1].labels['task1'],  [0, 1])
         self.assertTrue(np.array_equal(dataset_manifest.images[1].labels['task2'][0], img_1_matting, equal_nan=True))
+
+    def test_image_regression_manifest(self):
+        image_regression_manifest = {
+            "images": [{"id": 1, "file_name": "[email protected]"},
+                       {"id": 2, "file_name": "[email protected]"}],
+            "annotations": [
+                {"id": 1, "image_id": 1, "target": 1.0},
+                {"id": 2, "image_id": 2, "target": 2.0},
+            ]
+        }
+
+        dataset_manifest = TestCases.get_manifest(DatasetTypes.IMAGE_REGRESSION, 0)
+        self.assertIsInstance(dataset_manifest, DatasetManifest)
+        self.assertEqual(len(dataset_manifest.images), 2)
+        self.assertEqual(dataset_manifest.images[0].labels, [image_regression_manifest["annotations"][0]["target"]])
+        self.assertEqual(dataset_manifest.images[1].labels, [image_regression_manifest["annotations"][1]["target"]])
diff --git a/tests/test_dataset_manifest.py b/tests/test_dataset_manifest.py
@@ -201,13 +201,40 @@ class TestCases:
             ]
         }]
 
+    image_regression_manifest_dicts = [
+        {
+            "images": [{"id": 1, "file_name": "[email protected]"},
+                       {"id": 2, "file_name": "[email protected]"}],
+            "annotations": [
+                {"id": 1, "image_id": 1, "target": 1.0},
+                {"id": 2, "image_id": 2, "target": 2.0},
+            ]
+        },
+        {
+            "images": [{"id": 1, "file_name": "[email protected]"},
+                       {"id": 2, "file_name": "[email protected]"}],
+            "annotations": [
+                {"id": 1, "image_id": 1, "target": 3.0},
+                {"id": 2, "image_id": 2, "target": 4.0},
+            ]
+        },
+        {
+            "images": [{"id": 1, "file_name": "[email protected]"},
+                       {"id": 2, "file_name": "[email protected]"}],
+            "annotations": [
+                {"id": 1, "image_id": 1, "target": 5.0},
+                {"id": 2, "image_id": 2, "target": 6.0},
+            ],
+        }]
+
     manifest_dict_by_data_type = {
         DatasetTypes.IC_MULTILABEL: ic_manifest_dicts,
         DatasetTypes.IC_MULTICLASS: ic_manifest_dicts,
         DatasetTypes.OD: od_manifest_dicts,
         DatasetTypes.IMCAP: cap_manifest_dicts,
         DatasetTypes.IMAGE_TEXT_MATCHING: image_text_manifest_dicts,
-        DatasetTypes.IMAGE_MATTING: image_matting_manifest_dicts
+        DatasetTypes.IMAGE_MATTING: image_matting_manifest_dicts,
+        DatasetTypes.IMAGE_REGRESSION: image_regression_manifest_dicts
     }
 
     @staticmethod
@@ -600,7 +627,7 @@ def test_spawn_multitask_manifest(self):
 
 class TestCocoGeneration(unittest.TestCase):
     def test_coco_generation(self):
-        for data_type in [DatasetTypes.IC_MULTICLASS, DatasetTypes.IC_MULTILABEL, DatasetTypes.OD, DatasetTypes.IMCAP]:
+        for data_type in [DatasetTypes.IC_MULTICLASS, DatasetTypes.IC_MULTILABEL, DatasetTypes.OD, DatasetTypes.IMCAP, DatasetTypes.IMAGE_REGRESSION]:
             for i in range(len(TestCases.manifest_dict_by_data_type[data_type])):
                 manifest = TestCases.get_manifest(data_type, i)
                 coco_dict = manifest.generate_coco_annotations()

diff --git a/vision_datasets/commands/dataset_check.py b/vision_datasets/commands/dataset_check.py
@@ -46,7 +46,7 @@ def check_images(dataset: ManifestDataset, err_msg_file: pathlib.Path):
         err_msg_file.write_text('\n'.join(file_not_found_list))
 
 
-def classification_detection_check(dataset):
+def classification_detection_check(dataset: ManifestDataset):
     n_imgs_by_class = {x: 0 for x in range(len(dataset.labels))}
     for sample in dataset.dataset_manifest.images:
         labels = sample.labels
@@ -141,6 +141,9 @@ def main():
                 quick_check_images(dataset)
             else:
                 check_images(dataset, err_msg_file)
+
+            if args.data_type in [DatasetTypes.IC_MULTICLASS, DatasetTypes.IC_MULTILABEL, DatasetTypes.OD]:
+                classification_detection_check(dataset)
         else:
             logger.info(f'{prefix} No split for {usage} available.')
 

diff --git a/vision_datasets/common/constants.py b/vision_datasets/common/constants.py
@@ -6,7 +6,8 @@ class DatasetTypes:
     IMCAP = 'image_caption'
     IMAGE_TEXT_MATCHING = 'image_text_matching'
     IMAGE_MATTING = 'image_matting'
-    VALID_TYPES = [IC_MULTILABEL, IC_MULTICLASS, OD, MULTITASK, IMCAP, IMAGE_TEXT_MATCHING, IMAGE_MATTING]
+    IMAGE_REGRESSION = 'image_regression'
+    VALID_TYPES = [IC_MULTILABEL, IC_MULTICLASS, OD, MULTITASK, IMCAP, IMAGE_TEXT_MATCHING, IMAGE_MATTING, IMAGE_REGRESSION]
 
     @staticmethod
     def is_classification(dataset_type):

diff --git a/vision_datasets/common/data_manifest.py b/vision_datasets/common/data_manifest.py
@@ -101,7 +101,8 @@ def __init__(self, id, img_path, width, height, labels, label_file_paths=None, l
                 image_caption: [caption1, caption2, ...];
                 image_text_matching: [(text1, match (0 or 1), text2, match (0 or 1), ...)];
                 multitask: dict[task, labels];
-                image_matting: [mask1, mask2, ...], each mask is a 2D numpy array that has the same width and height with the image.
+                image_matting: [mask1, mask2, ...], each mask is a 2D numpy array that has the same width and height with the image;
+                image_regression: [target1].
             label_file_paths (list): list of paths of the image label files. "label_file_paths" only works for image matting task.
             labels_extra_info (dict[string, list]]): extra information about this image's labels
                 Examples: 'iscrowd'
@@ -224,7 +225,7 @@ def _is_negative(self, labels):
 
     def generate_coco_annotations(self):
         """
-        Generate coco annotations, working for single task classification, detection and caption only
+        Generate coco annotations, working for single task classification, detection, caption, and image regression only
 
         Returns:
             A dict of annotation data ready for coco json dump
@@ -255,13 +256,15 @@ def generate_coco_annotations(self):
                     coco_ann['bbox'] = [ann[1], ann[2], ann[3] - ann[1], ann[4] - ann[2]]
                 elif self.data_type == DatasetTypes.IMCAP:
                     coco_ann['caption'] = ann
+                elif self.data_type == DatasetTypes.IMAGE_REGRESSION:
+                    coco_ann['target'] = ann
                 else:
                     raise ValueError(f'Unsupported data type {self.data_type}')
 
                 annotations.append(coco_ann)
 
         coco_dict = {'images': images, 'annotations': annotations}
-        if self.data_type != DatasetTypes.IMCAP:
+        if self.data_type not in [DatasetTypes.IMCAP, DatasetTypes.IMAGE_REGRESSION]:
             coco_dict['categories'] = [{'id': i + 1, 'name': x} for i, x in enumerate(self.labelmap)]
 
         return coco_dict
@@ -577,7 +580,7 @@ def _merge_with_same_labelmap(*args):
     def _merge_with_concat(*args):
         data_type = args[0].data_type
 
-        if data_type == DatasetTypes.IMCAP:
+        if data_type in [DatasetTypes.IMCAP, DatasetTypes.IMAGE_REGRESSION]:
             return DatasetManifest._merge_with_same_labelmap(args)
 
         if isinstance(data_type, dict):  # multitask
@@ -677,7 +680,7 @@ def create_dataset_manifest(dataset_info, usage: str, container_sas_or_root_dir:
         assert dataset_info
         assert usage
 
-        if dataset_info.type in [DatasetTypes.IMCAP, DatasetTypes.IMAGE_TEXT_MATCHING, DatasetTypes.IMAGE_MATTING]:
+        if dataset_info.type in [DatasetTypes.IMCAP, DatasetTypes.IMAGE_TEXT_MATCHING, DatasetTypes.IMAGE_MATTING, DatasetTypes.IMAGE_REGRESSION]:
             raise ValueError(f'Iris format is not supported for {dataset_info.type} task, please use COCO format!')
         if isinstance(dataset_info, MultiTaskDatasetInfo):
             dataset_manifest_by_task = {k: IrisManifestAdaptor.create_dataset_manifest(task_info, usage, container_sas_or_root_dir) for k, task_info in dataset_info.sub_task_infos.items()}
@@ -827,6 +830,10 @@ def process_labels_without_categories(image):
             def process_labels_without_categories(image):
                 image.label_file_paths = image.label_file_paths or []
                 image.label_file_paths.append(get_file_path(annotation, annotation['label']))
+        elif data_type == DatasetTypes.IMAGE_REGRESSION:
+            def process_labels_without_categories(image):
+                assert len(image.labels) == 0, f"There should be exactly one label per image for image_regression datasets, but image with id {annotation['image_id']} has more than one"
+                image.labels.append(annotation['target'])
 
         if process_labels_without_categories:
             for annotation in coco_manifest['annotations']: