Commit
Merge pull request #11 from encore-ecosystem/dev
Merge dev with release v0.0.3
Showing 42 changed files with 829 additions and 176 deletions.
@@ -1,2 +1,5 @@
.idea
dist
/target
examples/cv_dataset_converter/toy_datasets/*
@@ -0,0 +1,49 @@
from examples.cv_dataset_converter.utils.adapters import *
from examples.cv_dataset_converter.utils.functions import *
from examples.cv_dataset_converter.utils.variables import *

from nodeflow import Converter
import pathlib


ADAPTER_LIST = [
    COCO2YOLO_Adapter(),
    YOLO2COCO_Adapter(),
]


def main():
    converter = Converter(ADAPTER_LIST)

    coco_dataset = converter.convert(
        variable=YOLO_Reader().compute(
            path_to_dataset=PathVariable(
                value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
            )
        ),
        to_type=COCO_Dataset
    )
    COCO_Writer().compute(
        coco_dataset=coco_dataset,
        target_path=PathVariable(
            value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
        )
    )

    yolo_dataset = converter.convert(
        variable=COCO_Reader().compute(
            path_to_dataset=PathVariable(
                value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
            )
        ),
        to_type=YOLO_Dataset
    )
    YOLO_Writer().compute(
        yolo_dataset=yolo_dataset,
        target_path=PathVariable(
            value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
        )
    )


if __name__ == '__main__':
    main()
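
Note: a minimal sketch of the same YOLO-to-COCO step without going through Converter, assuming Converter simply dispatches to a matching registered adapter (all names taken from the diffs in this commit; the toy dataset path is assumed to exist as in main() above):

import pathlib

from examples.cv_dataset_converter.utils.adapters import YOLO2COCO_Adapter
from examples.cv_dataset_converter.utils.functions import YOLO_Reader
from examples.cv_dataset_converter.utils.variables import PathVariable

# Read the toy YOLO dataset, then call the adapter directly.
yolo_dataset = YOLO_Reader().compute(
    path_to_dataset=PathVariable(
        value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
    )
)
coco_dataset = YOLO2COCO_Adapter().convert(yolo_dataset)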
BIN +4.63 KB examples/cv_dataset_converter/utils/__pycache__/adapters.cpython-310.pyc (binary file added, not shown)
BIN +949 Bytes examples/cv_dataset_converter/utils/__pycache__/functions.cpython-310.pyc (binary file added, not shown)
BIN +4.58 KB examples/cv_dataset_converter/utils/__pycache__/variables.cpython-310.pyc (binary file added, not shown)
@@ -0,0 +1,144 @@
import numpy as np
from PIL import Image

from nodeflow.adapters import Adapter
from examples.cv_dataset_converter.utils.variables import YOLO_Dataset, COCO_Dataset, PathVariable
from tqdm import tqdm
from shapely.geometry import Polygon

import yaml


class YOLO2COCO_Adapter(Adapter):
    def convert(self, variable: YOLO_Dataset) -> COCO_Dataset:
        assert isinstance(variable, YOLO_Dataset)

        cat_id_to_name_mapping = [
            {"id": idx, "name": cat_name}
            for idx, cat_name in enumerate(variable.datayaml["names"])
        ]

        coco_anns = {}
        for split in variable.anns:
            coco_anns[split] = {
                'images': [],
                'annotations': [],
                'categories': cat_id_to_name_mapping,
            }
            image_id, annotation_id = 0, 0
            for im_name, im_path in tqdm(variable.imgs[split].items()):
                image = np.array(Image.open(im_path))
                height, width, _ = image.shape
                image_info = {
                    "id"        : image_id,
                    "file_name" : im_name,
                    "width"     : width,
                    "height"    : height,
                }
                coco_anns[split]["images"].append(image_info)

                # Only the label file belonging to this image (anns are keyed by "<stem>.txt").
                for line in variable.anns[split].get(f"{im_name}.txt", []):
                    mapped_line = list(map(float, line.split()))
                    if len(mapped_line) == 5:
                        # Bounding box: "class x_center y_center width height", normalized to [0, 1].
                        # Named bbox_w/bbox_h so the image dims (width/height) are not shadowed.
                        class_id, x_center, y_center, bbox_w, bbox_h = mapped_line

                        x_min = int((x_center - bbox_w / 2) * image_info["width"])
                        y_min = int((y_center - bbox_h / 2) * image_info["height"])
                        bbox_width = int(bbox_w * image_info["width"])
                        bbox_height = int(bbox_h * image_info["height"])

                        annotation = {
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": int(class_id),
                            "bbox": [x_min, y_min, bbox_width, bbox_height],
                            "area": bbox_width * bbox_height,
                            "iscrowd": 0,
                        }
                        coco_anns[split]["annotations"].append(annotation)
                        annotation_id += 1
                    elif len(mapped_line) > 5:
                        # Polygon: "class x1 y1 x2 y2 ...", normalized to [0, 1].
                        class_id = mapped_line[0]
                        converted_segmentation = []
                        for i, coord in enumerate(mapped_line[1:]):
                            # Even offsets are x coordinates (scale by width), odd are y (scale by height).
                            scale = image_info["width"] if i % 2 == 0 else image_info["height"]
                            converted_segmentation.append(coord * scale)

                        annotation = {
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": int(class_id),
                            "segmentation": [converted_segmentation],
                            "area": Polygon(zip(converted_segmentation[0::2], converted_segmentation[1::2])).area,
                            "iscrowd": 0,
                        }
                        coco_anns[split]["annotations"].append(annotation)
                        annotation_id += 1

                image_id += 1

        return COCO_Dataset(anns=coco_anns, imgs=variable.imgs, path=None)

    def is_loses_information(self) -> bool:
        return True


class COCO2YOLO_Adapter(Adapter):
    def convert(self, variable: COCO_Dataset) -> YOLO_Dataset:
        # Output path
        output_base_directory = variable.value.value.parent / "results" / "COCO"

        yolo_anns = {}
        for split in ["train", "valid", "test"]:
            yolo_anns[split] = {}
            category_mapping = {cat["id"]: cat["name"] for cat in variable.anns[split]["categories"]}
            category_id_mapping = {cat["name"]: cat["id"] for cat in variable.anns[split]["categories"]}

            for image in tqdm(variable.anns[split]["images"]):
                image_id = image["id"]
                im_name = image["file_name"]

                for annotation in variable.anns[split]["annotations"]:
                    if annotation["image_id"] != image_id:
                        continue
                    # Round-trip id -> name -> id so duplicate names resolve to one id.
                    category_id = category_id_mapping[category_mapping[annotation["category_id"]]]
                    if annotation.get("bbox"):
                        # COCO bbox is [x_min, y_min, width, height] in pixels;
                        # YOLO wants normalized center coordinates and sizes.
                        x_center = (annotation["bbox"][0] + annotation["bbox"][2] / 2) / image["width"]
                        y_center = (annotation["bbox"][1] + annotation["bbox"][3] / 2) / image["height"]
                        width = annotation["bbox"][2] / image["width"]
                        height = annotation["bbox"][3] / image["height"]
                        line = f"{category_id} {x_center} {y_center} {width} {height}\n"
                        yolo_anns[split][im_name] = yolo_anns[split].get(im_name, "") + line

                    elif annotation.get("segmentation"):
                        # COCO polygons are [x1, y1, x2, y2, ...] in pixels; normalize each coordinate.
                        im_width, im_height = image["width"], image["height"]
                        converted_segmentation = []
                        for i, coord in enumerate(annotation["segmentation"][0]):
                            scale = im_width if i % 2 == 0 else im_height
                            converted_segmentation.append(coord / scale)
                        # Write space-separated coordinates, not a Python list repr.
                        coords = " ".join(map(str, converted_segmentation))
                        yolo_anns[split][im_name] = yolo_anns[split].get(im_name, "") + f"{category_id} {coords}\n"

        yaml_file = f"path: {str(output_base_directory)}\n"
        yaml_file += 'train: ../train\n'
        yaml_file += 'val: ../valid\n'
        yaml_file += 'test: ../test\n'
        yaml_file += f'nc: {len(category_mapping)}\n'
        yaml_file += f'names: {list(category_mapping.values())}\n'

        yaml_file = yaml.load(yaml_file, Loader=yaml.SafeLoader)

        return YOLO_Dataset(datayaml=yaml_file, anns=yolo_anns, imgs=variable.imgs, path=None)

    def is_loses_information(self) -> bool:
        return False


__all__ = [
    'YOLO2COCO_Adapter',
    'COCO2YOLO_Adapter',
]
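
Note: for intuition, here is the bbox arithmetic from YOLO2COCO_Adapter worked through with made-up numbers (a hypothetical YOLO line "0 0.5 0.5 0.25 0.5" on a 640x480 image; values chosen to be exact in binary floating point):

# YOLO gives normalized center/size; COCO wants pixel [x_min, y_min, w, h].
image_w, image_h = 640, 480
class_id, x_center, y_center, w, h = 0, 0.5, 0.5, 0.25, 0.5

x_min = int((x_center - w / 2) * image_w)   # (0.5 - 0.125) * 640 = 240
y_min = int((y_center - h / 2) * image_h)   # (0.5 - 0.25)  * 480 = 120
bbox_w = int(w * image_w)                   # 0.25 * 640 = 160
bbox_h = int(h * image_h)                   # 0.5  * 480 = 240
assert [x_min, y_min, bbox_w, bbox_h] == [240, 120, 160, 240]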
@@ -0,0 +1,135 @@
from examples.cv_dataset_converter.utils.variables import PathVariable, YOLO_Dataset, COCO_Dataset
from nodeflow.node import Function
from nodeflow import Node

import shutil
import yaml
import json


class YOLO_Reader(Function):
    def compute(self, path_to_dataset: PathVariable) -> YOLO_Dataset:
        # validate path
        assert isinstance(path_to_dataset, PathVariable)

        # load data.yaml
        datayaml_path = path_to_dataset / 'data.yaml'
        assert datayaml_path.exists(), "Could not find .yaml file"

        with open(datayaml_path, "r") as datayaml_file:
            datayaml = yaml.load(datayaml_file, Loader=yaml.SafeLoader)

        annotations, images = {}, {}
        for split in ["train", "test", "valid"]:
            annotations[split], images[split] = {}, {}

            for label_file_path in (path_to_dataset / split / "labels").glob("*.txt"):
                with open(label_file_path, "r") as label_file:
                    annotations[split][label_file_path.name] = label_file.readlines()

            images_directory = path_to_dataset / split / "images"
            for image_file_path in images_directory.iterdir():
                if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    # iterdir() already yields full paths; no extra join needed.
                    images[split][image_file_path.stem] = image_file_path

        return YOLO_Dataset(
            path     = path_to_dataset,
            datayaml = datayaml,
            anns     = annotations,
            imgs     = images,
        )


class COCO_Reader(Function):
    def compute(self, path_to_dataset: PathVariable) -> COCO_Dataset:
        annotations, images = {}, {}
        for split in ["train", "test", "valid"]:
            annotations[split], images[split] = {}, {}

            # Categories sharing a name are "shrunk" onto the smallest id carrying that name.
            shrinkage_superclass_mapping = {}
            all_categories_mapping = {}
            assert (path_to_dataset / split / "_annotations.json").exists()
            with open(path_to_dataset / split / "_annotations.json", "r") as f:
                data = json.load(f)
            for category in data["categories"]:
                all_categories_mapping[category["id"]] = category["name"]
                shrinkage_superclass_mapping[category["name"]] = shrinkage_superclass_mapping.get(category["name"], []) + [category["id"]]

            for annotation in data["annotations"]:
                annotation['category_id'] = min(shrinkage_superclass_mapping[all_categories_mapping[annotation['category_id']]])

            data['categories'] = [
                {
                    'id'            : min(shrinkage_superclass_mapping[category]),
                    'name'          : category,
                    'supercategory' : 'none',
                }
                for category in shrinkage_superclass_mapping
            ]
            annotations[split] = data

            images_directory = path_to_dataset / split
            for image_file_path in images_directory.iterdir():
                if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    images[split][image_file_path.stem] = image_file_path

        return COCO_Dataset(path=path_to_dataset, anns=annotations, imgs=images)


class YOLO_Writer(Function):
    def compute(self, yolo_dataset: YOLO_Dataset, target_path: PathVariable) -> Node:
        root = target_path.value

        # create directories, then copy each image with its label file
        for split in ["train", "test", "valid"]:
            (root / split / "labels").mkdir(parents=True, exist_ok=True)
            (root / split / "images").mkdir(parents=True, exist_ok=True)

            for image_path in yolo_dataset.imgs[split].values():
                shutil.copy(
                    src = image_path,
                    dst = root / split / 'images' / image_path.name
                )

                # assumes anns are keyed by image file name
                with open(root / split / 'labels' / f"{image_path.stem}.txt", 'w') as label_file:
                    label_file.write(yolo_dataset.anns[split][image_path.name])

        with open(root / 'data.yaml', "w") as yaml_file:
            datayaml = {
                'train': yolo_dataset.datayaml['train'],
                'val'  : yolo_dataset.datayaml['val'],
                'test' : yolo_dataset.datayaml['test'],
                'nc'   : yolo_dataset.datayaml['nc'],
                'names': yolo_dataset.datayaml['names'],
            }
            yaml.dump(datayaml, yaml_file)


class COCO_Writer(Function):
    def compute(self, coco_dataset: COCO_Dataset, target_path: PathVariable) -> Node:
        root = target_path.value

        for split in ["train", "test", "valid"]:
            (root / split).mkdir(parents=True, exist_ok=True)

            for image_path in coco_dataset.imgs[split].values():
                shutil.copy(
                    src = image_path,
                    dst = root / split / image_path.name
                )

            with open(root / split / '_annotations.json', 'w') as json_file:
                json.dump(coco_dataset.anns[split], json_file, indent=4)


__all__ = [
    "YOLO_Reader",
    "COCO_Reader",
    "YOLO_Writer",
    "COCO_Writer",
]
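
Note: the category "shrinkage" in COCO_Reader is easiest to see on toy data: duplicate category names collapse onto the smallest id that carries the name, and annotations are re-pointed accordingly. A self-contained sketch of that mapping logic (toy values are made up):

# Two categories named "car" under different ids get merged onto id 0.
categories = [
    {"id": 0, "name": "car"},
    {"id": 1, "name": "person"},
    {"id": 2, "name": "car"},      # duplicate name, different id
]
all_categories = {c["id"]: c["name"] for c in categories}
shrinkage = {}
for c in categories:
    shrinkage[c["name"]] = shrinkage.get(c["name"], []) + [c["id"]]
# shrinkage == {"car": [0, 2], "person": [1]}

annotation = {"category_id": 2}
annotation["category_id"] = min(shrinkage[all_categories[annotation["category_id"]]])
assert annotation["category_id"] == 0   # re-pointed from id 2 to id 0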