Merge pull request #11 from encore-ecosystem/dev
Merge dev with release v0.0.3
meshushkevich authored Oct 30, 2024
2 parents ca39736 + becdd4a commit 7d0bafd
Showing 42 changed files with 829 additions and 176 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,2 +1,5 @@
.idea
dist
/target
examples/cv_dataset_converter/toy_datasets/*

49 changes: 49 additions & 0 deletions examples/cv_dataset_converter/converter.py
@@ -0,0 +1,49 @@
from examples.cv_dataset_converter.utils.adapters import *
from examples.cv_dataset_converter.utils.functions import *
from examples.cv_dataset_converter.utils.variables import *

from nodeflow import Converter
import pathlib

ADAPTER_LIST = [
COCO2YOLO_Adapter(),
YOLO2COCO_Adapter(),
]


def main():
converter = Converter(ADAPTER_LIST)

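    # YOLO -> COCO: read the toy YOLO dataset, convert it, and write the COCO copy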
coco_dataset = converter.convert(
variable=YOLO_Reader().compute(
path_to_dataset=PathVariable(
value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
)
),
to_type=COCO_Dataset
)
COCO_Writer().compute(
coco_dataset=coco_dataset,
target_path=PathVariable(
value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
)
)

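    # COCO -> YOLO: read the COCO copy back and regenerate the YOLO layout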
yolo_dataset = converter.convert(
variable=COCO_Reader().compute(
path_to_dataset=PathVariable(
value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
)
),
to_type=YOLO_Dataset
)
YOLO_Writer().compute(
yolo_dataset=yolo_dataset,
target_path=PathVariable(
value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
)
)


if __name__ == '__main__':
main()
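Note on the toy datasets: they are gitignored (see the .gitignore hunk above), so running this example assumes an Ultralytics-style YOLO dataset is already in place, roughly:

    toy_datasets/YOLO/
        data.yaml               # 'names', 'nc', train/val/test entries (read by YOLO_Reader)
        train/images/           # .jpg, .jpeg, or .png files
        train/labels/           # one '<stem>.txt' label file per image
        valid/images/, valid/labels/
        test/images/, test/labels/

The COCO counterpart is then produced by the script itself, one _annotations.json per split, which COCO_Reader expects on the return trip.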
3 binary files changed (contents not shown).
144 changes: 144 additions & 0 deletions examples/cv_dataset_converter/utils/adapters.py
@@ -0,0 +1,144 @@
import numpy as np
from PIL import Image

from nodeflow.adapters import Adapter
from examples.cv_dataset_converter.utils.variables import YOLO_Dataset, COCO_Dataset, PathVariable
from tqdm import tqdm
from shapely.geometry import Polygon

import yaml


class YOLO2COCO_Adapter(Adapter):
def convert(self, variable: YOLO_Dataset) -> COCO_Dataset:
assert isinstance(variable, YOLO_Dataset)

cat_id_to_name_mapping = [
{"id": idx, "name": cat_name}
for idx, cat_name in enumerate(variable.datayaml["names"])
]

coco_anns = {}
for split in variable.anns:
coco_anns[split] = {
'images': [],
'annotations': [],
'categories': cat_id_to_name_mapping
}
image_id, annotation_id = 0, 0
for im_name, im_path in tqdm(variable.imgs[split].items()):
image = np.array(Image.open(im_path))
                height, width = image.shape[:2]  # robust to greyscale and RGBA images
image_info = {
"id" : image_id,
"file_name" : im_name,
"width" : width,
"height" : height,
}
coco_anns[split]["images"].append(image_info)

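                # a YOLO label line is either a bounding box:
                #     <class> <x_center> <y_center> <width> <height>   (normalised to [0, 1])
                # or a polygon:
                #     <class> <x1> <y1> <x2> <y2> ...                  (also normalised)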
                # only the label file belonging to this image (YOLO_Reader keys anns
                # by label file name, i.e. '<stem>.txt'), one annotation per line
                for line in variable.anns[split].get(f"{im_name}.txt", []):
                    mapped_line = list(map(float, line.split()))
if len(mapped_line) == 5:
class_id, x_center, y_center, width, height = mapped_line

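                        # COCO wants absolute pixel values: bbox = [x_min, y_min, width, height]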
x_min = int((x_center - width / 2) * image_info["width"])
y_min = int((y_center - height / 2) * image_info["height"])
bbox_width = int(width * image_info["width"])
bbox_height = int(height * image_info["height"])

annotation = {
"id": annotation_id,
"image_id": image_id,
"category_id": int(class_id),
"bbox": [x_min, y_min, bbox_width, bbox_height],
"area": bbox_width * bbox_height,
"iscrowd": 0,
}
coco_anns[split]["annotations"].append(annotation)
annotation_id += 1
elif len(mapped_line) > 5:
class_id = mapped_line[0]
                        converted_segmentation = []
                        for i, coord in enumerate(mapped_line[1:]):
                            # x coords (even indices) scale by image width, y coords by
                            # height; read the dims from image_info, since `width` and
                            # `height` may have been shadowed by the bbox branch above
                            converted_segmentation.append(
                                coord * image_info["width"] if i % 2 == 0 else coord * image_info["height"]
                            )

annotation = {
"id": annotation_id,
"image_id": image_id,
"category_id": int(class_id),
"segmentation": [converted_segmentation],
"area": Polygon(zip(converted_segmentation[0::2], converted_segmentation[1::2])).area,
"iscrowd": 0,
}
coco_anns[split]["annotations"].append(annotation)
annotation_id += 1

image_id += 1

return COCO_Dataset(anns=coco_anns, imgs=variable.imgs, path=None)

def is_loses_information(self) -> bool:
return True


class COCO2YOLO_Adapter(Adapter):
def convert(self, variable: COCO_Dataset) -> YOLO_Dataset:
# Output path
output_base_directory = variable.value.value.parent / "results" / "COCO"
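        # variable.value appears to be the dataset's PathVariable and .value the
        # underlying pathlib.Path (inferred from PathVariable usage in converter.py)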

yolo_anns = {}
for split in ["train", "valid", "test"]:
yolo_anns[split] = {}
category_mapping = {cat["id"]: cat["name"] for cat in variable.anns[split]["categories"]}
category_id_mapping = {cat["name"]: cat["id"] for cat in variable.anns[split]["categories"]}

for image in tqdm(variable.anns[split]["images"]):
image_id = image["id"]
im_name = image["file_name"]

for annotation in variable.anns[split]["annotations"]:
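                    # the id -> name -> id round-trip below collapses duplicate category
                    # names onto one id (a no-op once COCO_Reader has merged duplicates)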
category_id = category_id_mapping[category_mapping[annotation["category_id"]]]
if annotation["image_id"] == image_id:
if annotation.get("bbox"):
x_center = (annotation["bbox"][0] + annotation["bbox"][2] / 2) / image["width"]
y_center = (annotation["bbox"][1] + annotation["bbox"][3] / 2) / image["height"]
width = annotation["bbox"][2] / image["width"]
height = annotation["bbox"][3] / image["height"]
if yolo_anns[split].get(im_name):
yolo_anns[split][im_name] += f"{category_id} {x_center} {y_center} {width} {height}\n"
else:
yolo_anns[split][im_name] = f"{category_id} {x_center} {y_center} {width} {height}\n"

elif annotation.get("segmentation"):
im_width, im_height = image["width"], image["height"]
converted_segmentation = []
                            for i, coord in enumerate(annotation["segmentation"][0]):
                                converted_segmentation.append(
                                    coord / im_width if i % 2 == 0 else coord / im_height
                                )
                            # YOLO label lines are space-separated values, not a Python list repr
                            seg_line = f"{category_id} {' '.join(map(str, converted_segmentation))}\n"
                            if yolo_anns[split].get(im_name):
                                yolo_anns[split][im_name] += seg_line
                            else:
                                yolo_anns[split][im_name] = seg_line


        # build the data.yaml mapping directly as a dict, equivalent to the
        # render-then-reparse YAML round-trip it replaces
        datayaml = {
            'path' : str(output_base_directory),
            'train': '../train',
            'val'  : '../valid',
            'test' : '../test',
            'nc'   : len(category_mapping),
            'names': list(category_mapping.values()),
        }

        return YOLO_Dataset(datayaml=datayaml, anns=yolo_anns, imgs=variable.imgs, path=None)

def is_loses_information(self) -> bool:
return False


__all__ = [
'YOLO2COCO_Adapter',
'COCO2YOLO_Adapter',
]
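For readers new to nodeflow: judging purely from this file, an adapter is a class with a typed convert() method plus an is_loses_information() flag that the Converter can consult when chaining conversions. A minimal sketch of what an extra adapter could look like (VOC2YOLO_Adapter and VOC_Dataset are invented for illustration and are not part of this PR):

    from nodeflow.adapters import Adapter

    class VOC2YOLO_Adapter(Adapter):
        # hypothetical adapter; VOC_Dataset does not exist in this repository
        def convert(self, variable: 'VOC_Dataset') -> YOLO_Dataset:
            assert isinstance(variable, VOC_Dataset)
            ...  # build and return a YOLO_Dataset from `variable`

        def is_loses_information(self) -> bool:
            # True when the target format cannot represent everything the source had
            return False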
135 changes: 135 additions & 0 deletions examples/cv_dataset_converter/utils/functions.py
@@ -0,0 +1,135 @@
from examples.cv_dataset_converter.utils.variables import PathVariable, YOLO_Dataset, COCO_Dataset
from nodeflow.node import Function
from nodeflow import Node

import shutil
import yaml
import json


class YOLO_Reader(Function):
def compute(self, path_to_dataset: PathVariable) -> YOLO_Dataset:
# validate path
assert isinstance(path_to_dataset, PathVariable)

# load .yaml file
# <deprecated>
# files = list(path_to_dataset.value.glob("*yaml"))
# # print(path_to_dataset.value)
# assert len(files) == 1, "Could not find .yaml file"
# datayaml_path = files[0]

datayaml_path = path_to_dataset / 'data.yaml'
assert datayaml_path.exists(), "Could not find .yaml file"

with open(datayaml_path, "r") as datayaml_file:
datayaml = yaml.load(datayaml_file, Loader=yaml.SafeLoader)
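        # datayaml now holds the usual Ultralytics keys: 'names', 'nc', 'train', 'val', 'test'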

annotations, images = {}, {}
for split in ["train", "test", "valid"]:
annotations[split], images[split] = {}, {}

for label_file_path in (path_to_dataset / split / "labels").glob("*.txt"):
with open(label_file_path, "r") as label_file:
annotations[split][label_file_path.name] = label_file.readlines()

images_directory = path_to_dataset / split / "images"
for image_file_path in images_directory.iterdir():
if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    images[split][image_file_path.stem] = image_file_path  # iterdir() already yields the full path

return YOLO_Dataset(
path = path_to_dataset,
datayaml = datayaml,
anns = annotations,
imgs = images,
)

class COCO_Reader(Function):
def compute(self, path_to_dataset: PathVariable) -> COCO_Dataset:
annotations, images = {}, {}
for split in ["train", "test", "valid"]:
annotations[split], images[split] = {}, {}

shrinkage_superclass_mapping = {}
all_categories_mapping = {}
assert (path_to_dataset / split / "_annotations.json").exists()
with open(path_to_dataset / split / "_annotations.json", "r") as f:
data = json.load(f)
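                # some COCO exports register the same category name under several ids;
                # the next two loops collapse duplicates so each name keeps its smallest id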
for category in data["categories"]:
all_categories_mapping[category["id"]] = category["name"]
shrinkage_superclass_mapping[category["name"]] = shrinkage_superclass_mapping.get(category["name"], []) + [category["id"]]

                for annotation in data["annotations"]:
                    annotation['category_id'] = min(shrinkage_superclass_mapping[all_categories_mapping[annotation['category_id']]])

data['categories'] = [
{
'id' : min(shrinkage_superclass_mapping[category]),
'name' : category,
'supercategory' : 'none'
}
for category in shrinkage_superclass_mapping
]
annotations[split] = data

images_directory = path_to_dataset / split
for image_file_path in images_directory.iterdir():
if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    images[split][image_file_path.stem] = image_file_path

return COCO_Dataset(path=path_to_dataset, anns=annotations, imgs=images)

class YOLO_Writer(Function):
def compute(self, yolo_dataset: YOLO_Dataset, target_path: PathVariable) -> Node:
root = target_path.value

# create directories
for split in ["train", "test", "valid"]:
(root / split / "labels").mkdir(parents=True, exist_ok=True)
(root / split / "images").mkdir(parents=True, exist_ok=True)

for image_path in yolo_dataset.imgs[split].values():
shutil.copy(
src = image_path,
dst = root / split / 'images' / image_path.name
)

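                # anns values here are the newline-joined label strings that
                # COCO2YOLO_Adapter builds, keyed by image file name (YOLO_Reader,
                # by contrast, stores readlines() lists keyed by '<stem>.txt')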
with open(root / split / 'labels' / f"{image_path.stem}.txt", 'w') as label_file:
label_file.write(yolo_dataset.anns[split][image_path.name])

with open(root / 'data.yaml', "w") as yaml_file:
datayaml = {
'train': yolo_dataset.datayaml['train'],
'val' : yolo_dataset.datayaml['val'],
'test' : yolo_dataset.datayaml['test'],
'nc' : yolo_dataset.datayaml['nc'],
'names': yolo_dataset.datayaml['names'],
}
yaml.dump(datayaml, yaml_file)


class COCO_Writer(Function):
def compute(self, coco_dataset: COCO_Dataset, target_path: PathVariable) -> Node:
root = target_path.value

for split in ["train", "test", "valid"]:
(root / split).mkdir(parents=True, exist_ok=True)

for image_path in coco_dataset.imgs[split].values():
shutil.copy(
src = image_path,
dst = root / split / image_path.name
)

with open(root / split / '_annotations.json', 'w') as json_file:
json.dump(coco_dataset.anns[split], json_file, indent=4)


__all__ = [
"YOLO_Reader",
"COCO_Reader",
"YOLO_Writer",
"COCO_Writer",
]
