Commit
Merge pull request #11 from encore-ecosystem/dev
Merge dev with release v0.0.3
Showing 42 changed files with 829 additions and 176 deletions.
@@ -1,2 +1,5 @@
.idea
dist
/target
examples/cv_dataset_converter/toy_datasets/*
@@ -0,0 +1,49 @@
from examples.cv_dataset_converter.utils.adapters import *
from examples.cv_dataset_converter.utils.functions import *
from examples.cv_dataset_converter.utils.variables import *

from nodeflow import Converter
import pathlib


ADAPTER_LIST = [
    COCO2YOLO_Adapter(),
    YOLO2COCO_Adapter(),
]


def main():
    converter = Converter(ADAPTER_LIST)

    coco_dataset = converter.convert(
        variable=YOLO_Reader().compute(
            path_to_dataset=PathVariable(
                value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
            )
        ),
        to_type=COCO_Dataset
    )
    COCO_Writer().compute(
        coco_dataset=coco_dataset,
        target_path=PathVariable(
            value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
        )
    )

    yolo_dataset = converter.convert(
        variable=COCO_Reader().compute(
            path_to_dataset=PathVariable(
                value=pathlib.Path().resolve() / 'toy_datasets' / 'COCO'
            )
        ),
        to_type=YOLO_Dataset
    )
    YOLO_Writer().compute(
        yolo_dataset=yolo_dataset,
        target_path=PathVariable(
            value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
        )
    )


if __name__ == '__main__':
    main()
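
Note: a minimal sketch of the same YOLO-to-COCO step without going through Converter, assuming Converter simply dispatches to a matching registered adapter (all names taken from the diffs in this commit; the toy dataset path is assumed to exist as in main() above):

import pathlib

from examples.cv_dataset_converter.utils.adapters import YOLO2COCO_Adapter
from examples.cv_dataset_converter.utils.functions import YOLO_Reader
from examples.cv_dataset_converter.utils.variables import PathVariable

# Read the toy YOLO dataset, then call the adapter directly.
yolo_dataset = YOLO_Reader().compute(
    path_to_dataset=PathVariable(
        value=pathlib.Path().resolve() / 'toy_datasets' / 'YOLO'
    )
)
coco_dataset = YOLO2COCO_Adapter().convert(yolo_dataset)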
BIN +4.63 KB examples/cv_dataset_converter/utils/__pycache__/adapters.cpython-310.pyc (binary file added, not shown)
BIN +949 Bytes examples/cv_dataset_converter/utils/__pycache__/functions.cpython-310.pyc (binary file added, not shown)
BIN +4.58 KB examples/cv_dataset_converter/utils/__pycache__/variables.cpython-310.pyc (binary file added, not shown)
@@ -0,0 +1,144 @@
import numpy as np
from PIL import Image

from nodeflow.adapters import Adapter
from examples.cv_dataset_converter.utils.variables import YOLO_Dataset, COCO_Dataset, PathVariable
from tqdm import tqdm
from shapely.geometry import Polygon

import yaml


class YOLO2COCO_Adapter(Adapter):
    def convert(self, variable: YOLO_Dataset) -> COCO_Dataset:
        assert isinstance(variable, YOLO_Dataset)

        cat_id_to_name_mapping = [
            {"id": idx, "name": cat_name}
            for idx, cat_name in enumerate(variable.datayaml["names"])
        ]

        coco_anns = {}
        for split in variable.anns:
            coco_anns[split] = {
                'images': [],
                'annotations': [],
                'categories': cat_id_to_name_mapping,
            }
            image_id, annotation_id = 0, 0
            for im_name, im_path in tqdm(variable.imgs[split].items()):
                image = np.array(Image.open(im_path))
                height, width, _ = image.shape
                image_info = {
                    "id"        : image_id,
                    "file_name" : im_name,
                    "width"     : width,
                    "height"    : height,
                }
                coco_anns[split]["images"].append(image_info)

                # Only the label file belonging to this image (anns are keyed by "<stem>.txt").
                for line in variable.anns[split].get(f"{im_name}.txt", []):
                    mapped_line = list(map(float, line.split()))
                    if len(mapped_line) == 5:
                        # Bounding box: "class x_center y_center width height", normalized to [0, 1].
                        # Named bbox_w/bbox_h so the image dims (width/height) are not shadowed.
                        class_id, x_center, y_center, bbox_w, bbox_h = mapped_line

                        x_min = int((x_center - bbox_w / 2) * image_info["width"])
                        y_min = int((y_center - bbox_h / 2) * image_info["height"])
                        bbox_width = int(bbox_w * image_info["width"])
                        bbox_height = int(bbox_h * image_info["height"])

                        annotation = {
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": int(class_id),
                            "bbox": [x_min, y_min, bbox_width, bbox_height],
                            "area": bbox_width * bbox_height,
                            "iscrowd": 0,
                        }
                        coco_anns[split]["annotations"].append(annotation)
                        annotation_id += 1
                    elif len(mapped_line) > 5:
                        # Polygon: "class x1 y1 x2 y2 ...", normalized to [0, 1].
                        class_id = mapped_line[0]
                        converted_segmentation = []
                        for i, coord in enumerate(mapped_line[1:]):
                            # Even offsets are x coordinates (scale by width), odd are y (scale by height).
                            scale = image_info["width"] if i % 2 == 0 else image_info["height"]
                            converted_segmentation.append(coord * scale)

                        annotation = {
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": int(class_id),
                            "segmentation": [converted_segmentation],
                            "area": Polygon(zip(converted_segmentation[0::2], converted_segmentation[1::2])).area,
                            "iscrowd": 0,
                        }
                        coco_anns[split]["annotations"].append(annotation)
                        annotation_id += 1

                image_id += 1

        return COCO_Dataset(anns=coco_anns, imgs=variable.imgs, path=None)

    def is_loses_information(self) -> bool:
        return True


class COCO2YOLO_Adapter(Adapter):
    def convert(self, variable: COCO_Dataset) -> YOLO_Dataset:
        # Output path
        output_base_directory = variable.value.value.parent / "results" / "COCO"

        yolo_anns = {}
        for split in ["train", "valid", "test"]:
            yolo_anns[split] = {}
            category_mapping = {cat["id"]: cat["name"] for cat in variable.anns[split]["categories"]}
            category_id_mapping = {cat["name"]: cat["id"] for cat in variable.anns[split]["categories"]}

            for image in tqdm(variable.anns[split]["images"]):
                image_id = image["id"]
                im_name = image["file_name"]

                for annotation in variable.anns[split]["annotations"]:
                    if annotation["image_id"] != image_id:
                        continue
                    # Round-trip id -> name -> id so duplicate names resolve to one id.
                    category_id = category_id_mapping[category_mapping[annotation["category_id"]]]
                    if annotation.get("bbox"):
                        # COCO bbox is [x_min, y_min, width, height] in pixels;
                        # YOLO wants normalized center coordinates and sizes.
                        x_center = (annotation["bbox"][0] + annotation["bbox"][2] / 2) / image["width"]
                        y_center = (annotation["bbox"][1] + annotation["bbox"][3] / 2) / image["height"]
                        width = annotation["bbox"][2] / image["width"]
                        height = annotation["bbox"][3] / image["height"]
                        line = f"{category_id} {x_center} {y_center} {width} {height}\n"
                        yolo_anns[split][im_name] = yolo_anns[split].get(im_name, "") + line

                    elif annotation.get("segmentation"):
                        # COCO polygons are [x1, y1, x2, y2, ...] in pixels; normalize each coordinate.
                        im_width, im_height = image["width"], image["height"]
                        converted_segmentation = []
                        for i, coord in enumerate(annotation["segmentation"][0]):
                            scale = im_width if i % 2 == 0 else im_height
                            converted_segmentation.append(coord / scale)
                        # Write space-separated coordinates, not a Python list repr.
                        coords = " ".join(map(str, converted_segmentation))
                        yolo_anns[split][im_name] = yolo_anns[split].get(im_name, "") + f"{category_id} {coords}\n"

        yaml_file = f"path: {str(output_base_directory)}\n"
        yaml_file += 'train: ../train\n'
        yaml_file += 'val: ../valid\n'
        yaml_file += 'test: ../test\n'
        yaml_file += f'nc: {len(category_mapping)}\n'
        yaml_file += f'names: {list(category_mapping.values())}\n'

        yaml_file = yaml.load(yaml_file, Loader=yaml.SafeLoader)

        return YOLO_Dataset(datayaml=yaml_file, anns=yolo_anns, imgs=variable.imgs, path=None)

    def is_loses_information(self) -> bool:
        return False


__all__ = [
    'YOLO2COCO_Adapter',
    'COCO2YOLO_Adapter',
]
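
Note: for intuition, here is the bbox arithmetic from YOLO2COCO_Adapter worked through with made-up numbers (a hypothetical YOLO line "0 0.5 0.5 0.25 0.5" on a 640x480 image; values chosen to be exact in binary floating point):

# YOLO gives normalized center/size; COCO wants pixel [x_min, y_min, w, h].
image_w, image_h = 640, 480
class_id, x_center, y_center, w, h = 0, 0.5, 0.5, 0.25, 0.5

x_min = int((x_center - w / 2) * image_w)   # (0.5 - 0.125) * 640 = 240
y_min = int((y_center - h / 2) * image_h)   # (0.5 - 0.25)  * 480 = 120
bbox_w = int(w * image_w)                   # 0.25 * 640 = 160
bbox_h = int(h * image_h)                   # 0.5  * 480 = 240
assert [x_min, y_min, bbox_w, bbox_h] == [240, 120, 160, 240]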
@@ -0,0 +1,135 @@
from examples.cv_dataset_converter.utils.variables import PathVariable, YOLO_Dataset, COCO_Dataset
from nodeflow.node import Function
from nodeflow import Node

import shutil
import yaml
import json


class YOLO_Reader(Function):
    def compute(self, path_to_dataset: PathVariable) -> YOLO_Dataset:
        # validate path
        assert isinstance(path_to_dataset, PathVariable)

        # load data.yaml
        datayaml_path = path_to_dataset / 'data.yaml'
        assert datayaml_path.exists(), "Could not find .yaml file"

        with open(datayaml_path, "r") as datayaml_file:
            datayaml = yaml.load(datayaml_file, Loader=yaml.SafeLoader)

        annotations, images = {}, {}
        for split in ["train", "test", "valid"]:
            annotations[split], images[split] = {}, {}

            for label_file_path in (path_to_dataset / split / "labels").glob("*.txt"):
                with open(label_file_path, "r") as label_file:
                    annotations[split][label_file_path.name] = label_file.readlines()

            images_directory = path_to_dataset / split / "images"
            for image_file_path in images_directory.iterdir():
                if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    # iterdir() already yields full paths; no extra join needed.
                    images[split][image_file_path.stem] = image_file_path

        return YOLO_Dataset(
            path     = path_to_dataset,
            datayaml = datayaml,
            anns     = annotations,
            imgs     = images,
        )


class COCO_Reader(Function):
    def compute(self, path_to_dataset: PathVariable) -> COCO_Dataset:
        annotations, images = {}, {}
        for split in ["train", "test", "valid"]:
            annotations[split], images[split] = {}, {}

            # Categories sharing a name are "shrunk" onto the smallest id carrying that name.
            shrinkage_superclass_mapping = {}
            all_categories_mapping = {}
            assert (path_to_dataset / split / "_annotations.json").exists()
            with open(path_to_dataset / split / "_annotations.json", "r") as f:
                data = json.load(f)
            for category in data["categories"]:
                all_categories_mapping[category["id"]] = category["name"]
                shrinkage_superclass_mapping[category["name"]] = shrinkage_superclass_mapping.get(category["name"], []) + [category["id"]]

            for annotation in data["annotations"]:
                annotation['category_id'] = min(shrinkage_superclass_mapping[all_categories_mapping[annotation['category_id']]])

            data['categories'] = [
                {
                    'id'            : min(shrinkage_superclass_mapping[category]),
                    'name'          : category,
                    'supercategory' : 'none',
                }
                for category in shrinkage_superclass_mapping
            ]
            annotations[split] = data

            images_directory = path_to_dataset / split
            for image_file_path in images_directory.iterdir():
                if image_file_path.suffix in ['.jpg', '.jpeg', '.png']:
                    images[split][image_file_path.stem] = image_file_path

        return COCO_Dataset(path=path_to_dataset, anns=annotations, imgs=images)


class YOLO_Writer(Function):
    def compute(self, yolo_dataset: YOLO_Dataset, target_path: PathVariable) -> Node:
        root = target_path.value

        # create directories, then copy each image with its label file
        for split in ["train", "test", "valid"]:
            (root / split / "labels").mkdir(parents=True, exist_ok=True)
            (root / split / "images").mkdir(parents=True, exist_ok=True)

            for image_path in yolo_dataset.imgs[split].values():
                shutil.copy(
                    src = image_path,
                    dst = root / split / 'images' / image_path.name
                )

                # assumes anns are keyed by image file name
                with open(root / split / 'labels' / f"{image_path.stem}.txt", 'w') as label_file:
                    label_file.write(yolo_dataset.anns[split][image_path.name])

        with open(root / 'data.yaml', "w") as yaml_file:
            datayaml = {
                'train': yolo_dataset.datayaml['train'],
                'val'  : yolo_dataset.datayaml['val'],
                'test' : yolo_dataset.datayaml['test'],
                'nc'   : yolo_dataset.datayaml['nc'],
                'names': yolo_dataset.datayaml['names'],
            }
            yaml.dump(datayaml, yaml_file)


class COCO_Writer(Function):
    def compute(self, coco_dataset: COCO_Dataset, target_path: PathVariable) -> Node:
        root = target_path.value

        for split in ["train", "test", "valid"]:
            (root / split).mkdir(parents=True, exist_ok=True)

            for image_path in coco_dataset.imgs[split].values():
                shutil.copy(
                    src = image_path,
                    dst = root / split / image_path.name
                )

            with open(root / split / '_annotations.json', 'w') as json_file:
                json.dump(coco_dataset.anns[split], json_file, indent=4)


__all__ = [
    "YOLO_Reader",
    "COCO_Reader",
    "YOLO_Writer",
    "COCO_Writer",
]
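
Note: the category "shrinkage" in COCO_Reader is easiest to see on toy data: duplicate category names collapse onto the smallest id that carries the name, and annotations are re-pointed accordingly. A self-contained sketch of that mapping logic (toy values are made up):

# Two categories named "car" under different ids get merged onto id 0.
categories = [
    {"id": 0, "name": "car"},
    {"id": 1, "name": "person"},
    {"id": 2, "name": "car"},      # duplicate name, different id
]
all_categories = {c["id"]: c["name"] for c in categories}
shrinkage = {}
for c in categories:
    shrinkage[c["name"]] = shrinkage.get(c["name"], []) + [c["id"]]
# shrinkage == {"car": [0, 2], "person": [1]}

annotation = {"category_id": 2}
annotation["category_id"] = min(shrinkage[all_categories[annotation["category_id"]]])
assert annotation["category_id"] == 0   # re-pointed from id 2 to id 0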