# Copyright (c) OpenMMLab. All rights reserved.
# Content of tools/misc/labelme2coco.py as introduced by the "labelme2coco"
# patch (38433bab75d2), restored to regular multi-line Python.
"""Convert Labelme keypoint annotations into a COCO-style keypoint dataset.

Each Labelme JSON file is expected to describe one image with ``rectangle``
shapes (object boxes) and ``point`` shapes (keypoints).  A point label is the
keypoint name, optionally followed by ``_occluded``.
"""
import argparse
import glob
import json
import os
import shutil

import numpy as np

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is a convenience, not a requirement

    def tqdm(iterable, *args, **kwargs):
        """Fallback used when tqdm is not installed: iterate silently."""
        return iterable


# Keypoint names used when the caller supplies none.  The original template
# listed these three names followed by a literal ``...`` placeholder.
_DEFAULT_KEYPOINT_NAMES = ('wrist', 'thumb1', 'thumb2')
# Matches the original hard-coded keypoint array length of 36 (12 * 3).
_DEFAULT_NUM_JOINTS = 12
_OCCLUDED_FLAG = 'occluded'


class Labelme2coco_keypoints():
    """Accumulate Labelme annotation files into one COCO keypoint dataset."""

    # Class-level defaults so that instances created without ``__init__``
    # still have a usable configuration.
    keypoint_names = _DEFAULT_KEYPOINT_NAMES
    num_joints = _DEFAULT_NUM_JOINTS

    def __init__(self, args):
        """Set up an empty dataset.

        Args:
            args: Namespace of command-line options.  ``class_name`` is
                required (name of the single object category).  Optional
                attributes: ``keypoint_names`` (ordered keypoint names) and
                ``join_num`` (number of keypoint slots per annotation).
        """
        self.classname_to_id = {args.class_name: 1}

        names = getattr(args, 'keypoint_names', None)
        self.keypoint_names = list(names or _DEFAULT_KEYPOINT_NAMES)
        join_num = getattr(args, 'join_num', None) or _DEFAULT_NUM_JOINTS
        # Never allocate fewer slots than there are named keypoints, otherwise
        # writing the last keypoints would index past the array.
        self.num_joints = max(int(join_num), len(self.keypoint_names))

        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0

    def save_coco_json(self, instance, save_path):
        """Write ``instance`` to ``save_path`` as UTF-8 JSON."""
        # The context manager guarantees the file is flushed and closed.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load and return the JSON document stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return the ``[x, y, width, height]`` box enclosing ``points``.

        Args:
            points: Iterable of ``(x, y)`` pairs.
        """
        min_x = min_y = np.inf
        # Start from -inf (not 0) so negative coordinates are handled.
        max_x = max_y = -np.inf
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_keypoints(self, points, keypoints, num_keypoints, label):
        """Store one Labelme point in a flat COCO keypoint array.

        The array holds ``x, y, v`` triples; ``v`` is 1 for a label ending in
        ``_occluded`` (labelled but not visible) and 2 otherwise (labelled
        and visible).

        Args:
            points: The ``(x, y)`` coordinates of the point.
            keypoints: Flat keypoint array, modified in place.
            num_keypoints: Number of keypoints stored so far.
            label: Labelme label, e.g. ``'wrist'`` or ``'wrist_occluded'``.

        Returns:
            The updated number of stored keypoints.

        Raises:
            ValueError: If the label does not name a known keypoint.
        """
        tokens = label.split('_')
        occluded = tokens[-1] == _OCCLUDED_FLAG
        visible = 1 if occluded else 2

        # Prefer the full name (minus the occlusion suffix) so that names
        # containing underscores work; fall back to the first token, which is
        # how labels were originally parsed.
        base = label.rsplit('_', 1)[0] if occluded and '_' in label else label
        name = base if base in self.keypoint_names else tokens[0]
        try:
            index = self.keypoint_names.index(name)
        except ValueError:
            raise ValueError('unknown keypoint label {!r}; expected one of {}'
                             .format(label,
                                     list(self.keypoint_names))) from None

        slot = index * 3
        # Count each keypoint once even if it is annotated more than once.
        if keypoints[slot + 2] == 0:
            num_keypoints += 1
        keypoints[slot] = points[0]
        keypoints[slot + 1] = points[1]
        keypoints[slot + 2] = visible
        return num_keypoints

    def _image(self, obj, path):
        """Build the COCO ``image`` entry for one Labelme file.

        Args:
            obj: Parsed Labelme document; ``imageHeight`` and ``imageWidth``
                are read from it.
            path: Path of the Labelme JSON file; the image file name is its
                base name with a ``.jpg`` extension.

        Returns:
            Dict with ``height``, ``width``, ``id`` and ``file_name``.
        """
        self.img_id += 1
        stem = os.path.splitext(os.path.basename(path))[0]
        return {
            # Sizes come from the Labelme metadata rather than from decoding
            # the embedded ``imageData``.
            'height': obj['imageHeight'],
            'width': obj['imageWidth'],
            'id': self.img_id,
            'file_name': stem + '.jpg',
        }

    @staticmethod
    def _point_in_box(point, box):
        """Return True if ``point`` lies inside ``box`` (edges included)."""
        left, top, width, height = box
        return (left <= point[0] <= left + width
                and top <= point[1] <= top + height)

    def _keypoints_of_box(self, box_shape, box, keypoints_list):
        """Select the point shapes that belong to one rectangle shape.

        When both shapes carry a ``group_id`` they are matched on it;
        otherwise a point belongs to the box that contains it.
        """
        box_group = box_shape.get('group_id')
        selected = []
        for shape in keypoints_list:
            point_group = shape.get('group_id')
            if box_group is not None and point_group is not None:
                if point_group == box_group:
                    selected.append(shape)
            elif self._point_in_box(shape['points'][0], box):
                selected.append(shape)
        return selected

    def _annotation(self, bboxes_list, keypoints_list, json_path):
        """Append one COCO annotation per rectangle of the current image.

        Args:
            bboxes_list: Labelme ``rectangle`` shapes of the image.
            keypoints_list: Labelme ``point`` shapes of the image.
            json_path: Path of the Labelme file (kept for the interface; not
                used).
        """
        # With a single box there is nothing to disambiguate, so every point
        # is kept even if it was drawn slightly outside the rectangle.
        single_box = len(bboxes_list) == 1

        for box_shape in bboxes_list:
            corners = box_shape['points']
            category_id = int(self.classname_to_id[box_shape['label']])
            box = self._get_box(corners)

            if single_box:
                members = keypoints_list
            else:
                members = self._keypoints_of_box(box_shape, box,
                                                 keypoints_list)

            # One (x, y, v) triple per joint, all zero until labelled.
            keypoints = [0] * (3 * self.num_joints)
            num_keypoints = 0
            for shape in members:
                num_keypoints = self._get_keypoints(shape['points'][0],
                                                    keypoints, num_keypoints,
                                                    shape['label'])

            self.annotations.append({
                'id': self.ann_id,
                'image_id': self.img_id,
                'category_id': category_id,
                'iscrowd': 0,
                # Box area; keypoint (OKS) evaluation normalises by it.
                'area': float(box[2] * box[3]),
                # The two rectangle corners, flattened.
                'segmentation': [np.asarray(corners).flatten().tolist()],
                'bbox': box,
                'keypoints': keypoints,
                'num_keypoints': num_keypoints,
            })
            self.ann_id += 1

    def _init_categories(self):
        """Append the COCO ``categories`` entry of every known class.

        Each entry has ``supercategory``, ``id``, ``name``, the ordered
        ``keypoints`` names and an empty ``skeleton``.
        """
        for name, class_id in self.classname_to_id.items():
            self.categories.append({
                'supercategory': name,
                'id': class_id,
                'name': name,
                'keypoints': list(self.keypoint_names),
                'skeleton': [],
            })

    def to_coco(self, json_path_list):
        """Convert Labelme files into one COCO dataset dictionary.

        Args:
            json_path_list: Paths of the Labelme JSON files to convert.

        Returns:
            Dict with ``info``, ``license``, ``images``, ``annotations`` and
            ``categories``.
        """
        self._init_categories()
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))

            bboxes_list, keypoints_list = [], []
            for shape in obj['shapes']:
                if shape['shape_type'] == 'rectangle':
                    bboxes_list.append(shape)
                elif shape['shape_type'] == 'point':
                    keypoints_list.append(shape)
            # All boxes and points of one image are handled together.
            self._annotation(bboxes_list, keypoints_list, json_path)

        return {
            'info': {
                'description': 'Air Dataset',
                'version': 1.0,
                'year': 2022
            },
            'license': ['BUAA'],
            'images': self.images,
            'annotations': self.annotations,
            'categories': self.categories,
        }


def init_dir(base_path):
    """Create the COCO folder layout below ``base_path``.

    Creates ``coco/annotations``, ``coco/train`` and ``coco/val``; existing
    directories are left untouched.

    Args:
        base_path: Root directory in which the dataset is placed.
    """
    for sub_dir in ('annotations', 'train', 'val'):
        os.makedirs(os.path.join(base_path, 'coco', sub_dir), exist_ok=True)


def convert(path, target):
    """Convert a Labelme folder into a train/val COCO keypoint dataset.

    Command-line options override both arguments.

    Args:
        path: Default input directory containing the Labelme ``*.json`` files
            and the matching ``*.jpg`` images.
        target: Default output directory; a ``coco`` folder is created in it.

    Raises:
        ValueError: If the input directory contains no ``*.json`` file.
    """
    # Imported here so that the converter class can be imported and used
    # without scikit-learn installed.
    from sklearn.model_selection import train_test_split

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--class_name', '--n', help='class name', type=str, default='airplane')
    parser.add_argument(
        '--input',
        '--i',
        help='json file path (labelme)',
        type=str,
        default=path)
    parser.add_argument(
        '--output',
        '--o',
        help='output file path (coco format)',
        type=str,
        default=target)
    parser.add_argument(
        '--join_num', '--j', help='number of join', type=int, default=12)
    parser.add_argument(
        '--keypoint_names',
        '--k',
        help='ordered keypoint names',
        type=str,
        nargs='+',
        default=None)
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()

    labelme_path = args.input
    saved_coco_path = args.output

    init_dir(saved_coco_path)

    json_list_path = glob.glob(os.path.join(labelme_path, '*.json'))
    if not json_list_path:
        raise ValueError(
            'no labelme json file found in {!r}'.format(labelme_path))
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    annotation_dir = os.path.join(saved_coco_path, 'coco', 'annotations')

    # Training set.
    l2c_train = Labelme2coco_keypoints(args)
    train_keypoints = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(
        train_keypoints, os.path.join(annotation_dir, 'keypoints_train.json'))

    # Validation set; a fresh converter restarts the image/annotation ids.
    l2c_val = Labelme2coco_keypoints(args)
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(
        val_instance, os.path.join(annotation_dir, 'keypoints_val.json'))

    # Copy the image that sits next to each Labelme file into its split.
    for split, files in (('train', train_path), ('val', val_path)):
        split_dir = os.path.join(saved_coco_path, 'coco', split)
        for file in files:
            # Swap only the extension; a plain str.replace would also rewrite
            # "json" occurring in a directory name.
            shutil.copy(os.path.splitext(file)[0] + '.jpg', split_dir)


if __name__ == '__main__':
    source = 'your labelme path'
    target = 'your coco path'
    convert(source, target)