From e0c55d61effb105e6bf51bb94e6110a4c07db9c4 Mon Sep 17 00:00:00 2001 From: Kanghao Chen Date: Mon, 10 Oct 2022 00:17:41 +0800 Subject: [PATCH] [Feature] Add 4d association method (#16) * add 4dag method * rename fourd to bottom up, add metric and alignment, change name manner * rename some files and variables * fix a bug * delete Camera class in association * change configs * modify triangulation pipeline * convert data format form pkl to json * align triangulate method, align kps convention * delete fourdag triangulate, add fourdag19 convention pipeline * change some names * resolve conflict * resolve some comments * run pre-commit * align result * refactor code * refactor triangulator and optimization * resolve comments * pre-commit * change limb info * fix bug * add stringdoc * rename joint to kps * add yapf * pre-commit * add docstring * refactor * add limb info json file * fix bug * delete debug call * add readme * rephase term class * rephase associate * add cloud file and path * process fourdag seq5 * resolve comments --- configs/fourdag/README.md | 106 +++ .../fourdag/campus_config/eval_keypoints3d.py | 90 +++ .../fourdag_config/eval_keypoints3d_seq2.py | 91 +++ .../fourdag_config/eval_keypoints3d_seq4.py | 91 +++ .../fourdag_config/eval_keypoints3d_seq5.py | 91 +++ .../fourdag/shelf_config/eval_keypoints3d.py | 90 +++ .../bottom_up_eval_shelf_unittest.py | 90 +++ .../data/dataset/shelf_unittest_bottom_up.py | 15 + .../fourdag_associator.py | 41 + .../ops/triangulation/jacobi_triangulator.py | 1 + docs/en/benchmark.md | 4 + docs/en/dataset_preparation.md | 8 +- scripts/download_weight.sh | 1 + .../evaluation/test_fourdag_evaluation.py | 35 + tests/ops/test_bottom_up_association.py | 29 + tests/ops/test_triangulation.py | 50 ++ tests/test_data/test_dataset.py | 38 + xrmocap/core/evaluation/align_keypoints3d.py | 106 +++ .../bottom_up_association_evaluation.py | 201 +++++ xrmocap/core/evaluation/builder.py | 3 + xrmocap/core/evaluation/metrics.py | 198 +++++ .../visualization/visualize_keypoints2d.py | 4 +- .../mview_mperson_data_visualization.py | 71 +- .../bottom_up_mview_mperson_dataset.py | 199 +++++ xrmocap/data/dataset/builder.py | 3 + xrmocap/data/dataset/mview_mperson_dataset.py | 2 + xrmocap/ops/bottom_up_association/__init__.py | 0 xrmocap/ops/bottom_up_association/builder.py | 16 + .../fourdag_associator.py | 242 ++++++ .../graph_solver/builder.py | 14 + .../graph_solver/graph_associate.py | 741 ++++++++++++++++++ .../graph_solver/graph_construct.py | 282 +++++++ xrmocap/ops/triangulation/builder.py | 4 + .../ops/triangulation/jacobi_triangulator.py | 155 ++++ .../keypoints_convention/__init__.py | 91 ++- .../keypoints_convention/fourdag_19.py | 21 + .../convention/keypoints_convention/paf.py | 10 + .../transform/keypoints3d/optim/builder.py | 6 + .../optim/fourdag_base_optimizer.py | 73 ++ .../keypoints3d/optim/fourdag_optimization.py | 653 +++++++++++++++ xrmocap/utils/fourdag_utils.py | 124 +++ xrmocap/utils/mvpose_utils.py | 23 +- 42 files changed, 4101 insertions(+), 12 deletions(-) create mode 100644 configs/fourdag/README.md create mode 100644 configs/fourdag/campus_config/eval_keypoints3d.py create mode 100644 configs/fourdag/fourdag_config/eval_keypoints3d_seq2.py create mode 100644 configs/fourdag/fourdag_config/eval_keypoints3d_seq4.py create mode 100644 configs/fourdag/fourdag_config/eval_keypoints3d_seq5.py create mode 100644 configs/fourdag/shelf_config/eval_keypoints3d.py create mode 100644 configs/modules/core/evaluation/bottom_up_eval_shelf_unittest.py 
create mode 100644 configs/modules/data/dataset/shelf_unittest_bottom_up.py
create mode 100644 configs/modules/ops/bottom_up_association/fourdag_associator.py
create mode 100644 configs/modules/ops/triangulation/jacobi_triangulator.py
create mode 100644 tests/core/evaluation/test_fourdag_evaluation.py
create mode 100644 tests/ops/test_bottom_up_association.py
create mode 100644 xrmocap/core/evaluation/align_keypoints3d.py
create mode 100644 xrmocap/core/evaluation/bottom_up_association_evaluation.py
create mode 100644 xrmocap/core/evaluation/metrics.py
create mode 100644 xrmocap/data/dataset/bottom_up_mview_mperson_dataset.py
create mode 100644 xrmocap/ops/bottom_up_association/__init__.py
create mode 100644 xrmocap/ops/bottom_up_association/builder.py
create mode 100644 xrmocap/ops/bottom_up_association/fourdag_associator.py
create mode 100644 xrmocap/ops/bottom_up_association/graph_solver/builder.py
create mode 100644 xrmocap/ops/bottom_up_association/graph_solver/graph_associate.py
create mode 100644 xrmocap/ops/bottom_up_association/graph_solver/graph_construct.py
create mode 100644 xrmocap/ops/triangulation/jacobi_triangulator.py
create mode 100644 xrmocap/transform/convention/keypoints_convention/fourdag_19.py
create mode 100644 xrmocap/transform/convention/keypoints_convention/paf.py
create mode 100644 xrmocap/transform/keypoints3d/optim/fourdag_base_optimizer.py
create mode 100644 xrmocap/transform/keypoints3d/optim/fourdag_optimization.py
create mode 100644 xrmocap/utils/fourdag_utils.py

diff --git a/configs/fourdag/README.md b/configs/fourdag/README.md
new file mode 100644
index 00000000..6d30d05f
--- /dev/null
+++ b/configs/fourdag/README.md
@@ -0,0 +1,106 @@
+# 4D Association Graph for Realtime Multi-person Motion Capture Using Multiple Video Cameras
+Note: As a Python variable name cannot start with a number, we refer to this method as `FourDAG` in the following text and code.
+
+ - [Introduction](#introduction)
+ - [Prepare limb information and datasets](#prepare-limb-information-and-datasets)
+ - [Results](#results)
+   - [Campus](#campus)
+   - [Shelf](#shelf)
+   - [FourDAG](#fourdag-1)
+
+## Introduction
+
+We provide the config files for FourDAG: [4D Association Graph for Realtime Multi-person Motion Capture Using Multiple Video Cameras](https://arxiv.org/abs/2002.12625).
+
+[Official Implementation](https://github.com/zhangyux15/4d_association)
+
+```BibTeX
+@inproceedings{Zhang20204DAG,
+  title={4D Association Graph for Realtime Multi-Person Motion Capture Using Multiple Video Cameras},
+  author={Yuxiang Zhang and Liang An and Tao Yu and Xiu Li and Kun Li and Yebin Liu},
+  booktitle={IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  year={2020},
+  pages={1321-1330}
+}
+```
+## Prepare limb information and datasets
+
+- **Prepare limb information**:
+
+```
+sh scripts/download_weight.sh
+```
+After the script finishes, you can find `limb_info.json` in the `weight` directory.
+
+- **Prepare the datasets**:
+
+You can download the Shelf, Campus or FourDAG dataset and convert the original data to our unified meta-data. Since running a converter takes a long time, we have done it for you: please download the compressed zip files of converted meta-data from [here](../../docs/en/dataset_preparation.md) and place the meta-data under `ROOT/xrmocap_data/DATASET`.
+
+The final file structure would be like:
+
+```text
+xrmocap
+├── xrmocap
+├── docs
+├── tools
+├── configs
+├── weight
+|   └── limb_info.json
+└── xrmocap_data
+    ├── CampusSeq1
+    ├── Shelf
+    |   ├── Camera0
+    |   ├── ...
+    |   ├── Camera4
+    |   └── xrmocap_meta_testset
+    └── FourDAG
+        ├── seq2
+        ├── seq4
+        ├── seq5
+        ├── xrmocap_meta_seq2
+        ├── xrmocap_meta_seq4
+        └── xrmocap_meta_seq5
+```
+You only need to download one of the three datasets (Shelf, Campus or FourDAG).
+
+## Results
+
+We evaluate FourDAG on three benchmarks and report the Percentage of Correct Parts (PCP) on the Shelf/Campus/FourDAG datasets.
+
+You can find the recommended configs in `configs/fourdag/*/eval_keypoints3d.py`.
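+
+Evaluation can then be launched with XRMoCap's top-level evaluation tool. The following is a minimal sketch: the entry point and flags are assumed to match the other evaluation methods in this repo and may differ in your setup.
+
+```bash
+# Sketch: run FourDAG evaluation on Shelf with the config above.
+python tools/mview_mperson_evaluation.py \
+    --enable_log_file \
+    --evaluation_config configs/fourdag/shelf_config/eval_keypoints3d.py
+```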
+
+### Campus
+
+The 2D keypoints and PAF data we use are generated by OpenPose, and you can download them from [here](/docs/en/dataset_preparation.md#download-converted-meta-data).
+
+| Config | Actor 0 | Actor 1 | Actor 2 | Average | Download |
+|:------:|:-------:|:--------:|:--------:|:--------:|:--------:|
+| [eval_keypoints3d.py](./campus_config/eval_keypoints3d.py) | 64.26 | 90.64 | 86.27 | 80.39 | [log](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/logs/FourDAG/campus.zip) |
+
+
+### Shelf
+
+The 2D keypoints and PAF data we use are generated by Faster R-CNN, and you can download them from [here](/docs/en/dataset_preparation.md#download-converted-meta-data).
+
+| Config | Actor 0 | Actor 1 | Actor 2 | Average | Download |
+|:------:|:-------:|:--------:|:--------:|:--------:|:--------:|
+| [eval_keypoints3d.py](./shelf_config/eval_keypoints3d.py) | 99.61 | 96.76 | 98.20 | 98.19 | [log](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/logs/FourDAG/shelf.zip) |
+
+
+### FourDAG
+
+The 2D keypoints and PAF data we use are generated by mmpose, and you can download them from [here](/docs/en/dataset_preparation.md#download-converted-meta-data).
+
+- **seq2**
+
+| Config | Actor 0 | Actor 1 | Average | PCK@200mm | Download |
+|:-------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| [eval_keypoints3d.py](./fourdag_config/eval_keypoints3d_seq2.py) | 92.18 | 87.35 | 89.77 | 83.10 | [log](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/logs/FourDAG/fourdag.zip) |
+
+- **seq4**
+
+| Config | Actor 0 | Actor 1 | Actor 2 | Average | PCK@200mm | Download |
+|:-------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| [eval_keypoints3d.py](./fourdag_config/eval_keypoints3d_seq4.py) | 91.85 | 86.48 | 92.92 | 90.42 | 81.29 | [log](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/logs/FourDAG/fourdag.zip) |
diff --git a/configs/fourdag/campus_config/eval_keypoints3d.py b/configs/fourdag/campus_config/eval_keypoints3d.py
new file mode 100644
index 00000000..93d63ab9
--- /dev/null
+++ b/configs/fourdag/campus_config/eval_keypoints3d.py
@@ -0,0 +1,90 @@
+type = 'BottomUpAssociationEvaluation'
+
+__data_root__ = './xrmocap_data/CampusSeq1'
+__meta_path__ = __data_root__ + '/xrmocap_meta_testset'
+
+logger = None
+output_dir = './output/fourdag/CampusSeq1_fourdag_19_FourDAGOptimization/'
+pred_kps3d_convention = 'fourdag_19'
+eval_kps3d_convention = 'campus'
+selected_limbs_name = [
+    'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm',
+    'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh'
+]
+additional_limbs_names = [['jaw', 'headtop']]
+
+associator = dict(
+    type='FourDAGAssociator',
+    kps_convention=pred_kps3d_convention,
+    min_asgn_cnt=10,
+    use_tracking_edges=True,
+    keypoints3d_optimizer=dict(
+        type='FourDAGOptimizer',
+        triangulator=dict(type='JacobiTriangulator', ),
+        active_rate=0.5,
+        min_track_cnt=20,
+        bone_capacity=30,
+        w_bone3d=1.0,
+        w_square_shape=1e-3,
+        shape_max_iter=5,
+        w_kps3d=1.0,
+        w_regular_pose=1e-4,
+        pose_max_iter=20,
+
w_kps2d=1e-5, + w_temporal_trans=1e-1 / pow(512 / 2048, 2), + w_temporal_pose=1e-1 / pow(512 / 2048, 2), + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.2, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=2, + w_temp=2, + w_view=2, + w_paf=4, + w_hier=0.5, + c_view_cnt=1.5, + min_check_cnt=1, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=True, + gt_kps3d_convention=None, +) diff --git a/configs/fourdag/fourdag_config/eval_keypoints3d_seq2.py b/configs/fourdag/fourdag_config/eval_keypoints3d_seq2.py new file mode 100644 index 00000000..4784adcb --- /dev/null +++ b/configs/fourdag/fourdag_config/eval_keypoints3d_seq2.py @@ -0,0 +1,91 @@ +type = 'BottomUpAssociationEvaluation' + +__data_root__ = './xrmocap_data/FourDAG/' +__meta_path__ = __data_root__ + '/xrmocap_meta_seq2' + +logger = None +output_dir = './output/fourdag/fourdag_fourdag_19_FourDAGOptimization/' +pred_kps3d_convention = 'fourdag_19' +eval_kps3d_convention = 'campus' +selected_limbs_name = [ + 'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm', + 'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh' +] +# additional_limbs_names = [['jaw', 'headtop']] + +associator = dict( + type='FourDAGAssociator', + kps_convention=pred_kps3d_convention, + min_asgn_cnt=5, + use_tracking_edges=True, + keypoints3d_optimizer=dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.5, + min_track_cnt=20, + bone_capacity=30, + w_bone3d=1.0, + w_square_shape=1e-3, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-4, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1 / pow(512 / 2048, 2), + w_temporal_pose=1e-1 / pow(512 / 2048, 2), + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.3, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=1, + w_temp=2, + w_view=1, + w_paf=2, + w_hier=1, + c_view_cnt=1, + min_check_cnt=10, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + 
vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=True, + gt_kps3d_convention=None, + resolution=(368, 368), +) diff --git a/configs/fourdag/fourdag_config/eval_keypoints3d_seq4.py b/configs/fourdag/fourdag_config/eval_keypoints3d_seq4.py new file mode 100644 index 00000000..c294db9f --- /dev/null +++ b/configs/fourdag/fourdag_config/eval_keypoints3d_seq4.py @@ -0,0 +1,91 @@ +type = 'BottomUpAssociationEvaluation' + +__data_root__ = './xrmocap_data/FourDAG' +__meta_path__ = __data_root__ + '/xrmocap_meta_seq4' + +logger = None +output_dir = './output/fourdag/fourdag_fourdag_19_FourDAGOptimization/' +pred_kps3d_convention = 'fourdag_19' +eval_kps3d_convention = 'campus' +selected_limbs_name = [ + 'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm', + 'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh' +] +# additional_limbs_names = [['jaw', 'headtop']] + +associator = dict( + type='FourDAGAssociator', + kps_convention=pred_kps3d_convention, + min_asgn_cnt=5, + use_tracking_edges=True, + keypoints3d_optimizer=dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.5, + min_track_cnt=20, + bone_capacity=30, + w_bone3d=1.0, + w_square_shape=1e-3, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-4, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1 / pow(512 / 2048, 2), + w_temporal_pose=1e-1 / pow(512 / 2048, 2), + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.3, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=1, + w_temp=2, + w_view=1, + w_paf=2, + w_hier=1, + c_view_cnt=1, + min_check_cnt=10, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=True, + gt_kps3d_convention=None, + resolution=(368, 368), +) diff --git a/configs/fourdag/fourdag_config/eval_keypoints3d_seq5.py b/configs/fourdag/fourdag_config/eval_keypoints3d_seq5.py new file mode 100644 index 00000000..02c00c45 --- /dev/null +++ b/configs/fourdag/fourdag_config/eval_keypoints3d_seq5.py @@ -0,0 +1,91 @@ +type = 'BottomUpAssociationEvaluation' + +__data_root__ = './xrmocap_data/FourDAG' +__meta_path__ = __data_root__ + '/xrmocap_meta_seq5' + +logger = None +output_dir = './output/fourdag/fourdag_fourdag_19_FourDAGOptimization/' +pred_kps3d_convention = 'fourdag_19' +eval_kps3d_convention = 'campus' +selected_limbs_name = [ + 'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm', + 'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh' +] +# additional_limbs_names = [['jaw', 'headtop']] + +associator = dict( + type='FourDAGAssociator', + kps_convention=pred_kps3d_convention, + min_asgn_cnt=5, + use_tracking_edges=True, + 
keypoints3d_optimizer=dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.5, + min_track_cnt=20, + bone_capacity=30, + w_bone3d=1.0, + w_square_shape=1e-3, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-4, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1 / pow(512 / 2048, 2), + w_temporal_pose=1e-1 / pow(512 / 2048, 2), + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.3, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=1, + w_temp=2, + w_view=1, + w_paf=2, + w_hier=1, + c_view_cnt=1, + min_check_cnt=10, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=True, + gt_kps3d_convention=None, + resolution=(368, 368), +) diff --git a/configs/fourdag/shelf_config/eval_keypoints3d.py b/configs/fourdag/shelf_config/eval_keypoints3d.py new file mode 100644 index 00000000..c4a4172c --- /dev/null +++ b/configs/fourdag/shelf_config/eval_keypoints3d.py @@ -0,0 +1,90 @@ +type = 'BottomUpAssociationEvaluation' + +__data_root__ = './xrmocap_data/Shelf' +__meta_path__ = __data_root__ + '/xrmocap_meta_testset' + +logger = None +output_dir = './output/fourdag/shelf_fourdag_19_FourDAGOptimization/' +pred_kps3d_convention = 'fourdag_19' +eval_kps3d_convention = 'campus' +selected_limbs_name = [ + 'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm', + 'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh' +] +additional_limbs_names = [['jaw', 'headtop']] + +associator = dict( + type='FourDAGAssociator', + kps_convention=pred_kps3d_convention, + min_asgn_cnt=5, + use_tracking_edges=True, + keypoints3d_optimizer=dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.1, + min_track_cnt=5, + bone_capacity=100, + w_bone3d=1.0, + w_square_shape=1e-2, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-3, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1, + w_temporal_pose=1e-2, + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.2, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=2, + w_temp=2, + w_view=2, + w_paf=1, + w_hier=0.5, + c_view_cnt=1.5, + min_check_cnt=1, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + 
gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=True, + gt_kps3d_convention=None, +) diff --git a/configs/modules/core/evaluation/bottom_up_eval_shelf_unittest.py b/configs/modules/core/evaluation/bottom_up_eval_shelf_unittest.py new file mode 100644 index 00000000..f63f89b7 --- /dev/null +++ b/configs/modules/core/evaluation/bottom_up_eval_shelf_unittest.py @@ -0,0 +1,90 @@ +type = 'BottomUpAssociationEvaluation' + +__data_root__ = 'tests/data/data/test_dataset/Shelf_unittest' +__meta_path__ = __data_root__ + '/xrmocap_meta_perception2d' + +logger = None +output_dir = 'tests/data/output/core/test_mvpose_evaluation' +pred_kps3d_convention = 'fourdag_19' +eval_kps3d_convention = 'campus' +selected_limbs_name = [ + 'left_lower_leg', 'right_lower_leg', 'left_upperarm', 'right_upperarm', + 'left_forearm', 'right_forearm', 'left_thigh', 'right_thigh' +] +additional_limbs_names = [['jaw', 'headtop']] + +associator = dict( + type='FourDAGAssociator', + kps_convention=pred_kps3d_convention, + min_asgn_cnt=10, + use_tracking_edges=True, + keypoints3d_optimizer=dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.1, + min_track_cnt=5, + bone_capacity=2, + w_bone3d=1.0, + w_square_shape=1e-2, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-3, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1, + w_temporal_pose=1e-2, + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, + logger=logger, + ), + graph_construct=dict( + type='GraphConstruct', + kps_convention=pred_kps3d_convention, + max_epi_dist=0.15, + max_temp_dist=0.2, + normalize_edges=True, + logger=logger, + ), + graph_associate=dict( + type='GraphAssociate', + kps_convention=pred_kps3d_convention, + w_epi=2, + w_temp=2, + w_view=2, + w_paf=1, + w_hier=0.5, + c_view_cnt=1.5, + min_check_cnt=1, + logger=logger, + ), + logger=logger, +) + +dataset = dict( + type='BottomUpMviewMpersonDataset', + data_root=__data_root__, + img_pipeline=[ + dict(type='LoadImagePIL'), + dict(type='ToTensor'), + ], + meta_path=__meta_path__, + test_mode=True, + shuffled=False, + kps2d_convention=pred_kps3d_convention, + gt_kps3d_convention='campus', + cam_world2cam=True, +) + +dataset_visualization = dict( + type='MviewMpersonDataVisualization', + data_root=__data_root__, + output_dir=output_dir, + meta_path=__meta_path__, + pred_kps3d_paths=None, + vis_percep2d=False, + kps2d_convention=pred_kps3d_convention, + vis_gt_kps3d=False, + vis_bottom_up=False, + gt_kps3d_convention=None, +) diff --git a/configs/modules/data/dataset/shelf_unittest_bottom_up.py b/configs/modules/data/dataset/shelf_unittest_bottom_up.py new file mode 100644 index 00000000..28cdbd57 --- /dev/null +++ b/configs/modules/data/dataset/shelf_unittest_bottom_up.py @@ -0,0 +1,15 @@ +type = 'BottomUpMviewMpersonDataset' +data_root = 'tests/data/data/test_dataset/Shelf_unittest' +img_pipeline = [ + dict(type='LoadImagePIL'), + dict(type='Resize', size=224), + dict(type='ToTensor'), + dict(type='BGR2RGB'), +] +meta_path = 'tests/data/data/test_dataset/Shelf_unittest/' +\ + 'xrmocap_meta_perception2d' +test_mode = True +shuffled = False +kps2d_convention = 'fourdag_19' +gt_kps3d_convention = 'campus' +cam_world2cam = True diff --git 
a/configs/modules/ops/bottom_up_association/fourdag_associator.py b/configs/modules/ops/bottom_up_association/fourdag_associator.py new file mode 100644 index 00000000..395c6886 --- /dev/null +++ b/configs/modules/ops/bottom_up_association/fourdag_associator.py @@ -0,0 +1,41 @@ +type = 'FourDAGAssociator' +kps_convention = 'fourdag_19' +min_asgn_cnt = 5 +use_tracking_edges = True +keypoints3d_optimizer = dict( + type='FourDAGOptimizer', + triangulator=dict(type='JacobiTriangulator', ), + active_rate=0.1, + min_track_cnt=5, + bone_capacity=100, + w_bone3d=1.0, + w_square_shape=1e-2, + shape_max_iter=5, + w_kps3d=1.0, + w_regular_pose=1e-3, + pose_max_iter=20, + w_kps2d=1e-5, + w_temporal_trans=1e-1, + w_temporal_pose=1e-2, + min_triangulate_cnt=15, + init_active=0.9, + triangulate_thresh=0.05, +) +graph_construct = dict( + type='GraphConstruct', + kps_convention='fourdag_19', + max_epi_dist=0.15, + max_temp_dist=0.2, + normalize_edges=True, +) +graph_associate = dict( + type='GraphAssociate', + kps_convention='fourdag_19', + w_epi=2, + w_temp=2, + w_view=2, + w_paf=1, + w_hier=0.5, + c_view_cnt=1.5, + min_check_cnt=1, +) diff --git a/configs/modules/ops/triangulation/jacobi_triangulator.py b/configs/modules/ops/triangulation/jacobi_triangulator.py new file mode 100644 index 00000000..3b3e5178 --- /dev/null +++ b/configs/modules/ops/triangulation/jacobi_triangulator.py @@ -0,0 +1 @@ +type = 'JacobiTriangulator' diff --git a/docs/en/benchmark.md b/docs/en/benchmark.md index 93ed3edd..0acb0926 100644 --- a/docs/en/benchmark.md +++ b/docs/en/benchmark.md @@ -21,3 +21,7 @@ Please refer to [Shape-aware 3D Pose Optimization](../../configs/shape_aware_3d_ ### MvP Please refer to [MvP benchmarks](../../configs/mvp/README.md) for details. + +### 4D Association Graph + +Please refer to [FourDAG benchmarks](../../configs/fourdag/README.md) for details. diff --git a/docs/en/dataset_preparation.md b/docs/en/dataset_preparation.md index f8b7d4f4..e89ba841 100644 --- a/docs/en/dataset_preparation.md +++ b/docs/en/dataset_preparation.md @@ -17,6 +17,7 @@ Our data pipeline converts original dataset to our unified meta-data, with data | Campus | [Home page](https://campar.in.tum.de/Chair/MultiHumanPose) | [CampusSeq1.tar.bz2](https://www.campar.in.tum.de/public_datasets/2014_cvpr_belagiannis/CampusSeq1.tar.bz2) | | Shelf | [Home page](https://campar.in.tum.de/Chair/MultiHumanPose) | [Shelf.tar.bz2](https://www.campar.in.tum.de/public_datasets/2014_cvpr_belagiannis/Shelf.tar.bz2) | | CMU Panoptic | [Home page](http://domedb.perception.cs.cmu.edu/) | By [official script](https://github.com/CMU-Perceptual-Computing-Lab/panoptic-toolbox/blob/master/scripts/getData.sh) | +| FourDAG | [Home page](https://github.com/zhangyux15/multiview_human_dataset) | [dataset](https://pan.baidu.com/s/1AZgNV4kp7PuIBicEiSPdGA) (password in home page) | ### Download converted meta-data @@ -26,12 +27,12 @@ For where to put the downloaded meta-data, check [xrmocap dataset structure](tut | Dataset name | meta name | Download link | Notes | | ------------ | ------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| Campus | testset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_testset.zip) | | +| Campus | testset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_testset.zip) | Paf data is included. 
| | Campus | testset_fasterrcnn | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_testset_fasterrcnn.zip) | Bbox 2D is generated by [mmdet Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn). | | Campus | testset_mvpose2d | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_testset_mvpose2d.zip) | Perception 2D is generated by [MVPose](https://github.com/zju3dv/mvpose#accelerate-the-evaluation), defined in coco convention. | | Campus | trainset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_trainset.zip) | | | Campus | trainset_pesudo_gt | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Campus/xrmocap_meta_trainset_pesudo_gt.zip) | Ground-truth keypoints3d is generated by [MvP](https://github.com/sail-sg/mvp#22-shelfcampus), defined in campus convention. | -| Shelf | testset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Shelf/xrmocap_meta_testset.zip) | | +| Shelf | testset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Shelf/xrmocap_meta_testset.zip) | Paf data is included. | | Shelf | testset_fasterrcnn | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Shelf/xrmocap_meta_testset_fasterrcnn.zip) | Bbox 2D is generated by [mmdet Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn). | | Shelf | testset_mvpose2d | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Shelf/xrmocap_meta_testset_mvpose2d.zip) | Perception 2D is generated by [MVPose](https://github.com/zju3dv/mvpose#accelerate-the-evaluation), defined in coco. There's only data for the first three people in ground truth keypoints3d . | | Shelf | trainset | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Shelf/xrmocap_meta_trainset.zip) | | @@ -40,6 +41,9 @@ For where to put the downloaded meta-data, check [xrmocap dataset structure](tut | CMU Panoptic | 160422_haggling1 | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Panoptic/xrmocap_meta_haggling1.zip) | Only five views are selected: 03, 06, 12, 13, 23 | | CMU Panoptic | 160906_ian5 | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Panoptic/xrmocap_meta_ian5.zip) | Only five views are selected: 03, 06, 12, 13, 23 | | CMU Panoptic | 160906_pizza1 | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/Panoptic/xrmocap_meta_pizza1.zip) | Only five views are selected: 03, 06, 12, 13, 23 | +| FourDAG | seq2 | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/FourDAG/xrmocap_meta_seq2.zip) | Paf data is included.| +| FourDAG | seq4 | [download](https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/xrmocap_meta/FourDAG/xrmocap_meta_seq4.zip) |Paf data is included. | + For CMU panoptic meta-data, frames extracted from videos have been removed before uploading. One has to convert panoptic data locally with `bbox_detector = None` and `kps2d_estimator = None` first, and then copy download data into the converted meta-data directory. 
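+
+For reference, the local conversion described above only needs the two fields below in the converter config; everything else can stay as in the repo's converter examples (a sketch, the surrounding config structure is an assumption):
+
+```python
+# Excerpt of a local Panoptic converter config: generate meta files only,
+# without running any 2D perception model.
+bbox_detector = None
+kps2d_estimator = None
+```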
diff --git a/scripts/download_weight.sh b/scripts/download_weight.sh index bf74c81e..a574d24b 100644 --- a/scripts/download_weight.sh +++ b/scripts/download_weight.sh @@ -4,6 +4,7 @@ wget https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50 wget https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth wget https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth wget https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth +wget https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/weight/limb_info.json cd mvpose wget https://openxrlab-share.oss-cn-hongkong.aliyuncs.com/xrmocap/weight/resnet50_reid_camstyle-98d61e41_20220921.pth cd ../.. diff --git a/tests/core/evaluation/test_fourdag_evaluation.py b/tests/core/evaluation/test_fourdag_evaluation.py new file mode 100644 index 00000000..6bcef3bc --- /dev/null +++ b/tests/core/evaluation/test_fourdag_evaluation.py @@ -0,0 +1,35 @@ +import mmcv +import os +import os.path as osp +import pytest +import shutil + +from xrmocap.core.evaluation.builder import build_evaluation +from xrmocap.data_structure.keypoints import Keypoints + +output_dir = 'tests/data/output/core/test_fourdag_evaluation' + + +@pytest.fixture(scope='module', autouse=True) +def fixture(): + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir, exist_ok=False) + + +def test_fourdag_evaluation(): + evaluation_config = dict( + mmcv.Config.fromfile('configs/modules/core/evaluation/' + + 'bottom_up_eval_shelf_unittest.py')) + evaluation_config['output_dir'] = output_dir + evaluation_config['dataset_visualization']['output_dir'] = output_dir + evaluation_config['dataset_visualization']['pred_kps3d_paths'] = osp.join( + output_dir, 'scene0_pred_keypoints3d.npz') + os.makedirs(output_dir, exist_ok=True) + evaluation = build_evaluation(evaluation_config) + evaluation.run(overwrite=True) + pred_keypoints3d = Keypoints.fromfile( + osp.join(output_dir, 'scene0_pred_keypoints3d.npz')) + pred_kps3d = pred_keypoints3d.get_keypoints() + assert pred_kps3d.shape == (5, 2, 19, 4) + assert pred_keypoints3d.get_mask().shape == (5, 2, 19) diff --git a/tests/ops/test_bottom_up_association.py b/tests/ops/test_bottom_up_association.py new file mode 100644 index 00000000..1aa6c187 --- /dev/null +++ b/tests/ops/test_bottom_up_association.py @@ -0,0 +1,29 @@ +# yapf: disable +import mmcv +import os +import pytest +import shutil + +from xrmocap.ops.bottom_up_association.builder import ( + build_bottom_up_associator, +) + +# yapf: enable + +output_dir = 'tests/data/output/ops/test_bottom_up_association' + + +@pytest.fixture(scope='module', autouse=True) +def fixture(): + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir, exist_ok=False) + + +def test_build_bottom_up_associator(): + associator_cfg = dict( + mmcv.Config.fromfile('configs/modules/ops/' + + 'bottom_up_association/' + + 'fourdag_associator.py')) + associator = build_bottom_up_associator(associator_cfg) + assert associator is not None diff --git a/tests/ops/test_triangulation.py b/tests/ops/test_triangulation.py index 359863a0..0a6255ae 100644 --- a/tests/ops/test_triangulation.py +++ b/tests/ops/test_triangulation.py @@ -121,3 +121,53 @@ def test_aniposelib_triangulator(): filename=os.path.join(output_dir, f'projected_aniposelib_{cam_idx}.jpg'), img=canvas) + + +def test_fourdag_triangulator(): + n_view = 6 + 
kps2d_list = []
+    mask_list = []
+    for view_idx in range(n_view):
+        npz_path = os.path.join(input_dir, f'keypoints_2d_{view_idx:02d}.npz')
+        npz_dict = dict(np.load(npz_path, allow_pickle=True))
+        kps2d_list.append(npz_dict['keypoints'][0, 0, :, :])
+        mask_list.append(npz_dict['mask'][0, 0, :])
+    kps2d = np.asarray(kps2d_list)
+    kps2d_mask = np.asarray(mask_list, dtype=kps2d.dtype)
+    cam_param_list = []
+    for view_idx in range(n_view):
+        cam_param_path = os.path.join(input_dir, f'cam_{view_idx:03d}.json')
+        cam_param = PinholeCameraParameter()
+        cam_param.load(cam_param_path)
+        cam_param_list.append(cam_param)
+    triangulator_config = dict(
+        mmcv.Config.fromfile(
+            'configs/modules/ops/triangulation/jacobi_triangulator.py'))
+    triangulator_config['camera_parameters'] = cam_param_list
+    triangulator = build_triangulator(triangulator_config)
+    assert triangulator is not None
+    # test kp2d np
+    kps3d = triangulator.triangulate(kps2d)
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test kp2d list
+    kps3d = triangulator.triangulate(kps2d.tolist())
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test kp2d tuple
+    kps3d = triangulator.triangulate(tuple(map(tuple, kps2d)))
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test mask np
+    points_mask = np.ones_like(kps2d[..., 0:1])
+    kps3d = triangulator.triangulate(points=kps2d, points_mask=points_mask)
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test mask list
+    kps3d = triangulator.triangulate(
+        points=kps2d, points_mask=points_mask.tolist())
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test mask tuple
+    kps3d = triangulator.triangulate(
+        points=kps2d, points_mask=tuple(map(tuple, points_mask)))
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
+    # test mask from confidence
+    points_mask = kps2d_mask
+    kps3d = triangulator.triangulate(points=kps2d, points_mask=points_mask)
+    assert kps3d.shape[:2] == kps2d.shape[1:3]
diff --git a/tests/test_data/test_dataset.py b/tests/test_data/test_dataset.py
index dc5bcf3e..b5f591f9 100644
--- a/tests/test_data/test_dataset.py
+++ b/tests/test_data/test_dataset.py
@@ -82,3 +82,41 @@ def test_load_shelf_dataset_wo_perception2d():
     expect_n_batch = int(
         len(dataloader.dataset) / 2) + len(dataloader.dataset) % 2
     assert iter_count == expect_n_batch
+
+
+def test_load_shelf_dataset_bottom_up():
+    dataset_config = dict(
+        mmcv.Config.fromfile('configs/modules/data/dataset/'
+                             'shelf_unittest_bottom_up.py'))
+    dataloader_config = dict(
+        type='DataLoader', dataset=dataset_config, batch_size=1, num_workers=1)
+    dataloader = build_dataloader(dataloader_config)
+    dataloader.dataset[0]
+    iter_count = 0
+    for batch_idx, batch_data in enumerate(dataloader):
+        # mview img shape: batch_size, n_v, h, w, c
+        assert len(batch_data[0].shape) == 5
+        # K shape: batch_size, n_v, 3, 3
+        assert len(batch_data[1].shape) == 4
+        assert batch_data[1].shape[-2:] == (3, 3)
+        # R shape: batch_size, n_v, 3, 3
+        assert len(batch_data[2].shape) == 4
+        assert batch_data[2].shape[-2:] == (3, 3)
+        # T shape: batch_size, n_v, 3
+        assert len(batch_data[3].shape) == 3
+        assert batch_data[3].shape[-1] == 3
+        # kps3d shape: batch_size, n_person, n_kps, 4
+        assert len(batch_data[4].shape) == 4
+        assert batch_data[4].shape[-1] == 4
+        # end_of_clip shape: batch_size
+        assert len(batch_data[5].shape) == 1
+        assert batch_data[5][0].item() == \
+            (batch_idx == len(dataloader.dataset) - 1)
+        # kw_data
+        kps2d = batch_data[6]
+        assert len(kps2d) == 5
+        pafs = batch_data[7]
+        assert len(pafs) == 5
+        iter_count = batch_idx + 1
+    expect_n_batch = len(dataloader.dataset)
+    assert iter_count == expect_n_batch
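
For readers skimming the diff, the test above also documents the per-item layout of `BottomUpMviewMpersonDataset`. A minimal standalone usage sketch (the config path comes from this patch; the rest is illustrative):

```python
import mmcv

from xrmocap.data.dataset.builder import build_dataset

# Build the bottom-up dataset from the unittest config added in this patch.
dataset_cfg = dict(
    mmcv.Config.fromfile(
        'configs/modules/data/dataset/shelf_unittest_bottom_up.py'))
dataset = build_dataset(dataset_cfg)
# Each item packs multi-view images, camera K/R/T, GT keypoints3d,
# an end-of-clip flag, and the bottom-up perception data (kps2d and PAFs).
mview_img, k_mat, r_mat, t_vec, kps3d, end_of_clip, kps2d, pafs = dataset[0]
```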
diff --git a/xrmocap/core/evaluation/align_keypoints3d.py b/xrmocap/core/evaluation/align_keypoints3d.py
new file mode 100644
index 00000000..7b87e1c4
--- /dev/null
+++ b/xrmocap/core/evaluation/align_keypoints3d.py
@@ -0,0 +1,106 @@
+# yapf: disable
+import numpy as np
+from typing import List
+
+from xrmocap.data_structure.keypoints import Keypoints
+from xrmocap.transform.convention.keypoints_convention import (
+    convert_keypoints, get_keypoint_idx,
+)
+from xrmocap.transform.limbs import get_limbs_from_keypoints
+from xrmocap.utils.mvpose_utils import (
+    add_campus_jaw_headtop, add_campus_jaw_headtop_mask,
+)
+
+# yapf: enable
+
+
+def align_keypoints3d(pred_keypoints3d: Keypoints, gt_keypoints3d: Keypoints,
+                      eval_kps3d_convention: str,
+                      selected_limbs_name: List[str],
+                      additional_limbs_names: List[List[str]]):
+    """Align keypoints convention.
+
+    Args:
+        pred_keypoints3d (Keypoints): Predicted keypoints3d.
+        gt_keypoints3d (Keypoints): Ground-truth keypoints3d.
+        eval_kps3d_convention (str): Keypoints convention to align to.
+        selected_limbs_name (List[str]): Names of the selected limbs
+            to be evaluated.
+        additional_limbs_names (List[List[str]]): Additional limbs to be
+            evaluated, given as [start_name, end_name] pairs.
+
+    Returns:
+        Tuple[Keypoints, Keypoints, List]: The aligned predicted and
+            ground-truth keypoints3d, and the selected limb connections.
+    """
+    ret_limbs = []
+    gt_nose = None
+    pred_nose = None
+    pred_kps3d_convention = pred_keypoints3d.get_convention()
+    gt_kps3d_convention = gt_keypoints3d.get_convention()
+    if gt_kps3d_convention == 'panoptic':
+        gt_nose_index = get_keypoint_idx(
+            name='nose_openpose', convention=gt_kps3d_convention)
+        gt_nose = gt_keypoints3d.get_keypoints()[:, :, gt_nose_index, :3]
+
+    if pred_kps3d_convention == 'coco':
+        pred_nose_index = get_keypoint_idx(
+            name='nose', convention=pred_kps3d_convention)
+        pred_nose = pred_keypoints3d.get_keypoints()[:, :, pred_nose_index, :3]
+
+    if pred_kps3d_convention == 'fourdag_19' or\
+            pred_kps3d_convention == 'openpose_25':
+        pred_leftear_index = get_keypoint_idx(
+            name='left_ear_openpose', convention=pred_kps3d_convention)
+        pre_rightear_index = get_keypoint_idx(
+            name='right_ear_openpose', convention=pred_kps3d_convention)
+        head_center = (
+            pred_keypoints3d.get_keypoints()[:, :, pred_leftear_index, :3] +
+            pred_keypoints3d.get_keypoints()[:, :, pre_rightear_index, :3]) / 2
+        pred_nose = head_center
+
+    if pred_kps3d_convention != eval_kps3d_convention:
+        pred_keypoints3d = convert_keypoints(
+            keypoints=pred_keypoints3d,
+            dst=eval_kps3d_convention,
+            approximate=True)
+    if gt_kps3d_convention != eval_kps3d_convention:
+        gt_keypoints3d = convert_keypoints(
+            keypoints=gt_keypoints3d,
+            dst=eval_kps3d_convention,
+            approximate=True)
+    limbs = get_limbs_from_keypoints(
+        keypoints=pred_keypoints3d, fill_limb_names=True)
+    limb_name_list = []
+    conn_list = []
+    for limb_name, conn in limbs.get_connections_by_names().items():
+        limb_name_list.append(limb_name)
+        conn_list.append(conn)
+
+    for idx, limb_name in enumerate(limb_name_list):
+        if limb_name in selected_limbs_name:
+            ret_limbs.append(conn_list[idx])
+
+    for conn_names in additional_limbs_names:
+        kps_idx_0 = get_keypoint_idx(
+            name=conn_names[0], convention=eval_kps3d_convention)
+        kps_idx_1 = get_keypoint_idx(
+            name=conn_names[1], convention=eval_kps3d_convention)
+        ret_limbs.append(np.array([kps_idx_0, kps_idx_1], dtype=np.int32))
+    pred_kps3d_mask = pred_keypoints3d.get_mask()
+    pred_kps3d = pred_keypoints3d.get_keypoints()[..., :3]
+    if pred_nose is not None:
+        pred_kps3d = add_campus_jaw_headtop(pred_nose, pred_kps3d)
+        pred_kps3d_mask = add_campus_jaw_headtop_mask(pred_kps3d_mask)
+
+    gt_kps3d_mask = gt_keypoints3d.get_mask()
+    gt_kps3d = gt_keypoints3d.get_keypoints()[..., :3]
+    if gt_nose is not None:
+        gt_kps3d = add_campus_jaw_headtop(gt_nose, gt_kps3d)
+        gt_kps3d_mask = add_campus_jaw_headtop_mask(gt_kps3d_mask)
+
+    pred_kps3d = np.concatenate((pred_kps3d, pred_kps3d_mask[..., np.newaxis]),
+                                axis=-1)
+    pred_keypoints3d = Keypoints(
+        kps=pred_kps3d, mask=pred_kps3d_mask, convention=eval_kps3d_convention)
+    gt_kps3d = np.concatenate((gt_kps3d, gt_kps3d_mask[..., np.newaxis]),
+                              axis=-1)
+    gt_keypoints3d = Keypoints(
+        kps=gt_kps3d, mask=gt_kps3d_mask, convention=eval_kps3d_convention)
+
+    return pred_keypoints3d, gt_keypoints3d, ret_limbs
diff --git a/xrmocap/core/evaluation/bottom_up_association_evaluation.py b/xrmocap/core/evaluation/bottom_up_association_evaluation.py
new file mode 100644
index 00000000..5cfa8ffc
--- /dev/null
+++ b/xrmocap/core/evaluation/bottom_up_association_evaluation.py
@@ -0,0 +1,201 @@
+# yapf: disable
+import logging
+import numpy as np
+import os.path as osp
+from tqdm import tqdm
+from typing import List, Union
+from xrprimer.utils.log_utils import get_logger
+from xrprimer.utils.path_utils import prepare_output_path
+
+from xrmocap.core.evaluation.align_keypoints3d import align_keypoints3d
+from xrmocap.core.evaluation.metrics import calc_limbs_accuracy, evaluate
+from xrmocap.data.data_visualization.builder import (
+    BaseDataVisualization, build_data_visualization,
+)
+from xrmocap.data.dataset.builder import MviewMpersonDataset, build_dataset
+from xrmocap.data_structure.keypoints import Keypoints
+from xrmocap.ops.bottom_up_association.builder import (
+    FourDAGAssociator, build_bottom_up_associator,
+)
+from xrmocap.transform.convention.keypoints_convention import get_keypoint_num
+
+# yapf: enable
+
+
+class BottomUpAssociationEvaluation:
+    """Bottom-up association evaluation."""
+
+    def __init__(self,
+                 output_dir: str,
+                 selected_limbs_name: List[str],
+                 dataset: Union[dict, MviewMpersonDataset],
+                 associator: Union[dict, FourDAGAssociator],
+                 additional_limbs_names: List[List[str]] = [],
+                 dataset_visualization: Union[None, dict,
+                                              BaseDataVisualization] = None,
+                 pred_kps3d_convention: str = 'coco',
+                 eval_kps3d_convention: str = 'campus',
+                 logger: Union[None, str, logging.Logger] = None) -> None:
+        """Initialization for the class.
+
+        Args:
+            output_dir (str): The path to save results.
+            selected_limbs_name (List[str]): The names of the selected
+                limbs in evaluation.
+            dataset (Union[dict, MviewMpersonDataset]): Dataset to be
+                evaluated, or a config dict to build it.
+            associator (Union[dict, FourDAGAssociator]): Associator for
+                bottom-up association, or a config dict to build it.
+            additional_limbs_names (List[List[str]]):
+                Names at both ends of each additional limb.
+            dataset_visualization
+                (Union[None, dict, BaseDataVisualization], optional):
+                Defaults to None.
+            pred_kps3d_convention (str, optional): Target convention of
+                keypoints3d. Defaults to 'coco'.
+            eval_kps3d_convention (str, optional): The convention of
+                keypoints3d for evaluation. Defaults to 'campus'.
+            logger (Union[None, str, logging.Logger], optional):
+                Logger for logging. If None, root logger will be selected.
+                Defaults to None.
+ """ + + self.output_dir = output_dir + self.pred_kps3d_convention = pred_kps3d_convention + self.eval_kps3d_convention = eval_kps3d_convention + self.additional_limbs_names = additional_limbs_names + self.selected_limbs_name = selected_limbs_name + self.logger = get_logger(logger) + + if isinstance(dataset, dict): + dataset['logger'] = self.logger + self.dataset = build_dataset(dataset) + else: + self.dataset = dataset + self.n_views = self.dataset.n_views + if isinstance(associator, dict): + associator['logger'] = self.logger + associator['n_views'] = self.n_views + self.associator = build_bottom_up_associator(associator) + else: + self.associator = associator + + if isinstance(dataset_visualization, dict): + dataset_visualization['logger'] = self.logger + self.dataset_visualization = build_data_visualization( + dataset_visualization) + else: + self.dataset_visualization = dataset_visualization + + def run(self, overwrite: bool = False): + prepare_output_path( + output_path=self.output_dir, + allowed_suffix='', + path_type='dir', + overwrite=overwrite, + logger=self.logger) + n_frame = len(self.dataset) + n_kps = get_keypoint_num(convention=self.pred_kps3d_convention) + pred_kps3d = np.zeros(shape=(n_frame, 1, n_kps, 4)) + pred_kps2d = np.zeros(shape=(n_frame, 1, self.n_views, n_kps, 3)) + gt_kps3d = None + max_identity = 0 + end_of_clip_idxs = [] + identities = [] + for frame_idx, frame_item in enumerate(tqdm(self.dataset)): + _, _, _, _, kps3d, end_of_clip, kps2d, pafs = frame_item + if end_of_clip: + end_of_clip_idxs.append(frame_idx) + fisheye_list = self.dataset.fisheye_params[0] + # prepare input for associate single frame + + self.associator.set_cameras(fisheye_list) + predict_keypoints3d, identities, multi_kps2d, _ = \ + self.associator.associate_frame(kps2d, pafs, end_of_clip) + # save predict kps3d + for idx, identity in enumerate(identities): + if identity > max_identity: + n_identity = identity - max_identity + pred_kps3d = np.concatenate( + (pred_kps3d, + np.zeros(shape=(n_frame, n_identity, n_kps, 4))), + axis=1) + pred_kps2d = np.concatenate( + (pred_kps2d, + np.zeros( + shape=(n_frame, n_identity, self.n_views, n_kps, + 3))), + axis=1) + max_identity = identity + pred_kps3d[frame_idx, + identity] = predict_keypoints3d.get_keypoints()[0, + idx] + # prepare 2d associate result + if identity in multi_kps2d: + pred_kps2d[frame_idx, identity] = multi_kps2d[identity] + # save ground truth kps3d + if gt_kps3d is None: + gt_kps3d = kps3d.numpy()[np.newaxis] + else: + gt_kps3d = np.concatenate( + (gt_kps3d, kps3d.numpy()[np.newaxis]), axis=0) + + pred_keypoints3d = Keypoints( + dtype='numpy', + kps=pred_kps3d, + mask=pred_kps3d[..., -1] > 0, + convention=self.pred_kps3d_convention, + logger=self.logger) + gt_keypoints3d = Keypoints( + dtype='numpy', + kps=gt_kps3d, + mask=gt_kps3d[..., -1] > 0, + convention=self.dataset.gt_kps3d_convention, + logger=self.logger) + mscene_keypoints_paths = [] + + # prepare result + scene_start_idx = 0 + for scene_idx, scene_end_idx in enumerate(end_of_clip_idxs): + scene_keypoints = pred_keypoints3d.clone() + kps3d = scene_keypoints.get_keypoints()[ + scene_start_idx:scene_end_idx + 1, ...] + mask = scene_keypoints.get_mask()[scene_start_idx:scene_end_idx + + 1, ...] 
+        scene_keypoints.set_keypoints(kps3d)
+        scene_keypoints.set_mask(mask)
+        npz_path = osp.join(self.output_dir,
+                            f'scene{scene_idx}_pred_keypoints3d.npz')
+        scene_keypoints.dump(npz_path)
+        mscene_keypoints_paths.append(npz_path)
+
+        npz_path = osp.join(self.output_dir,
+                            f'scene{scene_idx}_associate_keypoints2d')
+        associate_kps2d = pred_kps2d[scene_start_idx:scene_end_idx + 1,
+                                     ...]
+        np.save(npz_path, associate_kps2d)
+
+        scene_start_idx = scene_end_idx + 1
+
+        # evaluation
+        pred_keypoints3d_, gt_keypoints3d_, limbs = align_keypoints3d(
+            pred_keypoints3d, gt_keypoints3d, self.eval_kps3d_convention,
+            self.selected_limbs_name, self.additional_limbs_names)
+        _, eval_table = calc_limbs_accuracy(
+            pred_keypoints3d_, gt_keypoints3d_, limbs, logger=self.logger)
+        self.logger.info('\n' + eval_table.get_string())
+        eval_dict = evaluate(
+            pred_keypoints3d_,
+            gt_keypoints3d_,
+            pck_thres=[100, 200],
+            logger=self.logger)
+        self.logger.info('MPJPE: {:.2f} ± {:.2f} mm'.format(
+            eval_dict['mpjpe_mean'], eval_dict['mpjpe_std']))
+        self.logger.info(f'PA-MPJPE: {eval_dict["pa_mpjpe_mean"]:.2f} ± '
+                         f'{eval_dict["pa_mpjpe_std"]:.2f} mm')
+        self.logger.info(f'PCK@100mm: {eval_dict["pck"][100]:.2f} %')
+        self.logger.info(f'PCK@200mm: {eval_dict["pck"][200]:.2f} %')
+
+        # visualization
+        if self.dataset_visualization is not None:
+            self.dataset_visualization.pred_kps3d_paths = \
+                mscene_keypoints_paths
+            self.dataset_visualization.run(overwrite=overwrite)
diff --git a/xrmocap/core/evaluation/builder.py b/xrmocap/core/evaluation/builder.py
index c2ad0b0e..b9d9ee94 100644
--- a/xrmocap/core/evaluation/builder.py
+++ b/xrmocap/core/evaluation/builder.py
@@ -1,5 +1,6 @@
 from mmcv.utils import Registry
 
+from .bottom_up_association_evaluation import BottomUpAssociationEvaluation
 from .mvp_evaluation import MVPEvaluation
 from .top_down_association_evaluation import TopDownAssociationEvaluation
 
@@ -8,6 +9,8 @@
 EVALUATION.register_module(
     name='TopDownAssociationEvaluation', module=TopDownAssociationEvaluation)
 EVALUATION.register_module(name='MVPEvaluation', module=MVPEvaluation)
+EVALUATION.register_module(
+    name='BottomUpAssociationEvaluation', module=BottomUpAssociationEvaluation)
 
 
 def build_evaluation(cfg):
diff --git a/xrmocap/core/evaluation/metrics.py b/xrmocap/core/evaluation/metrics.py
new file mode 100644
index 00000000..bfcd8c41
--- /dev/null
+++ b/xrmocap/core/evaluation/metrics.py
@@ -0,0 +1,198 @@
+# yapf: disable
+import logging
+import numpy as np
+from prettytable import PrettyTable
+from typing import List, Tuple, Union
+
+from xrmocap.data_structure.keypoints import Keypoints
+from xrmocap.utils.geometry import compute_similarity_transform
+from xrmocap.utils.mvpose_utils import (
+    check_limb_is_correct, compute_mpjpe, vectorize_distance,
+)
+
+# yapf: enable
+
+
+def evaluate(pred_keypoints3d: Keypoints,
+             gt_keypoints3d: Keypoints,
+             pck_thres: List = [50, 100],
+             scale=1000.,
+             logger: Union[None, str, logging.Logger] = None) -> dict:
+    """Evaluate MPJPE, PA-MPJPE and PCK of predicted keypoints3d.
+
+    Args:
+        pred_keypoints3d (Keypoints): Predicted keypoints3d,
+            which must contain no np.nan.
+        gt_keypoints3d (Keypoints): Ground-truth keypoints3d.
+        pck_thres (List): Threshold values (in millimeters) for PCK.
+            Defaults to [50, 100].
+        scale: Scale factor from meters to millimeters.
+            Defaults to 1000..
+        logger (Union[None, str, logging.Logger], optional):
+            Logger for logging. Defaults to None.
+
+    Returns:
+        dict: Mean and std of MPJPE and PA-MPJPE, and PCK per threshold.
+    """
+    # There must be no np.nan in the pred_keypoints3d
+    mpjpe, pa_mpjpe = [], []
+    pck = {i: [] for i in pck_thres}
+    n_frame = gt_keypoints3d.get_frame_number()
+    gt_kps3d = gt_keypoints3d.get_keypoints()[..., :3]
+    gt_kps3d_mask = gt_keypoints3d.get_mask()
+    pred_kps3d = pred_keypoints3d.get_keypoints()[..., :3]
+    pred_kps3d_mask = pred_keypoints3d.get_mask()
+    pred_kps3d_convention = pred_keypoints3d.get_convention()
+    gt_kps3d_convention = gt_keypoints3d.get_convention()
+    for frame_idx in range(n_frame):
+        if not gt_kps3d_mask[frame_idx].any():
+            continue
+        gt_kps3d_idxs = np.where(
+            np.sum(gt_kps3d_mask[frame_idx], axis=1) > 0)[0]
+        for gt_kps3d_idx in gt_kps3d_idxs:
+            f_gt_kps3d = gt_kps3d[frame_idx][gt_kps3d_idx]
+            f_pred_kps3d = pred_kps3d[frame_idx][
+                np.sum(pred_kps3d_mask[frame_idx], axis=1) > 0]
+            if len(f_pred_kps3d) == 0:
+                continue
+
+            dist = vectorize_distance(f_gt_kps3d[np.newaxis], f_pred_kps3d)
+            f_pred_kps3d = f_pred_kps3d[np.argmin(dist[0])]
+
+            if np.all((f_pred_kps3d == 0)):
+                continue
+
+            # MPJPE
+            f_pred_keypoints = Keypoints(
+                kps=np.concatenate(
+                    (f_pred_kps3d, np.ones_like(f_pred_kps3d[..., 0:1])),
+                    axis=-1),
+                convention=pred_kps3d_convention)
+            f_gt_keypoints = Keypoints(
+                kps=np.concatenate(
+                    (f_gt_kps3d, np.ones_like(f_gt_kps3d[..., 0:1])), axis=-1),
+                convention=gt_kps3d_convention)
+            mpjpe.append(
+                compute_mpjpe(f_pred_keypoints, f_gt_keypoints, align=True))
+
+            # PA-MPJPE
+            _, _, rotation, scaling, transl = compute_similarity_transform(
+                f_gt_kps3d, f_pred_kps3d, compute_optimal_scale=True)
+            pred_kps3d_pa = (scaling * f_pred_kps3d.dot(rotation)) + transl
+
+            pred_keypoints_pa = Keypoints(
+                kps=np.concatenate(
+                    (pred_kps3d_pa, np.ones_like(pred_kps3d_pa[..., 0:1])),
+                    axis=-1),
+                convention=pred_kps3d_convention)
+            pa_mpjpe_i = compute_mpjpe(
+                pred_keypoints_pa, f_gt_keypoints, align=True)
+            pa_mpjpe.append(pa_mpjpe_i)
+
+            for thres in pck_thres:
+                pck[thres].append(np.mean(pa_mpjpe_i <= (thres / scale)))
+
+    mpjpe = np.asarray(mpjpe) * scale  # m to mm
+    pa_mpjpe = np.asarray(pa_mpjpe) * scale  # m to mm
+    mpjpe_mean, mpjpe_std = np.mean(mpjpe), np.std(mpjpe)
+    pa_mpjpe_mean, pa_mpjpe_std = np.mean(pa_mpjpe), np.std(pa_mpjpe)
+    # percentage
+    for thres in pck_thres:
+        pck[thres] = np.mean(pck[thres]) * 100.
+    return dict(
+        mpjpe_mean=mpjpe_mean,
+        mpjpe_std=mpjpe_std,
+        pa_mpjpe_mean=pa_mpjpe_mean,
+        pa_mpjpe_std=pa_mpjpe_std,
+        pck=pck)
+
+
+def calc_limbs_accuracy(
+        pred_keypoints3d,
+        gt_keypoints3d,
+        limbs,
+        logger: Union[None, str, logging.Logger] = None
+) -> Tuple[np.ndarray, PrettyTable]:
+    """Calculate the limb accuracy (PCP) of predicted keypoints3d.
+
+    Args:
+        pred_keypoints3d (Keypoints): Predicted keypoints3d.
+        gt_keypoints3d (Keypoints): Ground-truth keypoints3d.
+        limbs: Limb connections to be evaluated.
+        logger (Union[None, str, logging.Logger], optional):
+            Logger for logging. Defaults to None.
+
+    Returns:
+        Tuple[np.ndarray, PrettyTable]: Per-limb check results and
+            a table of per-actor accuracy.
+    """
+    n_frame = gt_keypoints3d.get_frame_number()
+    n_gt_person = gt_keypoints3d.get_person_number()
+    gt_kps3d = gt_keypoints3d.get_keypoints()[..., :3]
+    gt_kps3d_mask = gt_keypoints3d.get_mask()
+    pred_kps3d = pred_keypoints3d.get_keypoints()[..., :3]
+    pred_kps3d_mask = pred_keypoints3d.get_mask()
+    check_result = np.zeros((n_frame, n_gt_person, len(limbs) + 1),
+                            dtype=np.int32)
+    accuracy_cnt = 0
+    error_cnt = 0
+
+    for idx in range(n_frame):
+        if not gt_kps3d_mask[idx].any():
+            continue
+        gt_kps3d_idxs = np.where(np.sum(gt_kps3d_mask[idx], axis=1) > 0)[0]
+        for gt_kps3d_idx in gt_kps3d_idxs:
+            f_gt_kps3d = gt_kps3d[idx][gt_kps3d_idx]
+            f_pred_kps3d = pred_kps3d[idx][
+                np.sum(pred_kps3d_mask[idx], axis=1) > 0]
+            if len(f_pred_kps3d) == 0:
+                continue
+
+            dist = vectorize_distance(f_gt_kps3d[np.newaxis], f_pred_kps3d)
+            f_pred_kps3d = f_pred_kps3d[np.argmin(dist[0])]
+
+            for i, limb in enumerate(limbs):
+                start_point, end_point = limb
+                if check_limb_is_correct(f_pred_kps3d[start_point],
+                                         f_pred_kps3d[end_point],
+                                         f_gt_kps3d[start_point],
+                                         f_gt_kps3d[end_point]):
+                    check_result[idx, gt_kps3d_idx, i] = 1
+                    accuracy_cnt += 1
+                else:
+                    check_result[idx, gt_kps3d_idx, i] = -1
+                    error_cnt += 1
+            gt_hip = (f_gt_kps3d[2] + f_gt_kps3d[3]) / 2
+            pred_hip = (f_pred_kps3d[2] + f_pred_kps3d[3]) / 2
+            if check_limb_is_correct(pred_hip, f_pred_kps3d[12], gt_hip,
+                                     f_gt_kps3d[12]):
+                check_result[idx, gt_kps3d_idx, -1] = 1
+                accuracy_cnt += 1
+            else:
+                check_result[idx, gt_kps3d_idx, -1] = -1
+                error_cnt += 1
+    bone_group = dict([('Torso', np.array([len(limbs) - 1])),
+                       ('Upper arms', np.array([5, 6])),
+                       ('Lower arms', np.array([4, 7])),
+                       ('Upper legs', np.array([1, 2])),
+                       ('Lower legs', np.array([0, 3]))])
+    if len(limbs) > 9:
+        # head is absent in some datasets
+        bone_group['Head'] = np.array([8])
+
+    person_wise_avg = np.sum(
+        check_result > 0, axis=(0, 2)) / np.sum(
+            np.abs(check_result), axis=(0, 2))
+
+    bone_wise_result = dict()
+    bone_person_wise_result = dict()
+    for k, v in bone_group.items():
+        bone_wise_result[k] = np.sum(check_result[:, :, v] > 0) / np.sum(
+            np.abs(check_result[:, :, v]))
+        bone_person_wise_result[k] = np.sum(
+            check_result[:, :, v] > 0, axis=(0, 2)) / np.sum(
+                np.abs(check_result[:, :, v]), axis=(0, 2))
+
+    tb = PrettyTable()
+    tb.field_names = ['Bone Group'] + [
+        f'Actor {i}' for i in range(bone_person_wise_result['Torso'].shape[0])
+    ] + ['Average']
+    for k, v in bone_person_wise_result.items():
+        this_row = [k] + [np.char.mod('%.4f', i) for i in v
+                          ] + [np.char.mod('%.4f',
+                                           np.sum(v) / len(v))]
+        tb.add_row(this_row)
+    this_row = ['Total'] + [
+        np.char.mod('%.4f', i) for i in person_wise_avg
+    ] + [np.char.mod('%.4f',
+                     np.sum(person_wise_avg) / len(person_wise_avg))]
+    tb.add_row(this_row)
+    return check_result, tb
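
Together with `align_keypoints3d`, the metrics above are meant to be called in the order used by `BottomUpAssociationEvaluation.run()`. A condensed sketch, assuming `pred_keypoints3d` and `gt_keypoints3d` are existing `Keypoints` instances:

```python
from xrmocap.core.evaluation.align_keypoints3d import align_keypoints3d
from xrmocap.core.evaluation.metrics import calc_limbs_accuracy, evaluate

# Align predictions and ground truth to the evaluation convention,
# then compute per-limb PCP and MPJPE / PA-MPJPE / PCK overall.
pred_aligned, gt_aligned, limbs = align_keypoints3d(
    pred_keypoints3d, gt_keypoints3d,
    eval_kps3d_convention='campus',
    selected_limbs_name=['left_lower_leg', 'right_lower_leg'],
    additional_limbs_names=[['jaw', 'headtop']])
_, eval_table = calc_limbs_accuracy(pred_aligned, gt_aligned, limbs)
print(eval_table.get_string())
eval_dict = evaluate(pred_aligned, gt_aligned, pck_thres=[100, 200])
print(eval_dict['mpjpe_mean'], eval_dict['pck'][200])
```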
-1,6 +1,6 @@ # yapf: disable import numpy as np -from typing import List, Union, overload +from typing import List, Tuple, Union, overload from xrmocap.data_structure.keypoints import Keypoints @@ -68,6 +68,7 @@ def visualize_keypoints2d( img_paths: Union[None, List[str]] = None, video_path: Union[None, str] = None, overwrite: bool = False, + resolution: Tuple = None, return_array: bool = False) -> Union[None, np.ndarray]: """Visualize 2d keypoints, powered by mmhuman3d. @@ -127,6 +128,7 @@ def visualize_keypoints2d( frame_list=img_paths, origin_frames=video_path, data_source=kps_convention, + resolution=resolution, mask=mm_kps_mask, overwrite=overwrite, return_array=return_array) diff --git a/xrmocap/data/data_visualization/mview_mperson_data_visualization.py b/xrmocap/data/data_visualization/mview_mperson_data_visualization.py index edd8c281..a684782d 100644 --- a/xrmocap/data/data_visualization/mview_mperson_data_visualization.py +++ b/xrmocap/data/data_visualization/mview_mperson_data_visualization.py @@ -2,7 +2,7 @@ import logging import numpy as np import os -from typing import List, Union +from typing import List, Tuple, Union from xrprimer.data_structure.camera import FisheyeCameraParameter from xrmocap.core.visualization import ( @@ -27,6 +27,8 @@ def __init__(self, pred_kps3d_paths: List[str] = None, pred_kps3d_convention: Union[None, str] = None, vis_gt_kps3d: bool = True, + vis_bottom_up: bool = False, + resolution: Tuple = None, gt_kps3d_convention: Union[None, str] = None, vis_cameras: bool = False, vis_aio_video: bool = True, @@ -97,6 +99,7 @@ def __init__(self, self.vis_percep2d = vis_percep2d self.kps2d_convention = kps2d_convention self.vis_gt_kps3d = vis_gt_kps3d + self.vis_bottom_up = vis_bottom_up self.gt_kps3d_convention = gt_kps3d_convention self.vis_cameras = vis_cameras self.vis_aio_video = vis_aio_video @@ -104,6 +107,7 @@ def __init__(self, if pred_kps3d_paths is not None \ else [] self.pred_kps3d_convention = pred_kps3d_convention + self.resolution = resolution def run(self, overwrite: bool = False) -> None: """Visualize meta-data selected in __init__(). @@ -137,6 +141,8 @@ def run(self, overwrite: bool = False) -> None: self.visualize_perception_2d(scene_idx) if self.vis_gt_kps3d: self.visualize_ground_truth_3d(scene_idx) + if self.vis_bottom_up: + self.visualize_perception_2d_bottm_up(scene_idx) def visualize_perception_2d(self, scene_idx: int) -> None: """Visualize converted 2D perception keypoints2d data. If bbox was @@ -199,6 +205,7 @@ def visualize_perception_2d(self, scene_idx: int) -> None: output_path=video_path, img_paths=frame_list, overwrite=True, + resolution=self.resolution, return_array=self.vis_aio_video) mview_plot_arr.append(plot_arr) # draw views all in one @@ -207,6 +214,68 @@ def visualize_perception_2d(self, scene_idx: int) -> None: mview_array_to_video( mview_plot_arr, video_path, logger=self.logger) + def visualize_perception_2d_bottm_up(self, scene_idx: int) -> None: + """Visualize bottom-up associated 2D perception keypoints2d data. + + Args: + scene_idx (int): + Index of this scene. 
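+
+        Example (a hypothetical call, assuming the associated
+        keypoints2d file has already been dumped under the
+        visualization output dir):
+            >>> visualization.visualize_perception_2d_bottm_up(scene_idx=0)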
+ """ + scene_dir = os.path.join(self.meta_path, f'scene_{scene_idx}') + npz_path = os.path.join(self.output_dir, + f'scene{scene_idx}_associate_keypoints2d.npy') + cam_dir = os.path.join(scene_dir, 'camera_parameters') + file_names = sorted(os.listdir(cam_dir)) + cam_names = [] + view_idxs = [] + for file_name in file_names: + if file_name.startswith('fisheye_param_'): + cam_names.append(file_name) + view_idxs.append( + int( + file_name.replace('fisheye_param_', + '').replace('.json', ''))) + n_view = len(cam_names) + mview_plot_arr = [] + arr_data = np.load(npz_path) + for idx in range(n_view): + view_idx = view_idxs[idx] + if self.verbose: + self.logger.info('Visualizing perception 2D data for' + + f' scene {scene_idx} view {view_idx}') + list_path = os.path.join(scene_dir, + f'image_list_view_{view_idx:02d}.txt') + with open(list_path, 'r') as f_read: + rela_path_list = f_read.readlines() + frame_list = [ + os.path.join(self.data_root, rela_path.strip()) + for rela_path in rela_path_list + ] + kps2d = arr_data[:, :, idx, :, :] + keypoints2d = Keypoints( + kps=kps2d, + mask=kps2d[..., 2] > 0, + convention=self.kps2d_convention) + if self.kps2d_convention is not None: + keypoints2d = convert_keypoints( + keypoints2d, dst=self.kps2d_convention, approximate=True) + scene_vis_dir = os.path.join(self.output_dir, f'scene_{scene_idx}') + video_path = os.path.join( + scene_vis_dir, f'associate_kps2d_view_{view_idx:02d}.mp4') + plot_arr = visualize_keypoints2d( + keypoints=keypoints2d, + output_path=video_path, + img_paths=frame_list, + resolution=self.resolution, + overwrite=True, + return_array=self.vis_aio_video) + mview_plot_arr.append(plot_arr) + # draw views all in one + if self.vis_aio_video: + video_path = os.path.join(scene_vis_dir, 'associate_kps2d_AIO.mp4') + mview_array_to_video( + mview_plot_arr, video_path, logger=self.logger) + def visualize_ground_truth_3d(self, scene_idx: int) -> None: """Visualize converted ground truth keypoints3d data. diff --git a/xrmocap/data/dataset/bottom_up_mview_mperson_dataset.py b/xrmocap/data/dataset/bottom_up_mview_mperson_dataset.py new file mode 100644 index 00000000..7336c654 --- /dev/null +++ b/xrmocap/data/dataset/bottom_up_mview_mperson_dataset.py @@ -0,0 +1,199 @@ +# yapf: disable +import json +import logging +import os +import torch +from typing import Tuple, Union + +from xrmocap.transform.convention.keypoints_convention import ( + convert_bottom_up_kps_paf, +) +from .mview_mperson_dataset import MviewMpersonDataset + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal +# yapf: enable + + +class BottomUpMviewMpersonDataset(MviewMpersonDataset): + + def __init__(self, + data_root: str, + img_pipeline: list, + meta_path: str = 'xrmocap_meta', + test_mode: bool = True, + shuffled: bool = False, + metric_unit: Literal['meter', 'centimeter', + 'millimeter'] = 'meter', + kps2d_convention: str = 'fourdag19', + gt_kps3d_convention: Union[None, str] = None, + cam_world2cam: bool = False, + cam_k_dim: int = 3, + logger: Union[None, str, logging.Logger] = None) -> None: + """A dataset loads multi-view multi-person data from source dataset and + meta-data from data converter. + + Args: + data_root (str): + Root path of the downloaded dataset. + img_pipeline (list): + A list of image transform instances. + meta_path (str, optional): + Path to the meta-data dir. Defaults to 'xrmocap_meta'. + test_mode (bool, optional): + Whether this dataset is used to load testset. + Defaults to True. 
+            shuffled (bool, optional):
+                Whether this dataset is used to load shuffled frames.
+                If True, getitem will always get end_of_clip=False.
+                Defaults to False.
+            metric_unit (Literal[
+                    'meter', 'centimeter', 'millimeter'], optional):
+                Metric unit of gt3d and camera parameters. Defaults to 'meter'.
+            gt_kps3d_convention (Union[None, str], optional):
+                Target convention of keypoints3d, if None,
+                kps3d will keep its convention in meta-data.
+                Defaults to None.
+            cam_world2cam (bool, optional):
+                Direction of returned camera extrinsics.
+                Defaults to False.
+            cam_k_dim (int, optional):
+                Dimension of returned camera intrinsic mat.
+                Defaults to 3.
+            logger (Union[None, str, logging.Logger], optional):
+                Logger for logging. If None, root logger will be selected.
+                Defaults to None.
+        """
+
+        MviewMpersonDataset.__init__(
+            self,
+            data_root=data_root,
+            img_pipeline=img_pipeline,
+            meta_path=meta_path,
+            test_mode=test_mode,
+            shuffled=shuffled,
+            metric_unit=metric_unit,
+            kps2d_convention=kps2d_convention,
+            gt_kps3d_convention=gt_kps3d_convention,
+            cam_world2cam=cam_world2cam,
+            cam_k_dim=cam_k_dim,
+            logger=logger)
+
+    def __getitem__(
+        self, index: int
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor,
+               torch.Tensor, bool, list, list]:
+        """Get item from the dataset.
+
+        Args:
+            index (int): Index in dataset.
+
+        Returns:
+            mview_img_tensor (torch.Tensor):
+                Multi-view image tensor in shape (n_view, h, w, c).
+            k_tensor (torch.Tensor):
+                Multi-view intrinsic tensor in shape (n_view, 3, 3).
+            r_tensor (torch.Tensor):
+                Multi-view rotation mat tensor in shape (n_view, 3, 3).
+            t_tensor (torch.Tensor):
+                Multi-view translation vector tensor in shape (n_view, 3).
+            kps3d (torch.Tensor):
+                Multi-view kps3d tensor in shape (n_person, n_kps, 4),
+                while the last dim is confidence. If kps3d[p, k, 3] == 0,
+                kps3d[p, k] is invalid and do not use the data.
+            end_of_clip (bool):
+                Whether it is the last frame of this clip. When shuffled,
+                it is always False.
+            kps2d (list):
+                A list of bottom-up keypoints2d candidates, one per view.
+            pafs (list):
+                A list of pafs data obtained with openpose, one per view.
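+
+        Example (a minimal sketch, assuming the dataset has been built
+        from a config; names are illustrative):
+            >>> mview_img, k, r, t, kps3d, end_of_clip, kps2d, pafs = \
+            ...     dataset[0]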
+ """ + if index >= len(self): + raise StopIteration + scene_idx, frame_idx, end_of_clip = self.process_index_mapping(index) + # load multi-view images + img_paths = self.image_list[scene_idx][frame_idx] + mview_img_list = [] + for rela_path in img_paths: + img_path = os.path.join(self.data_root, rela_path) + img_tensor = self.img_pipeline(img_path) + mview_img_list.append(img_tensor) + mview_img_tensor = torch.stack(mview_img_list) + + k_list = [] + r_list = [] + t_list = [] + # prepare multi-view cameras + for fisheye_param in self.fisheye_params[scene_idx]: + k_list.append( + torch.tensor(fisheye_param.get_intrinsic(self.cam_k_dim))) + r_list.append(torch.tensor(fisheye_param.get_extrinsic_r())) + t_list.append(torch.tensor(fisheye_param.get_extrinsic_t())) + k_tensor = torch.stack(k_list) + r_tensor = torch.stack(r_list) + t_tensor = torch.stack(t_list) + # prepare kps3d + keypoints3d = self.gt3d[scene_idx] + kps3d = keypoints3d.get_keypoints()[frame_idx] + # if this frame is the end of clip(scene) + end_of_clip = end_of_clip and not self.shuffled + + kps2d = [] + pafs = [] + mview_keypoints2d_list = self.percep_keypoints2d[scene_idx] + mview_kps2d_list = [] + n_view = mview_img_tensor.shape[0] + for view_idx in range(n_view): + mview_kps2d_list.append( + mview_keypoints2d_list[view_idx][frame_idx]) + kps2d.append(mview_keypoints2d_list[view_idx][frame_idx]['kps']) + pafs.append(mview_keypoints2d_list[view_idx][frame_idx]['pafs']) + + return mview_img_tensor, k_tensor, r_tensor,\ + t_tensor, kps3d, end_of_clip, kps2d, pafs + + def load_perception_2d(self): + """Load multi-scene keypoints2d and paf.""" + mscene_keypoints_list = [] + for scene_idx in range(self.n_scene): + file_name = os.path.join(self.meta_path, f'scene_{scene_idx}', + 'kps2d_paf.json') + f = open(file_name, 'r') + json_data = json.load(f) + src_convention = json_data['convention'] + multi_detections = json_data['data'] + self.n_views = len(multi_detections) + mview_kps2d = [] + for view_idx in range(self.n_views): + img_size = (self.fisheye_params[scene_idx][view_idx].width, + self.fisheye_params[scene_idx][view_idx].height) + detections = multi_detections[view_idx] + convert_detections = convert_bottom_up_kps_paf( + detections, + src_convention, + self.kps2d_convention, + approximate=True) + # resize + for frame_id in range(len(detections)): + for kps_id in range( + len(convert_detections[frame_id]['kps'])): + if len(convert_detections[frame_id]['kps'] + [kps_id]) > 0: + convert_detections[frame_id]['kps'][ + kps_id][:, 0] = convert_detections[frame_id][ + 'kps'][kps_id][:, 0] * ( + img_size[0] - 1) + convert_detections[frame_id]['kps'][ + kps_id][:, 1] = convert_detections[frame_id][ + 'kps'][kps_id][:, 1] * ( + img_size[1] - 1) + + mview_kps2d.append(convert_detections) + f.close() + mscene_keypoints_list.append(mview_kps2d) + self.percep_keypoints2d = mscene_keypoints_list diff --git a/xrmocap/data/dataset/builder.py b/xrmocap/data/dataset/builder.py index 17d29829..424c3d84 100644 --- a/xrmocap/data/dataset/builder.py +++ b/xrmocap/data/dataset/builder.py @@ -2,6 +2,7 @@ from mmcv.utils import Registry from .base_dataset import BaseDataset +from .bottom_up_mview_mperson_dataset import BottomUpMviewMpersonDataset from .mview_mperson_dataset import MviewMpersonDataset from .mvp_dataset import MVPDataset @@ -11,6 +12,8 @@ DATASETS.register_module( name='MviewMpersonDataset', module=MviewMpersonDataset) DATASETS.register_module(name='MVPDataset', module=MVPDataset) +DATASETS.register_module( + 
name='BottomUpMviewMpersonDataset', module=BottomUpMviewMpersonDataset) def build_dataset(cfg) -> BaseDataset: diff --git a/xrmocap/data/dataset/mview_mperson_dataset.py b/xrmocap/data/dataset/mview_mperson_dataset.py index ea21419d..7b918e6c 100644 --- a/xrmocap/data/dataset/mview_mperson_dataset.py +++ b/xrmocap/data/dataset/mview_mperson_dataset.py @@ -1,3 +1,4 @@ +# yapf: disable import glob import logging import numpy as np @@ -16,6 +17,7 @@ from typing import Literal except ImportError: from typing_extensions import Literal +# yapf: enable class MviewMpersonDataset(BaseDataset): diff --git a/xrmocap/ops/bottom_up_association/__init__.py b/xrmocap/ops/bottom_up_association/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/xrmocap/ops/bottom_up_association/builder.py b/xrmocap/ops/bottom_up_association/builder.py new file mode 100644 index 00000000..b288aff4 --- /dev/null +++ b/xrmocap/ops/bottom_up_association/builder.py @@ -0,0 +1,16 @@ +# yapf: disable +from mmcv.utils import Registry + +from .fourdag_associator import FourDAGAssociator + +# yapf: enable + +BOTTOM_UP_ASSOCIATORS = Registry('bottom_up_associator') + +BOTTOM_UP_ASSOCIATORS.register_module( + name='FourDAGAssociator', module=FourDAGAssociator) + + +def build_bottom_up_associator(cfg) -> FourDAGAssociator: + """Build top_down_associator.""" + return BOTTOM_UP_ASSOCIATORS.build(cfg) diff --git a/xrmocap/ops/bottom_up_association/fourdag_associator.py b/xrmocap/ops/bottom_up_association/fourdag_associator.py new file mode 100644 index 00000000..f0f7ac77 --- /dev/null +++ b/xrmocap/ops/bottom_up_association/fourdag_associator.py @@ -0,0 +1,242 @@ +# yapf: disable +import logging +import numpy as np +from typing import List, Tuple, Union +from xrprimer.data_structure.camera import ( + FisheyeCameraParameter, PinholeCameraParameter, +) +from xrprimer.utils.log_utils import get_logger + +from xrmocap.data_structure.keypoints import Keypoints +from xrmocap.ops.bottom_up_association.graph_solver.builder import ( + build_graph_solver, +) +from xrmocap.ops.top_down_association.identity_tracking.builder import ( + BaseTracking, build_identity_tracking, +) +from xrmocap.ops.triangulation.builder import ( + BaseTriangulator, build_triangulator, +) +from xrmocap.ops.triangulation.point_selection.builder import ( + BaseSelector, build_point_selector, +) +from xrmocap.transform.keypoints3d.optim.builder import ( + build_keypoints3d_optimizer, +) + +# yapf: enable + + +class FourDAGAssociator: + + def __init__(self, + kps_convention: str = 'fourdag_19', + triangulator: Union[None, dict, BaseTriangulator] = None, + point_selector: Union[None, dict, BaseSelector] = None, + keypoints3d_optimizer=None, + n_views: int = 5, + graph_construct: Union[None, dict] = None, + graph_associate: Union[None, dict] = None, + identity_tracking: Union[None, dict, BaseTracking] = None, + min_asgn_cnt: int = 5, + use_tracking_edges: bool = True, + logger: Union[None, str, logging.Logger] = None) -> None: + + self.logger = get_logger(logger) + + if isinstance(triangulator, dict): + triangulator['logger'] = self.logger + self.triangulator = build_triangulator(triangulator) + else: + self.triangulator = triangulator + + if isinstance(keypoints3d_optimizer, dict): + keypoints3d_optimizer['logger'] = self.logger + self.keypoints3d_optimizer = build_keypoints3d_optimizer( + keypoints3d_optimizer) + else: + self.keypoints3d_optimizer = keypoints3d_optimizer + + self.n_views = n_views + self.kps_convention = kps_convention + 
self.last_multi_kps3d = dict()
+        self.use_tracking_edges = use_tracking_edges
+        self.min_asgn_cnt = min_asgn_cnt
+        if isinstance(point_selector, dict):
+            point_selector['logger'] = self.logger
+            self.point_selector = build_point_selector(point_selector)
+        else:
+            self.point_selector = point_selector
+        if isinstance(graph_associate, dict):
+            graph_associate['logger'] = self.logger
+            graph_associate['n_views'] = self.n_views
+            self.graph_associate = build_graph_solver(graph_associate)
+        else:
+            self.graph_associate = graph_associate
+        if isinstance(graph_construct, dict):
+            graph_construct['logger'] = self.logger
+            graph_construct['n_views'] = self.n_views
+            self.graph_construct = build_graph_solver(graph_construct)
+        else:
+            self.graph_construct = graph_construct
+        if isinstance(identity_tracking, dict):
+            identity_tracking['logger'] = self.logger
+            self.identity_tracking = build_identity_tracking(identity_tracking)
+        else:
+            self.identity_tracking = identity_tracking
+
+    def set_cameras(
+        self, cameras: List[Union[FisheyeCameraParameter,
+                                  PinholeCameraParameter]]
+    ) -> None:
+        if self.triangulator is not None:
+            self.triangulator.set_cameras(cameras)
+        if self.keypoints3d_optimizer is not None:
+            self.keypoints3d_optimizer.set_cameras(cameras)
+        if hasattr(self.point_selector, 'triangulator'):
+            self.point_selector.triangulator.set_cameras(cameras)
+        self.graph_construct.set_cameras(cameras)
+
+    def cal_keypoints2d(self, mpersons_map, kps2d):
+        """Gather associated keypoints2d for each person, dropping new
+        persons with fewer than min_asgn_cnt assigned candidates."""
+        for i, person_id in enumerate(mpersons_map.copy()):
+            if person_id in self.last_multi_kps3d:
+                continue
+            if sum(sum(mpersons_map[person_id] >= 0)) >= self.min_asgn_cnt:
+                continue
+            else:
+                mpersons_map.pop(person_id)
+
+        m_limbs2d = {}
+        for person_id in mpersons_map:
+            if person_id in self.last_multi_kps3d:
+                identity = person_id
+            elif len(m_limbs2d) == 0:
+                identity = 0
+            else:
+                identity = max(m_limbs2d) + 1
+            limb2d = np.zeros((3, self.n_views * self.n_kps))
+            for view in range(self.n_views):
+                for kps_id in range(self.n_kps):
+                    index = mpersons_map[person_id][kps_id, view]
+                    if index != -1:
+                        limb2d[:, view * self.n_kps +
+                               kps_id] = kps2d[view][kps_id][index]
+            m_limbs2d[identity] = limb2d
+        return m_limbs2d
+
+    def associate_frame(
+            self,
+            kps2d: list,
+            pafs: list,
+            end_of_clip=False) -> Tuple[Keypoints, List[int], dict, dict]:
+        """Associate and triangulate keypoints2d in one frame.
+
+        Args:
+            kps2d (List):
+                Data of bottom-up keypoints2d in shape
+                [n_views, n_kps, n_candidates, 3].
+            pafs (List):
+                Data of pafs in shape
+                [n_views, n_pafs, n_candidates_1, n_candidates_2].
+            end_of_clip (bool):
+                Indicator of the end of a clip.
+
+        Returns:
+            keypoints3d (Keypoints):
+                An instance of class Keypoints,
+                triangulated from the selected keypoints2d.
+            identities (List[int]):
+                A list of identities, whose length
+                represents the number of persons.
+            multi_kps2d (dict):
+                The associated keypoints2d of each person,
+                in shape (n_views, n_kps, 3).
+            mpersons_map (dict):
+                The associated keypoint candidate indices of each
+                person, in shape (n_kps, n_views).
+        """
+
+        self.n_kps = len(kps2d[0])
+        graph_info = self.graph_construct(kps2d, pafs, self.last_multi_kps3d)
+        mpersons_map = self.graph_associate(kps2d, pafs, graph_info,
+                                            self.last_multi_kps3d)
+        mlimbs2d = self.cal_keypoints2d(mpersons_map, kps2d)
+        multi_kps2d = dict()
+        for person_id in mlimbs2d:
+            mview_kps2d = np.zeros((self.n_views, self.n_kps, 3))
+            for view in range(self.n_views):
+                for kps_id in range(self.n_kps):
+                    mview_kps2d[view][kps_id] = mlimbs2d[
+                        person_id][:, view * self.n_kps + kps_id]
+            multi_kps2d[person_id] = mview_kps2d
+
+        if self.keypoints3d_optimizer is not None:
+            multi_kps3d = self.keypoints3d_optimizer.update(mlimbs2d)
+            if self.use_tracking_edges:
+                self.last_multi_kps3d = multi_kps3d
+            kps_arr = np.zeros((1, len(multi_kps3d), self.n_kps, 4))
+            mask_arr = np.zeros((1, len(multi_kps3d), self.n_kps))
+            for index, person_id in enumerate(multi_kps3d):
+                kps_arr[0, index,
+                        ...] = multi_kps3d[person_id][:, :self.n_kps].T
+                mask_arr[0, index, :] = multi_kps3d[person_id][3, :self.n_kps]
+            keypoints3d = Keypoints(
+                kps=kps_arr, mask=mask_arr, convention=self.kps_convention)
+            identities = multi_kps3d.keys()
+        elif self.triangulator is not None:
+            multi_kps3d = []
+            identities = []
+            for person_id in mlimbs2d:
+                mview_kps2d = multi_kps2d[person_id]
+                matched_mkps2d = np.zeros((self.n_views, self.n_kps, 2))
+                matched_mkps2d_mask = np.zeros((self.n_views, self.n_kps, 1))
+                matched_mkps2d_conf = np.zeros((self.n_views, self.n_kps, 1))
+                matched_mkps2d = mview_kps2d[..., :2]
+                matched_mkps2d_mask = np.ones_like(mview_kps2d[..., 0:1])
+                matched_mkps2d_conf[..., 0] = mview_kps2d[..., 2]
+                selected_mask = self.point_selector.get_selection_mask(
+                    np.concatenate((matched_mkps2d, matched_mkps2d_conf),
+                                   axis=-1), matched_mkps2d_mask)
+                kps3d = self.triangulator.triangulate(matched_mkps2d,
+                                                      selected_mask)
+
+                if not np.isnan(kps3d).all():
+                    multi_kps3d.append(kps3d)
+                    identities.append(person_id)
+            multi_kps3d = np.array(multi_kps3d)
+            kps3d_score = np.ones_like(multi_kps3d[..., 0:1])
+            kps3d = (np.concatenate((multi_kps3d, kps3d_score), axis=-1))
+            kps3d = kps3d[np.newaxis]
+            kps3d_mask = np.ones_like(kps3d[..., 0])
+            keypoints3d = Keypoints(kps=kps3d, convention=self.kps_convention)
+            keypoints3d.set_mask(kps3d_mask)
+            if self.use_tracking_edges:
+                for index, person_id in enumerate(identities):
+                    self.last_multi_kps3d[
+                        person_id] = keypoints3d.get_keypoints()[0, index,
+                                                                 ...].T
+        if end_of_clip:
+            self.last_multi_kps3d = dict()
+            if self.keypoints3d_optimizer is not None:
+                self.keypoints3d_optimizer.trace_limbs.clear()
+                self.keypoints3d_optimizer.trace_limb_infos.clear()
+        return keypoints3d, identities, multi_kps2d, mpersons_map
+
+    def assign_identities_frame(
+            self, curr_kps3d) -> Tuple[Keypoints, List[int]]:
+        """Process kps3d to Keypoints (an instance of class Keypoints,
+        including kps data, mask and convention).
+
+        Args:
+            curr_kps3d (List[np.ndarray]): The results of each frame.
+
+        Returns:
+            keypoints3d (Keypoints): An instance of class Keypoints.
+            frame_identity (List[int]): Identities queried from the
+                identity tracker for the current frame.
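+
+        Example (a hypothetical call, assuming ``curr_kps3d`` holds the
+        triangulated keypoints3d of the current frame):
+            >>> keypoints3d, identities = \
+            ...     associator.assign_identities_frame(curr_kps3d)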
+ """ + frame_identity = self.identity_tracking.query(curr_kps3d) + + kps3d_score = np.ones_like(curr_kps3d[..., 0:1]) + kps3d = (np.concatenate((curr_kps3d, kps3d_score), axis=-1)) + kps3d = kps3d[np.newaxis] + kps3d_mask = np.ones_like(kps3d[..., 0]) + keypoints3d = Keypoints(kps=kps3d, convention=self.kps_convention) + keypoints3d.set_mask(kps3d_mask) + + return keypoints3d, frame_identity diff --git a/xrmocap/ops/bottom_up_association/graph_solver/builder.py b/xrmocap/ops/bottom_up_association/graph_solver/builder.py new file mode 100644 index 00000000..3f12e74f --- /dev/null +++ b/xrmocap/ops/bottom_up_association/graph_solver/builder.py @@ -0,0 +1,14 @@ +from mmcv.utils import Registry + +from .graph_associate import GraphAssociate +from .graph_construct import GraphConstruct + +GRAPHSOLVER = Registry('graph_solver') + +GRAPHSOLVER.register_module(name='GraphAssociate', module=GraphAssociate) +GRAPHSOLVER.register_module(name='GraphConstruct', module=GraphConstruct) + + +def build_graph_solver(cfg): + """Build a graph solver instance.""" + return GRAPHSOLVER.build(cfg) diff --git a/xrmocap/ops/bottom_up_association/graph_solver/graph_associate.py b/xrmocap/ops/bottom_up_association/graph_solver/graph_associate.py new file mode 100644 index 00000000..7d724bf3 --- /dev/null +++ b/xrmocap/ops/bottom_up_association/graph_solver/graph_associate.py @@ -0,0 +1,741 @@ +# yapf: disable +import copy +import heapq +import logging +import numpy as np +from typing import Union + +from xrmocap.utils.fourdag_utils import LimbInfo, welsch + +# yapf: enable + + +class Clique(): + + def __init__(self, paf_id, proposal, score=-1) -> None: + """class for limb clique, which is used for solve 4D graph. + + Args: + paf_id (int): the paf index + paf index proposal (List): + a list of allocated bone index to the clique + score (float): the score of the clique, larger score will be + solve earlier + """ + self.paf_id = paf_id + self.proposal = proposal + self.score = score + + def __lt__(self, other): + if self.score > other.score: + return True + else: + return False + + +class Voting(): + + def __init__(self) -> None: + """vote class for clique it will record the kps haven been allocated + and it will be used to solve graph.""" + self.fst = np.zeros(2, dtype=np.int8) + self.sec = np.zeros(2, dtype=np.int8) + self.fst_cnt = np.zeros(2, dtype=np.int8) + self.sec_cnt = np.zeros(2, dtype=np.int8) + self.vote = dict() + + def parse(self): + self.fst_cnt = np.zeros(2) + self.sec_cnt = np.zeros(2) + if len(self.vote) == 0: + return + + _vote = copy.deepcopy(self.vote) + for i in range(2): + for index in range(2): + person_id = max(_vote, key=lambda x: _vote[x][index]) + + if i == 0: + self.fst[index] = person_id + self.fst_cnt[index] = _vote[person_id][index] + else: + self.sec[index] = person_id + self.sec_cnt[index] = _vote[person_id][index] + _vote[person_id][index] = 0 + + +class GraphAssociate(): + + def __init__(self, + kps_convention='fourdag_19', + n_views=5, + w_epi: float = 2, + w_temp: float = 2, + w_view: float = 2, + w_paf: float = 1, + w_hier: float = 0.5, + c_view_cnt: float = 1.5, + min_check_cnt: int = 1, + logger: Union[None, str, logging.Logger] = None) -> None: + """ + + Args: + kps_convention (str): + The name of destination convention. 
+ n_views (int): + views number of dataset + n_kps (int): + keypoints number + n_pafs (int): + paf number + w_epi (float): + clique score weight for epipolar distance + w_temp (float): + clique score weight for temporal tracking distance + w_view (float): + clique score weight for view number + w_paf (float): + clique score weight for paf edge + w_hier (float): + clique score weight for hierarchy + c_view_cnt (float): + maximal view number + min_check_cnt (int): + minimum check number + logger (Union[None, str, logging.Logger], optional): + Logger for logging. If None, root logger will be selected. + Defaults to None. + """ + self.logger = logger + self.n_views = n_views + self.limb_info = LimbInfo(kps_convention) + self.n_kps = self.limb_info.get_kps_number() + self.n_pafs = self.limb_info.get_paf_number() + self.w_epi = w_epi + self.w_temp = w_temp + self.w_view = w_view + self.w_paf = w_paf + self.w_hier = w_hier + self.c_view_cnt = c_view_cnt + self.min_check_cnt = min_check_cnt + self.paf_dict = self.limb_info.get_paf_dict() + self.hierarchy_map = self.limb_info.get_hierarchy_map() + self.m_paf_hier = np.zeros(self.n_pafs) + for paf_id in range(self.n_pafs): + self.m_paf_hier[paf_id] = min( + self.hierarchy_map[self.paf_dict[0][paf_id]], + self.hierarchy_map[self.paf_dict[1][paf_id]]) + self.m_paf_hier_size = self.m_paf_hier.max() + + self.m_kps2paf = {i: [] for i in range(self.n_kps)} + for paf_id in range(self.n_pafs): + kps_pair = [self.paf_dict[0][paf_id], self.paf_dict[1][paf_id]] + self.m_kps2paf[kps_pair[0]].append(paf_id) + self.m_kps2paf[kps_pair[1]].append(paf_id) + + self.m_assign_map = { + i: {j: [] + for j in range(self.n_kps)} + for i in range(self.n_views) + } + self.mpersons_map = dict() + + self.last_multi_kps3d = dict() + self.cliques = [] + + def __call__(self, kps2d, pafs, graph, last_multi_kps3d=dict): + """associate keypoint in multiply view. 
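+
+        Cliques are popped from a score-ordered max-heap (see
+        ``Clique.__lt__``), so high-confidence limb hypotheses are
+        resolved first.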
+ + Args: + kps2d (list): 2D keypoints + pafs (list): part affine field + graph (list): the 4D graph to be associated + last_multi_kps3d (dict): 3D keypoints of last frame + + Returns: + mpersons_map (dict): the associate limb + """ + self.kps2d = kps2d + self.pafs = pafs + + self.m_epi_edges = graph['m_epi_edges'] + self.m_temp_edges = graph['m_temp_edges'] + self.m_bone_nodes = graph['m_bone_nodes'] + self.m_bone_epi_edges = graph['m_bone_epi_edges'] + self.m_bone_temp_edges = graph['m_bone_temp_edges'] + + self.last_multi_kps3d = last_multi_kps3d + self.solve_graph() + + return self.mpersons_map + + def solve_graph(self): + self.initialize() + self.enumerate_clques() + while len(self.cliques) > 0: + self.assign_top_clique() + + def initialize(self): + for kps_id in range(self.n_kps): + for view in range(self.n_views): + self.m_assign_map[view][kps_id] = np.full( + len(self.kps2d[view][kps_id]), -1) + + self.mpersons_map = {} + for person_id in self.last_multi_kps3d: + self.mpersons_map[person_id] = np.full((self.n_kps, self.n_views), + -1) + + def enumerate_clques(self): + tmp_cliques = {i: [] for i in range(self.n_pafs)} + for paf_id in range(self.n_pafs): + nodes = self.m_bone_nodes[paf_id] + pick = [-1] * (self.n_views + 1) + available_node = { + i: {j: [] + for j in range(self.n_views + 1)} + for i in range(self.n_views + 1) + } + + # view_cnt = 0 + index = -1 + while True: + if index >= 0 and pick[index] >= len( + available_node[index][index]): + pick[index] = -1 + index = index - 1 + if index < 0: + break + pick[index] += 1 + + elif index == len(pick) - 1: + if sum(pick[:self.n_views]) != -self.n_views: + clique = Clique(paf_id, [-1] * len(pick)) + for i in range(len(pick)): + if pick[i] != -1: + if i == len(pick) - 1: + clique.proposal[i] = list( + self.last_multi_kps3d.keys())[ + available_node[i][i][pick[i]]] + else: + clique.proposal[i] = available_node[i][i][ + pick[i]] + clique.score = self.cal_clique_score(clique) + tmp_cliques[paf_id].append(clique) + pick[index] += 1 + + else: + index += 1 + if index == 0: + for view in range(self.n_views): + for bone in range(len(nodes[view])): + available_node[0][view].append(bone) + for pid in range(len(self.last_multi_kps3d)): + available_node[0][self.n_views].append(pid) + + else: + if pick[index - 1] >= 0: + for view in range(index, self.n_views): + available_node[index][view] = [] + epiEdges = self.m_bone_epi_edges[paf_id][ + index - 1][view] + bone1_id = available_node[index - + 1][index - + 1][pick[index - + 1]] + for bone2_id in available_node[index - + 1][view]: + if epiEdges[bone1_id, bone2_id] > 0: + available_node[index][view].append( + bone2_id) + + else: + for view in range(index, self.n_views): + available_node[index][view] = available_node[ + index - 1][view][:] + + if pick[self.n_views - 1] > 0: + available_node[index][self.n_views] = [] + temp_edge = self.m_bone_temp_edges[paf_id][ + self.n_views - 1] + bone1_id = available_node[self.n_views - + 1][self.n_views - + 1][pick[self.n_views - + 1]] + for pid in available_node[index - 1][self.n_views]: + if temp_edge[pid, bone1_id] > 0: + available_node[index][self.n_views].append( + pid) + else: + available_node[index][ + self.n_views] = available_node[index - 1][ + self.n_views][:] + + for paf_id in range(self.n_pafs): + self.cliques.extend(tmp_cliques[paf_id]) + heapq.heapify(self.cliques) + + def assign_top_clique(self): + clique = heapq.heappop(self.cliques) + nodes = self.m_bone_nodes[clique.paf_id] + kps_pair = [ + self.paf_dict[0][clique.paf_id], 
self.paf_dict[1][clique.paf_id] + ] + if clique.proposal[self.n_views] != -1: + person_id = clique.proposal[self.n_views] + if self.check_cnt(clique, kps_pair, nodes, person_id) != -1: + person = self.mpersons_map[person_id] + _proposal = [-1] * (self.n_views + 1) + for view in range(self.n_views): + if clique.proposal[view] != -1: + node = nodes[view][clique.proposal[view]] + assign = ( + self.m_assign_map[view][kps_pair[0]][node[0]], + self.m_assign_map[view][kps_pair[1]][node[1]]) + if (assign[0] == -1 or assign[0] == person_id) and ( + assign[1] == -1 or assign[1] == person_id): + for i in range(2): + person[kps_pair[i], view] = node[i] + self.m_assign_map[view][kps_pair[i]][ + node[i]] = person_id + else: + _proposal[view] = clique.proposal[view] + self.mpersons_map[person_id] = person + self.push_clique(clique.paf_id, _proposal[:]) + + else: + _proposal = clique.proposal + _proposal[self.n_views] = -1 + self.push_clique(clique.paf_id, _proposal[:]) + + else: + voting = Voting() + voting = self.clique2voting(clique, voting) + voting.parse() + + if sum(voting.fst_cnt) == 0: + + def allocFlag(): + if sum(np.array(clique.proposal) >= 0) == 0: + return True + view_var = max(clique.proposal) + view = clique.proposal.index(view_var) + node = nodes[view][clique.proposal[view]] + person_candidate = [] + for person_id in self.mpersons_map: + + def check_cnt(): + cnt = 0 + for i in range(2): + _cnt = self.check_kps_compatibility( + view, kps_pair[i], node[i], person_id) + if _cnt == -1: + return -1 + cnt += _cnt + return cnt + + cntt = check_cnt() + if cntt >= self.min_check_cnt: + person_candidate.append([cntt, person_id]) + if len(person_candidate) == 0: + return True + person_id = max(person_candidate)[1] + person = self.mpersons_map[person_id] + for i in range(2): + person[kps_pair[i], view] = node[i] + self.m_assign_map[view][kps_pair[i]][ + node[i]] = person_id + + self.mpersons_map[person_id] = person + return False + + # ('1. A & B not assigned yet') + if allocFlag(): + person = np.full((self.n_kps, self.n_views), -1) + if len(self.mpersons_map) == 0: + person_id = 0 + else: + person_id = max(self.mpersons_map) + 1 + + for view in range(self.n_views): + if clique.proposal[view] >= 0: + node = nodes[view][clique.proposal[view]] + for i in range(2): + person[kps_pair[i], view] = node[i] + self.m_assign_map[view][kps_pair[i]][ + node[i]] = person_id + self.mpersons_map[person_id] = person + + elif min(voting.fst_cnt) == 0: + # ('2. 
A assigned but not B: Add B to person with A ') + valid_id = 0 if voting.fst_cnt[0] > 0 else 1 + master_id = voting.fst[valid_id] + unassignj_id = kps_pair[1 - valid_id] + person = self.mpersons_map[master_id] + _proposal = [-1] * (self.n_views + 1) + for view in range(self.n_views): + if clique.proposal[view] >= 0: + node = nodes[view][clique.proposal[view]] + unassignj_candidata = node[1 - valid_id] + assigned = self.m_assign_map[view][kps_pair[valid_id]][ + node[valid_id]] + if assigned == master_id: + if person[unassignj_id, view] == -1 and\ + self.check_kps_compatibility( + view, unassignj_id, + unassignj_candidata, master_id) >= 0: + person[unassignj_id, + view] = unassignj_candidata + self.m_assign_map[view][unassignj_id][ + unassignj_candidata] = master_id + else: + continue + + elif assigned == -1 and voting.fst_cnt[valid_id] >= 2\ + and voting.sec_cnt[valid_id] == 0\ + and (person[kps_pair[0], view] == -1 + or person[kps_pair[0], view] == node[0])\ + and (person[kps_pair[1], view] == -1 + or person[kps_pair[1], view] == node[1]): + if self.check_kps_compatibility( + view, kps_pair[0], node[0], master_id + ) >= 0 and self.check_kps_compatibility( + view, kps_pair[1], node[1], + master_id) >= 0: + for i in range(2): + person[kps_pair[i], view] = node[i] + self.m_assign_map[view][kps_pair[i]][ + node[i]] = master_id + else: + _proposal[view] = clique.proposal[view] + else: + _proposal[view] = clique.proposal[view] + + self.mpersons_map[master_id] = person + if _proposal != clique.proposal: + self.push_clique(clique.paf_id, _proposal[:]) + + elif voting.fst[0] == voting.fst[1]: + # ('4. A & B already assigned to same person') + master_id = voting.fst[0] + person = self.mpersons_map[master_id] + _proposal = [-1] * (self.n_views + 1) + for view in range(self.n_views): + if clique.proposal[view] >= 0: + node = nodes[view][clique.proposal[view]] + assign_id = [ + self.m_assign_map[view][kps_pair[0]][node[0]], + self.m_assign_map[view][kps_pair[1]][node[1]] + ] + if assign_id[0] == master_id and assign_id[ + 1] == master_id: + continue + elif self.check_kps_compatibility( + view, kps_pair[0], node[0], master_id + ) == -1 or self.check_kps_compatibility( + view, kps_pair[1], node[1], master_id) == -1: + _proposal[view] = clique.proposal[view] + + elif (assign_id[0] == master_id and assign_id[1] + == -1) or (assign_id[0] == -1 + and assign_id[1] == master_id): + valid_id = 0 if assign_id[1] == -1 else 1 + unassignj_id = kps_pair[1 - valid_id] + unassignj_candidata = node[1 - valid_id] + if person[unassignj_id, view] == -1 or person[ + unassignj_id, view] == unassignj_candidata: + person[unassignj_id, + view] = unassignj_candidata + self.m_assign_map[view][unassignj_id][ + unassignj_candidata] = master_id + else: + _proposal[view] = clique.proposal[view] + elif max(assign_id) == -1 and sum( + voting.sec_cnt) == 0 and ( + person[kps_pair[0], view] == -1 + or person[kps_pair[0], view] == node[0] + ) and (person[kps_pair[1], view] == -1 or + person[kps_pair[1], view] == node[1]): + for i in range(2): + person[kps_pair[i], view] = node[i] + self.m_assign_map[view][kps_pair[i]][ + node[i]] = master_id + else: + _proposal[view] = clique.proposal[view] + + if _proposal != clique.proposal: + self.push_clique(clique.paf_id, _proposal[:]) + self.mpersons_map[master_id] = person + + else: + # ('5. 
A & B already assigned to different people') + for index in range(2): + while voting.sec_cnt[index] != 0: + master_id = min(voting.fst[index], voting.sec[index]) + slave_id = max(voting.fst[index], voting.sec[index]) + assert slave_id <= max(self.mpersons_map) + + if self.check_person_compatibility( + master_id, slave_id) >= 0: + self.merge_person(master_id, slave_id) + voting = self.clique2voting(clique, voting) + voting.parse() + else: + voting.vote[ + voting.fst[index]][index] = voting.vote[ + voting.sec[index]][index] = 0 + iter = max( + voting.vote, + key=lambda x: voting.vote[x][index]) + voting.sec[index] = iter + voting.sec_cnt[index] = voting.vote[iter][index] + + if voting.fst[0] != voting.fst[1]: + conflict = [0] * self.n_views + master_id = min(voting.fst) + slave_id = max(voting.fst) + for view in range(self.n_views): + conflict[ + view] = 1 if self.check_person_compatibility_sview( + master_id, slave_id, view) == -1 else 0 + + if sum(conflict) == 0: + self.merge_person(master_id, slave_id) + _proposal = [-1] * (self.n_views + 1) + master = self.mpersons_map[master_id] + for view in range(self.n_views): + if clique.proposal[view] >= 0: + assert clique.proposal[view] < len(nodes[view]) + node = nodes[view][clique.proposal[view]] + if master[kps_pair[0], + view] != node[0] or master[ + kps_pair[1], view] != node[1]: + _proposal[view] = clique.proposal[view] + self.push_clique(clique.paf_id, _proposal[:]) + else: + _proposal_pair = np.full((self.n_views + 1, 2), -1) + for i in range(len(conflict)): + _proposal_pair[i, conflict[i]] = clique.proposal[i] + + if min(_proposal_pair[:, 0]) >= 0 and min( + _proposal_pair[:, 1]) >= 0: + self.push_clique(clique.paf_id, + _proposal_pair[:, 0].copy()) + self.push_clique(clique.paf_id, + _proposal_pair[:, 1].copy()) + + elif sum( + np.array(clique.proposal[:self.n_views]) >= 0 + ) > 1: + for i in range(len(conflict)): + _proposal = [-1] * (self.n_views + 1) + _proposal[i] = clique.proposal[i] + self.push_clique(clique.paf_id, _proposal[:]) + + def cal_clique_score(self, clique): + scores = [] + for view1 in range(self.n_views - 1): + if clique.proposal[view1] == -1: + continue + for view2 in range(view1 + 1, self.n_views): + if clique.proposal[view2] == -1: + continue + scores.append(self.m_bone_epi_edges[clique.paf_id][view1] + [view2][clique.proposal[view1], + clique.proposal[view2]]) + + if len(scores) > 0: + epi_score = sum(scores) / len(scores) + else: + epi_score = 1 + + scores = [] + person_id = clique.proposal[self.n_views] + if person_id != -1: + for view in range(self.n_views): + if clique.proposal[view] == -1: + continue + scores.append(self.m_bone_temp_edges[clique.paf_id][view][ + list(self.last_multi_kps3d.keys()).index(person_id), + clique.proposal[view]]) + + if len(scores) > 0: + temp_score = sum(scores) / len(scores) + else: + temp_score = 0 + + scores = [] + for view in range(self.n_views): + if clique.proposal[view] == -1: + continue + candidata_bone = self.m_bone_nodes[clique.paf_id][view][ + clique.proposal[view]] + scores.append(self.pafs[view][clique.paf_id][candidata_bone[0], + candidata_bone[1]]) + + paf_score = sum(scores) / len(scores) + var = sum(np.array(clique.proposal[:self.n_views]) >= 0) + view_score = welsch(self.c_view_cnt, var) + hier_score = 1 - pow( + self.m_paf_hier[clique.paf_id] / self.m_paf_hier_size, 4) + return (self.w_epi * epi_score + self.w_temp * temp_score + + self.w_paf * paf_score + self.w_view * view_score + + self.w_hier * hier_score) / ( + self.w_epi + self.w_temp + self.w_paf + 
self.w_view + + self.w_hier) + + def check_cnt(self, clique, kps_pair, nodes, person_id): + cnt = 0 + for view in range(self.n_views): + index = clique.proposal[view] + if index != -1: + for i in range(2): + _cnt = self.check_kps_compatibility( + view, kps_pair[i], nodes[view][index][i], person_id) + if _cnt == -1: + return -1 + else: + cnt += _cnt + return cnt + + def check_kps_compatibility(self, view, kps_id, candidate, pid): + person = self.mpersons_map[pid] + check_cnt = 0 + if person[kps_id][view] != -1 and person[kps_id][view] != candidate: + return -1 + + for paf_id in self.m_kps2paf[kps_id]: + check_kps_id = self.paf_dict[0][paf_id] + self.paf_dict[1][ + paf_id] - kps_id + if person[check_kps_id, view] == -1: + continue + kps_candidate1 = candidate + kps_candidate2 = person[check_kps_id, view] + if kps_id == self.paf_dict[1][paf_id]: + kps_candidate1, kps_candidate2 = kps_candidate2, kps_candidate1 + + if self.pafs[view][paf_id][kps_candidate1, kps_candidate2] > 0: + check_cnt = check_cnt + 1 + else: + return -1 + + for i in range(self.n_views): + if i == view or person[kps_id, i] == -1: + continue + if self.m_epi_edges[kps_id][view][i][candidate, + int(person[kps_id, i])] > 0: + check_cnt = check_cnt + 1 + else: + return -1 + return check_cnt + + def push_clique(self, paf_id, proposal): + if max(proposal[:self.n_views]) == -1: + return + clique = Clique(paf_id, proposal) + clique.score = self.cal_clique_score(clique) + heapq.heappush(self.cliques, clique) + + def check_person_compatibility_sview(self, master_id, slave_id, view): + assert master_id < slave_id + if slave_id < len(self.last_multi_kps3d): + return -1 + check_cnt = 0 + master = self.mpersons_map[master_id] + slave = self.mpersons_map[slave_id] + + for kps_id in range(self.n_kps): + if master[kps_id, + view] != -1 and slave[kps_id, view] != -1 and master[ + kps_id, view] != slave[kps_id, view]: + return -1 + + if master_id < len(self.last_multi_kps3d): + for kps_id in range(self.n_kps): + if slave[kps_id, view] != -1: + if self.m_temp_edges[kps_id][view][ + master_id, slave[kps_id][view]] > 0: + check_cnt = check_cnt + 1 + else: + return -1 + + for paf_id in range(self.n_pafs): + paf = self.pafs[view][paf_id] + for candidate in [[ + master[self.paf_dict[0][paf_id], view], + slave[self.paf_dict[1][paf_id], view] + ], + [ + slave[self.paf_dict[0][paf_id], view], + master[self.paf_dict[1][paf_id], view] + ]]: + if min(candidate) >= 0: + if paf[candidate[0], candidate[1]] > 0: + check_cnt = check_cnt + 1 + else: + return -1 + return check_cnt + + def check_person_compatibility(self, master_id, slave_id): + assert master_id < slave_id + if slave_id < len(self.last_multi_kps3d): + return -1 + + check_cnt = 0 + master = self.mpersons_map[master_id] + slave = self.mpersons_map[slave_id] + + for view in range(self.n_views): + _check_cnt = self.check_person_compatibility_sview( + master_id, slave_id, view) + if _check_cnt == -1: + return -1 + else: + check_cnt += _check_cnt + + for kps_id in range(self.n_kps): + for view1 in range(self.n_views - 1): + candidate1_id = master[kps_id, view1] + if candidate1_id != -1: + for view2 in range(view1 + 1, self.n_views): + candidate2_id = slave[kps_id, view2] + if candidate2_id != -1: + if self.m_epi_edges[kps_id][view1][view2][ + candidate1_id, candidate2_id] > 0: + check_cnt += 1 + else: + return -1 + return check_cnt + + def merge_person(self, master_id, slave_id): + assert master_id < slave_id + master = self.mpersons_map[master_id] + slave = self.mpersons_map[slave_id] + for view in 
range(self.n_views): + for kps_id in range(self.n_kps): + if slave[kps_id, view] != -1: + master[kps_id, view] = slave[kps_id, view] + self.m_assign_map[view][kps_id][slave[kps_id, + view]] = master_id + + self.mpersons_map[master_id] = master + self.mpersons_map.pop(slave_id) + + def clique2voting(self, clique, voting): + voting.vote = {} + if len(self.mpersons_map) == 0: + return voting + + for view in range(self.n_views): + index = clique.proposal[view] + if index != -1: + node = self.m_bone_nodes[clique.paf_id][view][index] + for i in range(2): + assigned = self.m_assign_map[view][self.paf_dict[i][ + clique.paf_id]][node[i]] + if assigned != -1: + if assigned not in voting.vote: + voting.vote[assigned] = np.zeros(2) + + voting.vote[assigned][i] += 1 + return voting diff --git a/xrmocap/ops/bottom_up_association/graph_solver/graph_construct.py b/xrmocap/ops/bottom_up_association/graph_solver/graph_construct.py new file mode 100644 index 00000000..26d506aa --- /dev/null +++ b/xrmocap/ops/bottom_up_association/graph_solver/graph_construct.py @@ -0,0 +1,282 @@ +# yapf: disable +import logging +import numpy as np +from typing import Union + +from xrmocap.utils.fourdag_utils import LimbInfo, line2linedist, point2linedist + +# yapf: enable + + +class Camera(): + + def __init__(self, cam_param) -> None: + super().__init__() + c_K = cam_param.intrinsic33() + c_T = np.array(cam_param.get_extrinsic_t()) + c_R = np.array(cam_param.get_extrinsic_r()) + c_Ki = np.linalg.inv(c_K) + self.c_Rt_Ki = np.matmul(c_R.T, c_Ki) + self.Pos = -np.matmul(c_R.T, c_T) + + def cal_ray(self, uv): + var = -self.c_Rt_Ki.dot(np.append(uv, 1).T) + return var / np.linalg.norm(var) + + +class GraphConstruct(): + + def __init__(self, + kps_convention='fourdag_19', + n_views=5, + max_epi_dist: float = 0.15, + max_temp_dist: float = 0.2, + normalize_edges: bool = True, + logger: Union[None, str, logging.Logger] = None) -> None: + """ + + Args: + kps_convention (str): + The name of destination convention. + n_views (int): + views number of dataset + n_kps (int): + keypoints number + n_pafs (int): + paf number + max_epi_dist (float): + maximal epipolar distance to be accepted + max_temp_dist (float): + maximal temporal tracking distance to be accepted + normalize_edges (bool): + indicator to normalize all edges + logger (Union[None, str, logging.Logger], optional): + Logger for logging. If None, root logger will be selected. + Defaults to None. 
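+
+        Example (a minimal sketch, assuming the registry-style config
+        used elsewhere in this repo):
+            >>> graph_construct = build_graph_solver(
+            ...     dict(type='GraphConstruct',
+            ...          kps_convention='fourdag_19',
+            ...          n_views=5))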
+ """ + self.logger = logger + self.n_views = n_views + self.limb_info = LimbInfo(kps_convention) + self.n_kps = self.limb_info.get_kps_number() + self.n_pafs = self.limb_info.get_paf_number() + self.max_epi_dist = max_epi_dist + self.max_temp_dist = max_temp_dist + self.normalize_edges = normalize_edges + self.paf_dict = self.limb_info.get_paf_dict() + self.m_epi_edges = { + i: { + j: {k: -1 + for k in range(self.n_views)} + for j in range(self.n_views) + } + for i in range(self.n_kps) + } + self.m_temp_edges = { + i: {j: -1 + for j in range(self.n_views)} + for i in range(self.n_kps) + } + self.m_kps_rays = { + i: {j: [] + for j in range(self.n_kps)} + for i in range(self.n_views) + } + self.m_bone_nodes = { + i: {j: [] + for j in range(self.n_views)} + for i in range(self.n_pafs) + } + self.m_bone_epi_edges = { + i: { + j: {k: [] + for k in range(self.n_views)} + for j in range(self.n_views) + } + for i in range(self.n_pafs) + } + self.m_bone_temp_edges = { + i: {j: [] + for j in range(self.n_views)} + for i in range(self.n_pafs) + } + + self.cameras = [] + self.last_multi_kps3d = dict() + + def set_cameras(self, cameras_param): + for view in range(len(cameras_param)): + self.cameras.append(Camera(cameras_param[view])) + + def __call__(self, kps2d, pafs, last_multi_kps3d=dict): + """construct the 4D graph. + + Args: + kps2d (list): 2D keypoints + pafs (list): part affine field + last_multi_kps3d (dict): 3D keypoints of last frame + + Returns: + graph (dict): the constructed 4D graph + """ + self.kps2d = kps2d + self.pafs = pafs + self.last_multi_kps3d = last_multi_kps3d + self.construct_graph() + + return dict( + m_epi_edges=self.m_epi_edges, + m_temp_edges=self.m_temp_edges, + m_bone_nodes=self.m_bone_nodes, + m_bone_epi_edges=self.m_bone_epi_edges, + m_bone_temp_edges=self.m_bone_temp_edges) + + def construct_graph(self): + self._calculate_kps_rays() + self._calculate_paf_edges() + self._calculate_epi_edges() + self._calculate_temp_edges() + + self._calculate_bone_nodes() + self._calculate_bone_epi_edges() + self._calculate_bone_temp_edges() + + def _calculate_kps_rays(self): + for view in range(self.n_views): + cam = self.cameras[view] + for kps_id in range(self.n_kps): + self.m_kps_rays[view][kps_id] = [] + kps = self.kps2d[view][kps_id] + for kps_candidate in range(len(kps)): + self.m_kps_rays[view][kps_id].append( + cam.cal_ray(kps[kps_candidate][:2])) + + def _calculate_paf_edges(self): + if self.normalize_edges: + for paf_id in range(self.n_pafs): + for detection in self.pafs: + pafs = detection[paf_id] + if np.sum(pafs) > 0: + row_factor = np.clip(pafs.sum(1), 1.0, None) + col_factor = np.clip(pafs.sum(0), 1.0, None) + for i in range(len(row_factor)): + pafs[i] /= row_factor[i] + for j in range(len(col_factor)): + pafs[:, j] /= col_factor[j] + detection[paf_id] = pafs + + def _calculate_epi_edges(self): + for kps_id in range(self.n_kps): + for view1 in range(self.n_views - 1): + cam1 = self.cameras[view1] + for view2 in range(view1 + 1, self.n_views): + cam2 = self.cameras[view2] + kps1 = self.kps2d[view1][kps_id] + kps2 = self.kps2d[view2][kps_id] + ray1 = self.m_kps_rays[view1][kps_id] + ray2 = self.m_kps_rays[view2][kps_id] + + if len(kps1) > 0 and len(kps2) > 0: + epi = np.full((len(kps1), len(kps2)), -1.0) + for kps1_candidate in range(len(kps1)): + for kps2_candidate in range(len(kps2)): + dist = line2linedist(cam1.Pos, + ray1[kps1_candidate], + cam2.Pos, + ray2[kps2_candidate]) + if dist < self.max_epi_dist: + epi[kps1_candidate, + kps2_candidate] = \ + 1 - dist / 
self.max_epi_dist + + if self.normalize_edges: + row_factor = np.clip(epi.sum(1), 1.0, None) + col_factor = np.clip(epi.sum(0), 1.0, None) + for i in range(len(row_factor)): + epi[i] /= row_factor[i] + for j in range(len(col_factor)): + epi[:, j] /= col_factor[j] + self.m_epi_edges[kps_id][view1][view2] = epi + self.m_epi_edges[kps_id][view2][view1] = epi.T + + def _calculate_temp_edges(self): + for kps_id in range(self.n_kps): + for view in range(self.n_views): + rays = self.m_kps_rays[view][kps_id] + if len(self.last_multi_kps3d) > 0 and len(rays) > 0: + temp = np.full((len(self.last_multi_kps3d), len(rays)), + -1.0) + for pid, person_id in enumerate(self.last_multi_kps3d): + limb = self.last_multi_kps3d[person_id] + if limb[3, kps_id] > 0: + for kps_candidate in range(len(rays)): + dist = point2linedist(limb[:, kps_id][:3], + self.cameras[view].Pos, + rays[kps_candidate]) + if dist < self.max_temp_dist: + temp[ + pid, + kps_candidate] = \ + 1 - dist / self.max_temp_dist + + if self.normalize_edges: + row_factor = np.clip(temp.sum(1), 1.0, None) + col_factor = np.clip(temp.sum(0), 1.0, None) + for i in range(len(row_factor)): + temp[i] /= row_factor[i] + for j in range(len(col_factor)): + temp[:, j] /= col_factor[j] + self.m_temp_edges[kps_id][view] = temp + + def _calculate_bone_nodes(self): + for paf_id in range(self.n_pafs): + kps1, kps2 = self.paf_dict[0][paf_id], self.paf_dict[1][paf_id] + for view in range(self.n_views): + self.m_bone_nodes[paf_id][view] = [] + for kps1_candidate in range(len(self.kps2d[view][kps1])): + for kps2_candidate in range(len(self.kps2d[view][kps2])): + if self.pafs[view][paf_id][kps1_candidate, + kps2_candidate] > 0: + self.m_bone_nodes[paf_id][view].append( + (kps1_candidate, kps2_candidate)) + + def _calculate_bone_epi_edges(self): + for paf_id in range(self.n_pafs): + kps_pair = [self.paf_dict[0][paf_id], self.paf_dict[1][paf_id]] + for view1 in range(self.n_views - 1): + for view2 in range(view1 + 1, self.n_views): + nodes1 = self.m_bone_nodes[paf_id][view1] + nodes2 = self.m_bone_nodes[paf_id][view2] + epi = np.full((len(nodes1), len(nodes2)), -1.0) + for bone1_id in range(len(nodes1)): + for bone2_id in range(len(nodes2)): + node1 = nodes1[bone1_id] + node2 = nodes2[bone2_id] + epidist = np.zeros(2) + for i in range(2): + epidist[i] = self.m_epi_edges[ + kps_pair[i]][view1][view2][node1[i], + node2[i]] + if epidist.min() < 0: + continue + epi[bone1_id, bone2_id] = epidist.mean() + self.m_bone_epi_edges[paf_id][view1][view2] = epi + self.m_bone_epi_edges[paf_id][view2][view1] = epi.T + + def _calculate_bone_temp_edges(self): + for paf_id in range(self.n_pafs): + kps_pair = [self.paf_dict[0][paf_id], self.paf_dict[1][paf_id]] + for view in range(self.n_views): + nodes = self.m_bone_nodes[paf_id][view] + temp = np.full((len(self.last_multi_kps3d), len(nodes)), -1.0) + for pid in range(len(temp)): + for node_candidate in range(len(nodes)): + node = nodes[node_candidate] + tempdist = [] + for i in range(2): + tempdist.append(self.m_temp_edges[kps_pair[i]] + [view][pid][node[i]]) + if min(tempdist) > 0: + temp[ + pid, + node_candidate] = sum(tempdist) / len(tempdist) + self.m_bone_temp_edges[paf_id][view] = temp diff --git a/xrmocap/ops/triangulation/builder.py b/xrmocap/ops/triangulation/builder.py index 958adaeb..bffc7213 100644 --- a/xrmocap/ops/triangulation/builder.py +++ b/xrmocap/ops/triangulation/builder.py @@ -3,6 +3,10 @@ ) from .aniposelib_triangulator import AniposelibTriangulator +from .jacobi_triangulator import JacobiTriangulator 
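+# A minimal usage sketch (assuming a config-built triangulator):
+# build_triangulator(dict(type='JacobiTriangulator')) returns an
+# iterative, Gauss-Newton style point triangulator.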
 TRIANGULATORS.register_module(
     name='AniposelibTriangulator', module=AniposelibTriangulator)
+
+TRIANGULATORS.register_module(
+    name='JacobiTriangulator', module=JacobiTriangulator)
diff --git a/xrmocap/ops/triangulation/jacobi_triangulator.py b/xrmocap/ops/triangulation/jacobi_triangulator.py
new file mode 100644
index 00000000..8a07bec5
--- /dev/null
+++ b/xrmocap/ops/triangulation/jacobi_triangulator.py
@@ -0,0 +1,155 @@
+# yapf: disable
+import numpy as np
+from typing import List, Union
+from xrprimer.data_structure.camera import (
+    FisheyeCameraParameter, PinholeCameraParameter,
+)
+from xrprimer.ops.triangulation.base_triangulator import BaseTriangulator
+
+from xrmocap.utils.triangulation_utils import prepare_triangulate_input
+
+# yapf: enable
+
+
+class JacobiTriangulator(BaseTriangulator):
+
+    def __init__(self,
+                 camera_parameters: List[FisheyeCameraParameter] = [],
+                 maxIter_time=20,
+                 update_tolerance=1e-4,
+                 regular_term=1e-4,
+                 logger=None):
+        """Triangulator for points triangulation, based on Jacobi
+        optimization.
+
+        Args:
+            camera_parameters (List[FisheyeCameraParameter]):
+                A list of Pinhole/FisheyeCameraParameter, or a list
+                of paths to dumped Pinhole/FisheyeCameraParameters.
+            maxIter_time (int):
+                Maximum number of optimization iterations.
+            update_tolerance (float):
+                Update norm below which the optimization is
+                considered convergent.
+            regular_term (float):
+                Regularization term added to the normal equations.
+            logger (Union[None, str, logging.Logger], optional):
+                Logger for logging. If None, root logger will be selected.
+                Defaults to None.
+        """
+        super().__init__(camera_parameters=camera_parameters, logger=logger)
+        self.projs = None
+        self.loss = None
+
+        self.maxIter_time = maxIter_time
+        self.update_tolerance = update_tolerance
+        self.regular_term = regular_term
+
+        self.logger = logger
+
+        if len(self.camera_parameters) > 0:
+            self._prepare_proj_mat(self.camera_parameters)
+
+    def _solve(self, points, points_c):
+        points = points.T
+        convergent = False
+        loss = 1e10
+        pos = np.zeros(3, dtype=np.float32)
+
+        # at least two views with positive confidence are required
+        if sum(points_c > 0) < 2:
+            return pos, loss
+
+        for iter_time in range(self.maxIter_time):
+            if convergent:
+                break
+            # normal equations of a regularized Gauss-Newton step
+            ATA = self.regular_term * np.identity(3, dtype=np.float32)
+            ATb = np.zeros(3, dtype=np.float32)
+            for view in range(points.shape[1]):
+                if points_c[view] > 0:
+                    proj = self.projs[:, 4 * view:4 * view + 4]
+                    xyz = np.matmul(proj, np.append(pos, 1))
+                    # jacobian of the perspective division
+                    jacobi = np.array([
+                        1.0 / xyz[2], 0.0, -xyz[0] / (xyz[2] * xyz[2]), 0.0,
+                        1.0 / xyz[2], -xyz[1] / (xyz[2] * xyz[2])
+                    ],
+                                      dtype=np.float32).reshape((2, 3))
+                    jacobi = np.matmul(jacobi, proj[:, :3])
+                    w = points_c[view]
+                    ATA += w * np.matmul(jacobi.T, jacobi)
+                    ATb += w * np.matmul(
+                        jacobi.T, (points[:, view][:2] - xyz[:2] / xyz[2]))
+
+            delta = np.linalg.solve(ATA, ATb)
+            loss = np.linalg.norm(delta)
+            if np.linalg.norm(delta) < self.update_tolerance:
+                convergent = True
+            else:
+                pos += delta
+        return pos, loss
+
+    def triangulate(
+            self,
+            points: Union[np.ndarray, list, tuple],
+            points_mask: Union[np.ndarray, list, tuple] = None) -> np.ndarray:
+
+        points, points_mask = prepare_triangulate_input(
+            camera_number=len(self.camera_parameters),
+            points=points,
+            points_mask=points_mask,
+            logger=self.logger)
+
+        points2d = points[..., :2].copy()
+        input_points2d_shape = points2d.shape
+        n_view = input_points2d_shape[0]
+        points2d = points2d.reshape(n_view, -1, 2)
+        points_mask = points_mask.reshape(n_view, -1, 1)
+        ignored_indexes = np.where(points_mask != 1)
+        points2d_c = points[..., 2].copy()
+        points2d_c = points2d_c.reshape(n_view, -1, 1)
+        points2d_c[ignored_indexes[0], ignored_indexes[1], :] = 0
+        n_points = points2d.shape[1]
+        self.loss = np.full(n_points, 10e9)
+        points3d = []
+        for point_id in range(n_points):
+            pos, loss = self._solve(points2d[:, point_id],
+                                    points2d_c[:, point_id])
+            points3d.append(pos)
+            self.loss[point_id] = loss
+        points3d = np.array(points3d)
+
+        output_points3d_shape = np.array(input_points2d_shape[1:])
+        output_points3d_shape[-1] = 3
+        points3d = points3d.reshape(*output_points3d_shape)
+        return points3d
+
+    def _prepare_proj_mat(self, camera_parameters) -> np.ndarray:
+        projs = np.zeros((3, len(camera_parameters) * 4))
+        for view in range(len(camera_parameters)):
+            K = camera_parameters[view].intrinsic33()
+            T = np.array(camera_parameters[view].get_extrinsic_t())
+            R = np.array(camera_parameters[view].get_extrinsic_r())
+            Proj = np.zeros((3, 4), dtype=np.float64)
+            for i in range(3):
+                for j in range(4):
+                    Proj[i, j] = R[i, j] if j < 3 else T[i]
+            projs[:, 4 * view:4 * view + 4] = np.matmul(K, Proj)
+        self.projs = projs
+
+    def set_cameras(
+        self, camera_parameters: List[Union[PinholeCameraParameter,
+                                            FisheyeCameraParameter]]
+    ) -> None:
+        """Set cameras for this triangulator.
+
+        Args:
+            camera_parameters (List[Union[PinholeCameraParameter,
+                FisheyeCameraParameter]]):
+                A list of PinholeCameraParameter, or a list
+                of FisheyeCameraParameter.
+        """
+        if len(camera_parameters) > 0 and \
+                isinstance(camera_parameters[0], str):
+            self.logger.error('camera_parameters must be a list' +
+                              ' of camera parameter instances, not strs.')
+            raise TypeError
+        self._prepare_proj_mat(camera_parameters)
+        super().set_cameras(camera_parameters=camera_parameters)
diff --git a/xrmocap/transform/convention/keypoints_convention/__init__.py b/xrmocap/transform/convention/keypoints_convention/__init__.py
index 123c55ef..215cb403 100644
--- a/xrmocap/transform/convention/keypoints_convention/__init__.py
+++ b/xrmocap/transform/convention/keypoints_convention/__init__.py
@@ -12,12 +12,14 @@
 from typing import List
 
 from xrmocap.data_structure.keypoints import Keypoints
-from . import campus, human_data, panoptic  # noqa:F401
+from . import campus, fourdag_19, human_data, panoptic  # noqa:F401
+from .paf import ALL_PAF_MAPPING
 
 # yapf: enable
 
 if isinstance(KEYPOINTS_FACTORY, dict):
     KEYPOINTS_FACTORY['campus'] = campus.CAMPUS_KEYPOINTS
     KEYPOINTS_FACTORY['panoptic'] = panoptic.PANOPTIC_KEYPOINTS
+    KEYPOINTS_FACTORY['fourdag_19'] = fourdag_19.FOURDAG19_KEYPOINTS
 
 
 def convert_keypoints(
@@ -112,6 +114,93 @@ def new_array_func(shape, value, ref_data, if_uint8):
     return ret_kps
 
 
+def convert_bottom_up_kps_paf(
+    kps_paf: List,
+    src: str,
+    dst: str,
+    approximate: bool = False,
+    keypoints_factory: dict = KEYPOINTS_FACTORY,
+):
+    """Convert keypoints and pafs following the mapping correspondence
+    between src and dst keypoints definition.
+
+    Args:
+        kps_paf (List):
+            A list of dicts of 2D keypoints and pafs in shape
+            [{'kps': [], 'pafs': []}, ...].
+        src (str):
+            The name of source convention.
+        dst (str):
+            The name of destination convention.
+        approximate (bool, optional):
+            Whether approximate mapping is allowed.
+            Defaults to False.
+        keypoints_factory (dict, optional):
+            A dict to store all the keypoint conventions.
+            Defaults to KEYPOINTS_FACTORY.
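+
+        Note:
+            In ``ALL_PAF_MAPPING`` (added below), a negative index marks
+            a source paf used in reversed direction (its matrix is
+            transposed), and a list of indices marks a destination paf
+            approximated by chaining several source pafs via matrix
+            multiplication. This reading follows the code below rather
+            than a documented contract.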
diff --git a/xrmocap/transform/convention/keypoints_convention/__init__.py b/xrmocap/transform/convention/keypoints_convention/__init__.py
index 123c55ef..215cb403 100644
--- a/xrmocap/transform/convention/keypoints_convention/__init__.py
+++ b/xrmocap/transform/convention/keypoints_convention/__init__.py
@@ -12,12 +12,14 @@
 from typing import List
 
 from xrmocap.data_structure.keypoints import Keypoints
-from . import campus, human_data, panoptic  # noqa:F401
+from . import campus, fourdag_19, human_data, panoptic  # noqa:F401
+from .paf import ALL_PAF_MAPPING
 
 # yapf: enable
 
 if isinstance(KEYPOINTS_FACTORY, dict):
     KEYPOINTS_FACTORY['campus'] = campus.CAMPUS_KEYPOINTS
     KEYPOINTS_FACTORY['panoptic'] = panoptic.PANOPTIC_KEYPOINTS
+    KEYPOINTS_FACTORY['fourdag_19'] = fourdag_19.FOURDAG19_KEYPOINTS
 
 
 def convert_keypoints(
@@ -112,6 +114,93 @@
     return ret_kps
 
 
+def convert_bottom_up_kps_paf(
+    kps_paf: List,
+    src: str,
+    dst: str,
+    approximate: bool = False,
+    keypoints_factory: dict = KEYPOINTS_FACTORY,
+):
+    """Convert keypoints and PAFs following the mapping correspondence
+    between the src and dst keypoints definitions.
+
+    Args:
+        kps_paf (List):
+            A list of per-frame dicts of 2D keypoints and PAFs, in shape
+            [{'kps': [], 'pafs': []}, ...].
+        src (str):
+            The name of the source convention.
+        dst (str):
+            The name of the destination convention.
+        approximate (bool, optional):
+            Whether approximate mapping is allowed.
+            Defaults to False.
+        keypoints_factory (dict, optional):
+            A dict to store all the keypoint conventions.
+            Defaults to KEYPOINTS_FACTORY.
+
+    Returns:
+        dst_detections (list): the destination keypoints and PAFs.
+    """
+    n_frame = len(kps_paf)
+    dst_n_kps = get_keypoint_num(
+        convention=dst, keypoints_factory=keypoints_factory)
+    dst_idxs, src_idxs, _ = \
+        get_mapping(src, dst, approximate, keypoints_factory)
+    paf_mapping = ALL_PAF_MAPPING[src][dst]
+
+    dst_detections = []
+    for i in range(n_frame):
+        var = {
+            'kps': [np.array([]) for j in range(dst_n_kps)],
+            'pafs': [np.array([]) for k in range(len(paf_mapping))]
+        }
+        dst_detections.append(var)
+    for frame_id in range(n_frame):
+        for i in range(len(dst_idxs)):
+            dst_detections[frame_id]['kps'][dst_idxs[i]] = np.array(
+                kps_paf[frame_id]['kps'][src_idxs[i]], dtype=np.float32)
+        for i in range(len(paf_mapping)):
+            # a list entry chains several source PAF matrices by
+            # thresholded matrix multiplication; a negative index takes
+            # the transposed PAF
+            if isinstance(paf_mapping[i], list):
+                if paf_mapping[i][0] < 0:
+                    dst_detections[frame_id]['pafs'][i] = np.array(
+                        kps_paf[frame_id]['pafs'][-paf_mapping[i][0]],
+                        dtype=np.float32).T
+                else:
+                    dst_detections[frame_id]['pafs'][i] = np.array(
+                        kps_paf[frame_id]['pafs'][paf_mapping[i][0]],
+                        dtype=np.float32)
+                dst_detections[frame_id]['pafs'][
+                    i] = dst_detections[frame_id]['pafs'][i] * (
+                        dst_detections[frame_id]['pafs'][i] > 0.1)
+                for path_id in paf_mapping[i][1:]:
+                    if path_id < 0:
+                        arr = np.array(
+                            kps_paf[frame_id]['pafs'][-path_id],
+                            dtype=np.float32).T
+                    else:
+                        arr = np.array(
+                            kps_paf[frame_id]['pafs'][path_id],
+                            dtype=np.float32)
+                    dst_detections[frame_id]['pafs'][i] = np.matmul(
+                        dst_detections[frame_id]['pafs'][i], arr)
+                    dst_detections[frame_id]['pafs'][
+                        i] = dst_detections[frame_id]['pafs'][i] * (
+                            dst_detections[frame_id]['pafs'][i] > 0.1)
+                dst_detections[frame_id]['pafs'][i] = dst_detections[
+                    frame_id]['pafs'][i] * len(paf_mapping[i])
+            else:
+                if paf_mapping[i] < 0:
+                    dst_detections[frame_id]['pafs'][i] = np.array(
+                        kps_paf[frame_id]['pafs'][-paf_mapping[i]],
+                        dtype=np.float32).T
+                else:
+                    dst_detections[frame_id]['pafs'][i] = np.array(
+                        kps_paf[frame_id]['pafs'][paf_mapping[i]],
+                        dtype=np.float32)
+    return dst_detections
+
+
 def get_keypoints_factory() -> dict:
     """Get the KEYPOINTS_FACTORY defined in keypoints convention.
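As a quick illustration of the interface above, converting one frame of OpenPose-25 bottom-up detections to the `fourdag_19` convention could look like the following sketch. The detection values are made up, and the candidate counts per joint depend on the detector; the sketch assumes the mapping between the two conventions is registered:

```python
import numpy as np
from xrmocap.transform.convention.keypoints_convention import (
    convert_bottom_up_kps_paf,
)

# One frame of fake bottom-up detections in openpose_25 convention:
# kps[j] holds the candidates of joint j, pafs[k] the score matrix of limb k.
frame = {
    'kps': [np.random.rand(2, 3) for _ in range(25)],
    'pafs': [np.random.rand(2, 2) for _ in range(26)],
}
dst = convert_bottom_up_kps_paf([frame], src='openpose_25', dst='fourdag_19')
print(len(dst[0]['kps']))  # 19 joints in the destination convention
```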
diff --git a/xrmocap/transform/convention/keypoints_convention/fourdag_19.py b/xrmocap/transform/convention/keypoints_convention/fourdag_19.py
new file mode 100644
index 00000000..98747c5a
--- /dev/null
+++ b/xrmocap/transform/convention/keypoints_convention/fourdag_19.py
@@ -0,0 +1,21 @@
+FOURDAG19_KEYPOINTS = [
+    'pelvis_openpose',  # 'mid_hip'
+    'neck_openpose',  # 'upper_neck'
+    'right_hip_openpose',
+    'left_hip_openpose',
+    'nose_openpose',
+    'right_shoulder_openpose',
+    'left_shoulder_openpose',
+    'right_knee_openpose',
+    'left_knee_openpose',
+    'right_ear_openpose',
+    'left_ear_openpose',
+    'right_elbow_openpose',
+    'left_elbow_openpose',
+    'right_ankle_openpose',
+    'left_ankle_openpose',
+    'right_wrist_openpose',
+    'left_wrist_openpose',
+    'left_bigtoe_openpose',
+    'right_bigtoe_openpose',
+]
diff --git a/xrmocap/transform/convention/keypoints_convention/paf.py b/xrmocap/transform/convention/keypoints_convention/paf.py
new file mode 100644
index 00000000..da2519c9
--- /dev/null
+++ b/xrmocap/transform/convention/keypoints_convention/paf.py
@@ -0,0 +1,10 @@
+ALL_PAF_MAPPING = dict(
+    openpose_25=dict(
+        openpose_25=list(range(26)),
+        fourdag_19=[
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 23
+        ],
+        coco=[
+            17, 16, 19, 18, -14, -10, [-11, 7], 12, 8, 13, 9, [-11, 0, 4],
+            [-7, 0, 3], [-4, 3], 5, 1, 6, 2
+        ]))
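A note on how `ALL_PAF_MAPPING` is read by `convert_bottom_up_kps_paf`: a plain index selects a source PAF, a negative index selects the transpose of the PAF at the absolute index, and a list chains several PAFs by thresholded matrix multiplication and rescales by the chain length, to synthesize a destination limb that has no direct source counterpart. A standalone illustration of the chaining for an entry such as `[-11, 7]` (shapes chosen arbitrarily):

```python
import numpy as np

paf_a = np.random.rand(3, 4).T  # limb 11, transposed because of the minus
paf_a = paf_a * (paf_a > 0.1)   # threshold weak connections
paf_b = np.random.rand(3, 2)    # limb 7
chained = np.matmul(paf_a, paf_b)
chained = chained * (chained > 0.1) * 2  # rescaled by the chain length
```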
diff --git a/xrmocap/transform/keypoints3d/optim/builder.py b/xrmocap/transform/keypoints3d/optim/builder.py
index 3f9fed58..50d4791f 100644
--- a/xrmocap/transform/keypoints3d/optim/builder.py
+++ b/xrmocap/transform/keypoints3d/optim/builder.py
@@ -2,6 +2,8 @@
 from .aniposelib_optimizer import AniposelibOptimizer
 from .base_optimizer import BaseOptimizer
+from .fourdag_base_optimizer import FourDAGBaseOptimizer
+from .fourdag_optimization import FourDAGOptimizer
 from .median_smooth import MedianSmooth
 from .nan_interpolation import NanInterpolation
 from .smpl_shape_aware_optimizer import SMPLShapeAwareOptimizer
@@ -19,6 +21,10 @@
     name='AniposelibOptimizer', module=AniposelibOptimizer)
 KEYPOINTS3D_OPTIMIZERS.register_module(
     name='SMPLShapeAwareOptimizer', module=SMPLShapeAwareOptimizer)
+KEYPOINTS3D_OPTIMIZERS.register_module(
+    name='FourDAGBaseOptimizer', module=FourDAGBaseOptimizer)
+KEYPOINTS3D_OPTIMIZERS.register_module(
+    name='FourDAGOptimizer', module=FourDAGOptimizer)
 
 
 def build_keypoints3d_optimizer(cfg) -> BaseOptimizer:
diff --git a/xrmocap/transform/keypoints3d/optim/fourdag_base_optimizer.py b/xrmocap/transform/keypoints3d/optim/fourdag_base_optimizer.py
new file mode 100644
index 00000000..99b6841b
--- /dev/null
+++ b/xrmocap/transform/keypoints3d/optim/fourdag_base_optimizer.py
@@ -0,0 +1,73 @@
+# yapf: disable
+import numpy as np
+from typing import Union
+
+from xrmocap.ops.triangulation.builder import (
+    BaseTriangulator, build_triangulator,
+)
+from xrmocap.utils.fourdag_utils import LimbInfo
+
+# yapf: enable
+
+
+class FourDAGBaseOptimizer():
+
+    def __init__(self,
+                 triangulator: Union[None, dict, BaseTriangulator] = None,
+                 kps_convention='fourdag_19',
+                 min_triangulate_cnt: int = 15,
+                 triangulate_thresh: float = 0.05,
+                 logger=None):
+        """Base class for the FourDAG optimizer.
+
+        Args:
+            triangulator:
+                Triangulator to construct 3D keypoints.
+            kps_convention (str):
+                The name of keypoints convention.
+            min_triangulate_cnt (int):
+                The minimum number of valid 3D keypoints
+                for a person to be accepted.
+            triangulate_thresh (float):
+                The maximum triangulation loss for a keypoint
+                to be accepted.
+            logger (Union[None, str, logging.Logger], optional):
+                Logger for logging. If None, root logger will be selected.
+                Defaults to None.
+        """
+        if isinstance(triangulator, dict):
+            self.triangulator = build_triangulator(triangulator)
+        else:
+            self.triangulator = triangulator
+
+        self.kps_convention = kps_convention
+        self.min_triangulate_cnt = min_triangulate_cnt
+        self.triangulate_thresh = triangulate_thresh
+
+        self.projs = None
+        self.trace_limbs = dict()
+        self.trace_limb_infos = dict()
+        self.limb_info = LimbInfo(self.kps_convention)
+
+    def triangulate_person(self, limb2d):
+        """Triangulate one person from stacked multi-view 2D keypoints,
+        returning a 4 x n_kps limb: xyz rows plus a validity-mask row."""
+        kps2d = limb2d.T.reshape((-1, self.limb_info.get_kps_number(), 3))
+        kps3d = self.triangulator.triangulate(kps2d)
+        mask = self.triangulator.loss < self.triangulate_thresh
+        limb = np.concatenate((kps3d.T, mask.reshape(1, -1)), axis=0)
+        return limb
+
+    def set_cameras(self, camera_parameters):
+        self.triangulator.set_cameras(camera_parameters)
+        self.projs = self.triangulator.projs
+
+    def update(self, limbs2d):
+        """Triangulate every tracked person and keep only those with
+        enough valid keypoints."""
+        for corr_id in limbs2d:
+            limb = self.triangulate_person(limbs2d[corr_id])
+            active = sum(limb[3] > 0) >= self.min_triangulate_cnt
+            if corr_id in self.trace_limbs:
+                if active:
+                    self.trace_limbs[corr_id] = limb
+                else:
+                    self.trace_limbs.pop(corr_id)
+            elif active:
+                self.trace_limbs[corr_id] = limb
+
+        return self.trace_limbs
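The base optimizer can be exercised on its own. A minimal sketch, reusing `cameras` from the triangulator sketch above and assuming `weight/limb_info.json` has been downloaded as described in the README; the `limbs2d` layout follows `triangulate_person`, a dict mapping person id to a (3, n_views * n_kps) array of stacked (x, y, confidence) rows:

```python
import numpy as np
from xrmocap.transform.keypoints3d.optim.builder import (
    build_keypoints3d_optimizer,
)

optimizer = build_keypoints3d_optimizer(
    dict(
        type='FourDAGBaseOptimizer',
        triangulator=dict(type='JacobiTriangulator'),
        kps_convention='fourdag_19'))
optimizer.set_cameras(cameras)

n_views, n_kps = len(cameras), 19
limbs2d = {0: np.random.rand(3, n_views * n_kps).astype(np.float32)}
limbs3d = optimizer.update(limbs2d)  # {person_id: 4 x n_kps limb}
```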
diff --git a/xrmocap/transform/keypoints3d/optim/fourdag_optimization.py b/xrmocap/transform/keypoints3d/optim/fourdag_optimization.py
new file mode 100644
index 00000000..97ea977f
--- /dev/null
+++ b/xrmocap/transform/keypoints3d/optim/fourdag_optimization.py
@@ -0,0 +1,653 @@
+# yapf: disable
+import copy
+import numpy as np
+from typing import Union
+
+from xrmocap.ops.triangulation.builder import BaseTriangulator
+from xrmocap.transform.keypoints3d.optim.fourdag_base_optimizer import (
+    FourDAGBaseOptimizer,
+)
+from xrmocap.utils.fourdag_utils import (
+    LimbInfo, rodrigues, rodrigues_jacobi, welsch,
+)
+
+# yapf: enable
+
+
+class PersonInfo():
+
+    def __init__(self, kps_convention) -> None:
+        """Store per-person limb information and solver parameters."""
+        self.kps_convention = kps_convention
+        self.limb_info = LimbInfo(self.kps_convention)
+        self.boneLen = np.zeros(
+            self.limb_info.get_kps_number() - 1, dtype=np.float32)
+        self.boneCnt = np.zeros(
+            self.limb_info.get_kps_number() - 1, dtype=np.float32)
+        self.active = 0.0
+        self.shape_fixed = False
+        self.data = np.zeros(
+            3 + self.limb_info.get_kps_number() * 3 +
+            self.limb_info.get_shape_size(),
+            dtype=np.float32)
+
+    def push_previous_bones(self, limb):
+        """Update the running average of bone lengths with a new limb."""
+        for kps_id in range(1, self.limb_info.get_kps_number()):
+            prt_idx = self.limb_info.get_kps_parent()[kps_id]
+            if limb[3, kps_id] > 0 and limb[3, prt_idx] > 0:
+                bone_len = np.linalg.norm(limb[:, kps_id][:3] -
+                                          limb[:, prt_idx][:3])
+                self.boneLen[kps_id - 1] = (
+                    self.boneCnt[kps_id - 1] * self.boneLen[kps_id - 1] +
+                    bone_len) / (self.boneCnt[kps_id - 1] + 1)
+                self.boneCnt[kps_id - 1] += 1
+
+    def get_trans(self):
+        return self.data[:3]
+
+    def get_pose(self):
+        return self.data[3:3 + self.limb_info.get_kps_number() * 3]
+
+    def get_trans_pose(self):
+        return self.data[:3 + self.limb_info.get_kps_number() * 3]
+
+    def get_shape(self):
+        return self.data[-self.limb_info.get_shape_size():]
+
+
+class SolverTerm():
+
+    def __init__(self,
+                 w_kps3d=0,
+                 w_bone3d=0,
+                 w_kps2d=0,
+                 projs=None,
+                 w_temporal_trans=0,
+                 w_temporal_pose=0,
+                 w_temporal_shape=0,
+                 w_regular_pose=0,
+                 w_regular_shape=0,
+                 w_square_shape=0):
+        """Store the weights and targets of the terms used during
+        optimization."""
+        # kps 3d
+        self.w_kps3d = w_kps3d
+        self.kps3d_target = None
+
+        # bone 3d
+        self.w_bone3d = w_bone3d
+        self.bone3d_target = None
+
+        # kps 2d
+        self.w_kps2d = w_kps2d
+        self.projs = projs
+        self.kps2d_target = None
+
+        # temporal
+        self.w_temporal_trans = w_temporal_trans
+        self.w_temporal_pose = w_temporal_pose
+        self.w_temporal_shape = w_temporal_shape
+        self.paramPrev = None
+
+        # regular
+        self.w_regular_pose = w_regular_pose
+        self.w_regular_shape = w_regular_shape
+        self.w_square_shape = w_square_shape
+
+    def set_kps3d_target(self, kps3d_target):
+        self.kps3d_target = kps3d_target
+
+    def set_bone3d_target(self, bone3d_target):
+        self.bone3d_target = bone3d_target
+
+    def set_kps2d_target(self, kps2d_target):
+        self.kps2d_target = kps2d_target
+
+    def set_paramPrev(self, paramPrev):
+        self.paramPrev = paramPrev
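+
+
+# Illustrative configuration (not a fixed recipe): a term for a
+# 2D-reprojection pose solve with temporal priors, with weights that
+# mirror the FourDAGOptimizer defaults below, would look like
+#   term = SolverTerm(w_kps2d=1e-5, projs=projs, w_regular_pose=1e-3,
+#                     w_temporal_trans=1e-1, w_temporal_pose=1e-2)
+#   term.set_kps2d_target(kps2d_target)
+#   term.set_paramPrev(previous_person_info)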
+
+
+class LimbSolver():
+
+    def __init__(self, kps_convention) -> None:
+        """Solve human pose and shape."""
+        self.kps_convention = kps_convention
+        self.limb_info = LimbInfo(self.kps_convention)
+        self.m_kps = np.array(self.limb_info.get_kps_prior()).reshape(
+            3, self.limb_info.get_kps_number())
+        self.shape_blend = np.array(self.limb_info.get_shape_blend()).reshape(
+            self.limb_info.get_kps_number() * 3,
+            self.limb_info.get_shape_size())
+        self.bone_shape_blend = np.zeros(
+            (3 * (self.limb_info.get_kps_number() - 1),
+             self.limb_info.get_shape_size()),
+            dtype=np.float32)
+        for kps_id in range(1, self.limb_info.get_kps_number()):
+            self.bone_shape_blend[3 * (kps_id - 1):3 * (kps_id - 1)+3]\
+                = self.shape_blend[3 * kps_id:3 * kps_id+3] \
+                - self.shape_blend[
+                    3 * self.limb_info.get_kps_parent()[kps_id]:
+                    3 * self.limb_info.get_kps_parent()[kps_id]+3]
+
+    def cal_kps_with_chain_warps(self, chain_warps):
+        kps_final = np.zeros((3, int(chain_warps.shape[1] / 4)),
+                             dtype=np.float32)
+        for kps_id in range(kps_final.shape[1]):
+            kps_final[:, kps_id] = (chain_warps[0:0 + 3, 4 * kps_id +
+                                                3:4 * kps_id + 3 +
+                                                1]).reshape((-1))
+        return kps_final
+
+    def cal_kps_with_param(self, param, j_cut=-1):
+        j_cut = j_cut if j_cut > 0 else self.m_kps.shape[1]
+        kps_blend = self.cal_kps_blend(param)
+        return self.cal_kps_with_chain_warps(
+            self.cal_chain_warps(
+                self.cal_node_warps(param, kps_blend[:, :j_cut])))
+
+    def cal_kps_blend(self, param):
+        kps_offset = np.matmul(self.shape_blend, param.get_shape())
+        kps_blend = self.m_kps + kps_offset.reshape(
+            (self.m_kps.shape[1], 3)).T
+        return kps_blend
+
+    def cal_node_warps(self, param, kps_blend):
+        node_warps = np.zeros((4, kps_blend.shape[1] * 4), dtype=np.float32)
+        for kps_id in range(kps_blend.shape[1]):
+            matrix = np.identity(4, dtype=np.float32)
+            if kps_id == 0:
+                matrix[:3, -1:] = (kps_blend[:, kps_id] +
+                                   param.get_trans()).reshape((-1, 1))
+            else:
+                matrix[:3, -1:] = (
+                    kps_blend[:, kps_id] -
+                    kps_blend[:, self.limb_info.get_kps_parent()[kps_id]]
+                ).reshape((-1, 1))
+
+            matrix[:3, :3] = rodrigues(
+                param.get_pose()[3 * kps_id:3 * kps_id + 3])
+            node_warps[:4, 4 * kps_id:4 * kps_id + 4] = matrix
+        return node_warps
+
+    def cal_chain_warps(self, node_warps):
+        chain_warps = np.zeros((4, node_warps.shape[1]), dtype=np.float32)
+        for kps_id in range(int(node_warps.shape[1] / 4)):
+            if kps_id == 0:
+                chain_warps[:, kps_id * 4:kps_id * 4 +
+                            4] = node_warps[:, kps_id * 4:kps_id * 4 + 4]
+            else:
+                prt_idx = self.limb_info.get_kps_parent()[kps_id]
+                chain_warps[:, kps_id * 4:kps_id * 4 + 4] = np.matmul(
+                    chain_warps[:, prt_idx * 4:prt_idx * 4 + 4],
+                    node_warps[:, kps_id * 4:kps_id * 4 + 4])
+        return chain_warps
+
+    def align_root_affine(self, term, param):
+        # align root affine
+        param.data[:3] = term.kps3d_target[:, 0][:3] - self.m_kps[:, 0]
+
+        def cal_axes(x_axis, y_axis):
+            axes = np.zeros((3, 3), dtype=np.float32)
+            axes[:, 0] = x_axis / np.linalg.norm(x_axis)
+            axes[:, 2] = np.cross(x_axis, y_axis) / np.linalg.norm(
+                np.cross(x_axis, y_axis))
+            axes[:, 1] = np.cross(axes[:, 2], axes[:, 0]) / np.linalg.norm(
+                np.cross(axes[:, 2], axes[:, 0]))
+            return axes
+
+        mat = np.matmul(
+            cal_axes(
+                term.kps3d_target[:, 2][:3] - term.kps3d_target[:, 1][:3],
+                term.kps3d_target[:, 3][:3] - term.kps3d_target[:, 1][:3]),
+            (np.linalg.inv(
+                cal_axes(self.m_kps[:, 2] - self.m_kps[:, 1],
+                         self.m_kps[:, 3] - self.m_kps[:, 1]))))
+        # convert the root rotation matrix to an axis-angle vector
+        angle = np.arccos((mat[0, 0] + mat[1, 1] + mat[2, 2] - 1) / 2)
+        denom = np.sqrt((mat[2, 1] - mat[1, 2])**2 +
+                        (mat[0, 2] - mat[2, 0])**2 +
+                        (mat[1, 0] - mat[0, 1])**2)
+        x = (mat[2, 1] - mat[1, 2]) / denom
+        y = (mat[0, 2] - mat[2, 0]) / denom
+        z = (mat[1, 0] - mat[0, 1]) / denom
+        param.data[3:3 + self.limb_info.get_kps_number() *
+                   3][:3] = angle * np.array([x, y, z], dtype=np.float32)
+
+    def solve_pose(self,
+                   term,
+                   param,
+                   maxIter_time,
+                   hierarchy=False,
+                   update_thresh=1e-4):
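+        """Solve the pose (and global translation) of one person.
+
+        Runs Gauss-Newton iterations over the kinematic chain: with
+        ``hierarchy=True`` joints are activated level by level following
+        the hierarchy map, otherwise all joints are solved at once. The
+        normal equations accumulate whichever 3D, 2D, temporal and
+        regularization terms are enabled in ``term``.
+        """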
+        kps_blend = self.cal_kps_blend(param)
+        hier_size = max(self.limb_info.get_hierarchy_map())
+        hier = 0 if hierarchy else hier_size
+        j_cut = 0
+        while hier <= hier_size:
+            while j_cut < self.limb_info.get_kps_number() \
+                    and self.limb_info.get_hierarchy_map()[j_cut] <= hier:
+                j_cut += 1
+            for iter_time in range(maxIter_time):
+                node_warps = self.cal_node_warps(param, kps_blend[:, :j_cut])
+                chain_warps = self.cal_chain_warps(node_warps)
+                kps_final = self.cal_kps_with_chain_warps(chain_warps)
+                kps_jacobi = np.zeros((3 * j_cut, 3 + 3 * j_cut),
+                                      dtype=np.float32)
+                ATA = np.zeros((3 + 3 * j_cut, 3 + 3 * j_cut),
+                               dtype=np.float32)
+                ATb = np.zeros((3 + 3 * j_cut), dtype=np.float32)
+                node_warps_jacobi = np.zeros((9, 3 * j_cut), dtype=np.float32)
+                for kps_id in range(j_cut):
+                    node_warps_jacobi[:, 3 * kps_id:3 * kps_id + 3] = \
+                        rodrigues_jacobi(
+                            param.get_pose()[3 * kps_id:3 * kps_id + 3]).T
+                for d_jidx in range(j_cut):
+                    kps_jacobi[3 * d_jidx:3 * d_jidx + 3, :3] = np.identity(
+                        3, dtype=np.float32)
+                    for dAxis in range(3):
+                        d_chain_warps = np.zeros((4, 4 * j_cut),
+                                                 dtype=np.float32)
+                        valid = np.zeros(j_cut, dtype=np.float32)
+                        valid[d_jidx] = 1
+                        d_chain_warps[:3, 4 * d_jidx:4 * d_jidx + 3] = \
+                            node_warps_jacobi[:, 3 * d_jidx + dAxis].copy(
+                                ).reshape((3, 3)).T
+                        if d_jidx != 0:
+                            prt_idx = self.limb_info.get_kps_parent()[d_jidx]
+                            d_chain_warps[:, 4 * d_jidx:4 * d_jidx + 4] = \
+                                np.matmul(
+                                    chain_warps[:,
+                                                4 * prt_idx:4 * prt_idx + 4],
+                                    d_chain_warps[:,
+                                                  4 * d_jidx:4 * d_jidx + 4])
+
+                        for kps_id in range(d_jidx + 1, j_cut):
+                            prt_idx = self.limb_info.get_kps_parent()[kps_id]
+                            valid[kps_id] = valid[prt_idx]
+                            if valid[kps_id]:
+                                d_chain_warps[:,
+                                              4 * kps_id:4 * kps_id + 4] = \
+                                    np.matmul(
+                                        d_chain_warps[:, 4 * prt_idx:
+                                                      4 * prt_idx + 4],
+                                        node_warps[:, 4 * kps_id:
+                                                   4 * kps_id + 4])
+                                kps_jacobi[kps_id * 3:kps_id * 3 + 3,
+                                           3 + d_jidx * 3 + dAxis:
+                                           3 + d_jidx * 3 + dAxis + 1] = \
+                                    d_chain_warps[0:0 + 3,
+                                                  4 * kps_id + 3:
+                                                  4 * kps_id + 3 + 1]
+                if term.w_kps3d > 0:
+                    for kps_id in range(j_cut):
+                        if term.kps3d_target[3, kps_id] > 0:
+                            w = term.w_kps3d * term.kps3d_target[3, kps_id]
+                            jacobi = kps_jacobi[3 * kps_id:3 * kps_id + 3]
+                            ATA += w * np.matmul(jacobi.T, jacobi)
+                            ATb += w * np.matmul(
+                                jacobi.T,
+                                (term.kps3d_target[0:0 + 3, kps_id:kps_id + 1]
+                                 - kps_final[:, kps_id].reshape(
+                                     (-1, 1)))).reshape(-1)
+
+                if term.w_kps2d > 0:
+                    n_kps = self.limb_info.get_kps_number()
+                    for view in range(int(term.projs.shape[1] / 4)):
+                        kps2d_target = term.kps2d_target[:, view * n_kps:
+                                                         view * n_kps + n_kps]
+                        if sum(kps2d_target[2] > 0) > 0:
+                            proj = term.projs[:, view * 4:view * 4 + 4]
+                            for kps_id in range(j_cut):
+                                if kps2d_target[2, kps_id] > 0:
+                                    abc = np.matmul(
+                                        proj,
+                                        np.append(kps_final[:, kps_id], 1))
+                                    proj_jacobi = np.array(
+                                        [
+                                            1.0 / abc[2], 0.0,
+                                            -abc[0] / (abc[2] * abc[2]),
+                                            0.0, 1.0 / abc[2],
+                                            -abc[1] / (abc[2] * abc[2])
+                                        ],
+                                        dtype=np.float32).reshape((2, 3))
+                                    proj_jacobi = np.matmul(
+                                        proj_jacobi, proj[:, :3])
+
+                                    w = term.w_kps2d * kps2d_target[2, kps_id]
+                                    jacobi = np.matmul(
+                                        proj_jacobi,
+                                        kps_jacobi[3 * kps_id:3 * kps_id + 3])
+
+                                    ATA += w * np.matmul(jacobi.T, jacobi)
+                                    ATb += w * np.matmul(
+                                        jacobi.T,
+                                        kps2d_target[:2, kps_id:kps_id +
+                                                     1].reshape(-1) -
+                                        abc[:2] / abc[2])
+
+                if term.w_temporal_trans > 0:
+                    ATA[:3, :3] += term.w_temporal_trans * np.identity(
+                        3, dtype=np.float32)
+                    ATb[:3] += term.w_temporal_trans * (
+                        term.paramPrev.get_trans() - param.get_trans())
+
+                if term.w_temporal_pose > 0:
+                    ATA[-3 * j_cut:,
+                        -3 * j_cut:] += term.w_temporal_pose * np.identity(
+                            3 * j_cut, dtype=np.float32)
+                    ATb[-3 * j_cut:] += term.w_temporal_pose * (
+                        term.paramPrev.get_pose()[:3 * j_cut] -
+                        param.get_pose()[:3 * j_cut])
+
+                if term.w_regular_pose > 0:
+                    ATA += term.w_regular_pose * np.identity(
+                        3 + 3 * j_cut, dtype=np.float32)
+
+                delta = np.linalg.solve(ATA, ATb)
+                param.data[:3 + self.limb_info.get_kps_number() *
+                           3][:3 + 3 * j_cut] += delta
+
+                if np.linalg.norm(delta) < update_thresh:
+                    break
+            hier += 1
+
+    def solve_shape(self, term, param, maxIter_time, update_thresh=1e-4):
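+        """Solve the shape coefficients of one person.
+
+        Accumulates the enabled bone-length, 3D-keypoint, 2D-reprojection,
+        temporal and regularization terms into normal equations over the
+        shape blend coefficients, and applies Gauss-Newton updates until
+        the step norm drops below ``update_thresh``.
+        """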
+        for iter_time in range(maxIter_time):
+            # calc status
+            kps_blend = self.cal_kps_blend(param)
+            ATA = np.zeros((self.limb_info.get_shape_size(),
+                            self.limb_info.get_shape_size()),
+                           dtype=np.float32)
+            ATb = np.zeros(self.limb_info.get_shape_size(), dtype=np.float32)
+
+            if term.w_bone3d > 0:
+                for kps_id in range(1, self.limb_info.get_kps_number()):
+                    if term.bone3d_target[1, kps_id - 1] > 0:
+                        w = term.w_bone3d * term.bone3d_target[1, kps_id - 1]
+                        prt_idx = self.limb_info.get_kps_parent()[kps_id]
+                        bone_dir = kps_blend[:, kps_id] - \
+                            kps_blend[:, prt_idx]
+                        jacobi = self.bone_shape_blend[3 * (kps_id - 1):3 *
+                                                       (kps_id - 1) + 3]
+                        ATA += w * np.matmul(jacobi.T, jacobi)
+                        ATb += w * np.matmul(
+                            jacobi.T, term.bone3d_target[0, kps_id - 1] *
+                            (bone_dir / np.linalg.norm(bone_dir)) - bone_dir)
+
+            if term.w_kps3d > 0 or term.w_kps2d > 0:
+                chain_warps = self.cal_chain_warps(
+                    self.cal_node_warps(param, kps_blend))
+                kps_final = self.cal_kps_with_chain_warps(chain_warps)
+                kps_jacobi = np.zeros((3 * self.limb_info.get_kps_number(),
+                                       self.limb_info.get_shape_size()),
+                                      dtype=np.float32)
+                for kps_id in range(self.limb_info.get_kps_number()):
+                    if kps_id == 0:
+                        kps_jacobi[3 * kps_id:3 * kps_id + 3] = \
+                            self.shape_blend[3 * kps_id:3 * kps_id + 3]
+                    else:
+                        prt_idx = self.limb_info.get_kps_parent()[kps_id]
+                        kps_jacobi[3 * kps_id:3 * kps_id+3] =\
+                            kps_jacobi[3 * prt_idx:3 * prt_idx+3] \
+                            + chain_warps[:3, 4*prt_idx+3] * (
+                                self.shape_blend[3 * kps_id:3 * kps_id+3] -
+                                self.shape_blend[3 * prt_idx:3 * prt_idx+3])
+
+                if term.w_kps3d > 0:
+                    for kps_id in range(self.limb_info.get_kps_number()):
+                        if term.kps3d_target[3, kps_id] > 0:
+                            w = term.w_kps3d * term.kps3d_target[3, kps_id]
+                            jacobi = kps_jacobi[3 * kps_id:3 * kps_id + 3]
+                            ATA += w * np.matmul(jacobi.T, jacobi)
+                            ATb += w * np.matmul(
+                                jacobi.T,
+                                (term.kps3d_target[0:0 + 3, kps_id] -
+                                 kps_final[:, kps_id]))
+
+                if term.w_kps2d > 0:
+                    n_kps = self.limb_info.get_kps_number()
+                    for view in range(int(term.projs.shape[1] / 4)):
+                        kps2d_target = term.kps2d_target[:, view * n_kps:
+                                                         view * n_kps + n_kps]
+                        if sum(kps2d_target[2] > 0) > 0:
+                            proj = term.projs[:, view * 4:view * 4 + 4]
+                            for kps_id in range(n_kps):
+                                if kps2d_target[2, kps_id] > 0:
+                                    abc = np.matmul(
+                                        proj,
+                                        np.append(kps_final[:, kps_id], 1))
+                                    proj_jacobi = np.array(
+                                        [
+                                            1.0 / abc[2], 0.0,
+                                            -abc[0] / (abc[2] * abc[2]),
+                                            0.0, 1.0 / abc[2],
+                                            -abc[1] / (abc[2] * abc[2])
+                                        ],
+                                        dtype=np.float32).reshape((2, 3))
+                                    proj_jacobi = np.matmul(
+                                        proj_jacobi, proj[:, :3])
+
+                                    w = term.w_kps2d * kps2d_target[2, kps_id]
+                                    jacobi = np.matmul(
+                                        proj_jacobi,
+                                        kps_jacobi[3 * kps_id:3 * kps_id + 3])
+                                    ATA += w * np.matmul(jacobi.T, jacobi)
+                                    ATb += w * np.matmul(
+                                        jacobi.T,
+                                        kps2d_target[:2, kps_id] -
+                                        abc[:2] / abc[2])
+
+            if term.w_temporal_shape > 0:
+                ATA += term.w_temporal_shape * np.identity(
+                    self.limb_info.get_shape_size(), dtype=np.float32)
+                ATb += term.w_temporal_shape * (
+                    term.paramPrev.get_shape() - param.get_shape())
+
+            if term.w_square_shape > 0:
+                ATA += term.w_square_shape * np.identity(
+                    self.limb_info.get_shape_size(), dtype=np.float32)
+                ATb -= term.w_square_shape * param.get_shape()
+
+            if term.w_regular_shape > 0:
+                ATA += term.w_regular_shape * np.identity(
+                    self.limb_info.get_shape_size(), dtype=np.float32)
+
+            delta = np.linalg.solve(ATA, ATb)
+            param.data[-self.limb_info.get_shape_size():] += delta
+
+            if np.linalg.norm(delta) < update_thresh:
+                break
+
+
+class FourDAGOptimizer(FourDAGBaseOptimizer):
+
+    def __init__(self,
+                 triangulator: Union[None, dict, BaseTriangulator] = None,
+                 active_rate: float = 0.1,
+                 min_track_cnt: int = 5,
+                 bone_capacity: int = 100,
+                 w_bone3d: float = 1.0,
+                 w_square_shape: float = 1e-2,
+                 shape_max_iter: int = 5,
+                 w_kps3d: float = 1.0,
+                 w_regular_pose: float = 1e-3,
+                 pose_max_iter: int = 20,
+                 w_kps2d: float = 1e-5,
+                 w_temporal_trans: float = 1e-1,
+                 w_temporal_pose: float = 1e-2,
+                 min_triangulate_cnt: int = 15,
+                 init_active: float = 0.9,
+                 triangulate_thresh: float = 0.05,
+                 kps_convention: str = 'fourdag_19',
+                 logger=None):
+        """Optimize with 2D projection loss, shape prior and temporal
+        smoothing.
+
+        Args:
+            triangulator:
+                Triangulator to construct 3D keypoints.
+            active_rate (float):
+                Update rate of the active value.
+            min_track_cnt (int):
+                Scale on the number of tracked 2D keypoints used when
+                updating the active value.
+            bone_capacity (int):
+                The minimum number of bone-length samples required
+                before the shape is solved and fixed.
+            w_bone3d (float):
+                Weight of the 3D bone-length loss for solving shape.
+            w_square_shape (float):
+                Weight of the shape regularization loss.
+            shape_max_iter (int):
+                Maximum number of iterations for solving shape.
+            w_kps3d (float):
+                Weight of the 3D keypoint loss for solving pose.
+            w_regular_pose (float):
+                Weight of the pose regularization loss.
+            pose_max_iter (int):
+                Maximum number of iterations for solving pose.
+            w_kps2d (float):
+                Weight of the 2D keypoint reprojection loss
+                for solving pose.
+            w_temporal_trans (float):
+                Weight of the temporal translation smoothing loss.
+            w_temporal_pose (float):
+                Weight of the temporal pose smoothing loss.
+            min_triangulate_cnt (int):
+                The minimum number of valid 3D keypoints
+                for a person to be accepted.
+            init_active (float):
+                Initial active value for a newly allocated person.
+            triangulate_thresh (float):
+                The maximum triangulation loss for a keypoint
+                to be accepted.
+            kps_convention (str):
+                The name of keypoints convention.
+            logger (Union[None, str, logging.Logger], optional):
+                Logger for logging. If None, root logger will be selected.
+                Defaults to None.
+        """
+
+        super().__init__(
+            triangulator=triangulator,
+            kps_convention=kps_convention,
+            min_triangulate_cnt=min_triangulate_cnt,
+            triangulate_thresh=triangulate_thresh,
+            logger=logger)
+        self.active_rate = active_rate
+        self.min_track_cnt = min_track_cnt
+        self.bone_capacity = bone_capacity
+        self.w_bone3d = w_bone3d
+        self.w_square_shape = w_square_shape
+        self.shape_max_iter = shape_max_iter
+        self.w_kps3d = w_kps3d
+        self.w_regular_pose = w_regular_pose
+        self.pose_max_iter = pose_max_iter
+        self.w_kps2d = w_kps2d
+        self.w_temporal_trans = w_temporal_trans
+        self.w_temporal_pose = w_temporal_pose
+        self.init_active = init_active
+        self.limb_solver = LimbSolver(kps_convention)
+        self.limb_info = LimbInfo(self.kps_convention)
+
+    def update(self, limbs2d):
+        for corr_id in limbs2d:
+            if corr_id in self.trace_limbs:
+                info = self.trace_limb_infos[corr_id]
+                limb = self.trace_limbs[corr_id]
+                active = min(
+                    info.active + self.active_rate *
+                    (2.0 * welsch(self.min_track_cnt,
+                                  sum(limbs2d[corr_id][2] > 0)) - 1.0), 1.0)
+                if info.active < 0:
+                    self.trace_limbs.pop(corr_id)
+                    self.trace_limb_infos.pop(corr_id)
+                    continue
+
+                if not info.shape_fixed:
+                    limb = self.triangulate_person(limbs2d[corr_id])
+                    if sum(limb[3] > 0) >= self.min_triangulate_cnt:
+                        info.push_previous_bones(limb)
+                        if min(info.boneCnt) >= self.bone_capacity:
+                            # enough bone-length samples: solve and fix
+                            # the shape, then align the pose to it
+                            shape_term = SolverTerm(
+                                w_bone3d=self.w_bone3d,
+                                w_square_shape=self.w_square_shape)
+                            shape_term.set_bone3d_target(
+                                np.row_stack((info.boneLen.T,
+                                              np.ones(
+                                                  info.boneLen.shape[0],
+                                                  dtype=np.float32))))
+                            self.limb_solver.solve_shape(
+                                shape_term, info, self.shape_max_iter)
+
+                            # align pose
+                            pose_term = SolverTerm(
+                                w_kps3d=self.w_kps3d,
+                                w_regular_pose=self.w_regular_pose)
+                            pose_term.set_kps3d_target(limb)
+                            self.limb_solver.align_root_affine(
+                                pose_term, info)
+                            self.limb_solver.solve_pose(
+                                pose_term, info, self.pose_max_iter)
+                            limb[:3] = self.limb_solver.cal_kps_with_param(
+                                info)
+                            info.shape_fixed = True
+                        self.trace_limbs[corr_id] = limb
+
+                else:
+                    # align pose
+                    pose_term = SolverTerm(
+                        w_kps2d=self.w_kps2d,
+                        projs=self.projs,
+                        w_regular_pose=self.w_regular_pose,
+                        w_temporal_trans=self.w_temporal_trans,
+                        w_temporal_pose=self.w_temporal_pose)
+                    pose_term.set_kps2d_target(
+                        copy.deepcopy(limbs2d[corr_id]))
+                    # filter out single-view correspondences
+                    n_kps = self.limb_info.get_kps_number()
+                    corr_cnt = np.zeros(n_kps, dtype=np.float32)
+                    kps_confidence = np.ones(n_kps, dtype=np.float32)
+                    for view in range(int(self.projs.shape[1] / 4)):
+                        corr_cnt += (
+                            (pose_term.kps2d_target[:, view * n_kps:
+                                                    n_kps * (view + 1)]
+                             [2].T > 0).astype(int))
+                    for kps_id in range(n_kps):
+                        if corr_cnt[kps_id] <= 1:
+                            kps_confidence[kps_id] = 0
+                            for view in range(int(self.projs.shape[1] / 4)):
+                                pose_term.kps2d_target[:, view * n_kps +
+                                                       kps_id] = 0
+
+                    pose_term.set_paramPrev(info)
+                    self.limb_solver.solve_pose(pose_term, info,
+                                                self.pose_max_iter)
+                    limb[:3] = self.limb_solver.cal_kps_with_param(info)
+                    limb[3] = kps_confidence.T
+                    # update active
+                    info.active = active
+            else:
+                limb = self.triangulate_person(limbs2d[corr_id])
+                # alloc new person
+                if sum(limb[3] > 0) >= self.min_triangulate_cnt:
+                    self.trace_limb_infos[corr_id] = PersonInfo(
+                        self.kps_convention)
+                    info = self.trace_limb_infos[corr_id]
+                    info.push_previous_bones(limb)
+                    info.active = self.init_active
+                    self.trace_limbs[corr_id] = limb
+        return self.trace_limbs
diff --git a/xrmocap/utils/fourdag_utils.py b/xrmocap/utils/fourdag_utils.py
new file mode 100644
index 00000000..ca2617fa
--- /dev/null
+++ b/xrmocap/utils/fourdag_utils.py
@@ -0,0 +1,124 @@
+import json
+import math
+import numpy as np
+
+
+class LimbInfo():
+
+    def __init__(self, kps_convention) -> None:
+        self.kps_convention = kps_convention
+        with open('./weight/limb_info.json', 'r') as f:
+            self.info_dict = json.load(f)[self.kps_convention]
+
+    def get_kps_number(self):
+        """Get keypoints number."""
+        return self.info_dict['n_kps']
+
+    def get_paf_number(self):
+        """Get PAF number."""
+        return self.info_dict['n_pafs']
+
+    def get_shape_size(self):
+        """Get the prior shape number."""
+        return self.info_dict['shape_size']
+
+    def get_kps_parent(self):
+        """Get keypoints parent list."""
+        return self.info_dict['kps_parent']
+
+    def get_shape_blend(self):
+        """Get shape blend."""
+        return self.info_dict['shape_blend']
+
+    def get_kps_prior(self):
+        """Get prior keypoints."""
+        return self.info_dict['m_kps']
+
+    def get_hierarchy_map(self):
+        """Get hierarchy map for keypoints."""
+        return self.info_dict['hierarchy_map']
+
+    def get_paf_dict(self):
+        """Get PAF dict."""
+        return self.info_dict['paf_dict']
+
+
+def welsch(c, x):
+    """Welsch robust weight: 1 - exp(-(x / c)^2 / 2)."""
+    x = x / c
+    return 1 - math.exp(-x * x / 2)
+
+
+def line2linedist(pa, raya, pb, rayb):
+    if abs(np.vdot(raya, rayb)) < 1e-5:
+        return point2linedist(pa, pb, raya)
+    else:
+        ve = np.cross(raya, rayb)
+        ve = ve / np.linalg.norm(ve)
+        ve = abs(np.vdot((pa - pb), ve))
+        return ve
+
+
+def point2linedist(pa, pb, ray):
+    """Distance from point pa to the line through pb along unit ray."""
+    ve = np.cross(pa - pb, ray)
+    return np.linalg.norm(ve)
+
+
+def skew(vec):
+    """Skew-symmetric cross-product matrix of a 3-vector."""
+    m_skew = np.array(
+        [0, -vec[2], vec[1], vec[2], 0, -vec[0], -vec[1], vec[0], 0],
+        dtype=np.float32).reshape((3, 3))
+    return m_skew
+
+
+def rodrigues(vec):
+    """Axis-angle vector to rotation matrix via the Rodrigues formula."""
+    theta = np.linalg.norm(vec)
+    identity = np.identity(3, dtype=np.float32)
+    if abs(theta) < 1e-5:
+        return identity
+    else:
+        c = np.cos(theta)
+        s = np.sin(theta)
+        r = vec / theta
+        return c * identity + np.matmul((1 - c) * r.reshape(
+            (-1, 1)), r.reshape((1, -1))) + s * skew(r)
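+
+
+# rodrigues_jacobi returns the 3 x 9 derivative of the flattened rotation
+# matrix with respect to the axis-angle vector: row i is d(vec R)/d(vec_i).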
+def rodrigues_jacobi(vec):
+    theta = np.linalg.norm(vec)
+    d_skew = np.zeros((3, 9), dtype=np.float32)
+    d_skew[0, 5] = d_skew[1, 6] = d_skew[2, 1] = -1
+    d_skew[0, 7] = d_skew[1, 2] = d_skew[2, 3] = 1
+    if abs(theta) < 1e-5:
+        return -d_skew
+    else:
+        c = np.cos(theta)
+        s = np.sin(theta)
+        c1 = 1 - c
+        itheta = 1 / theta
+        r = vec / theta
+        rrt = np.matmul(r.reshape((-1, 1)), r.reshape((1, -1)))
+        m_skew = skew(r)
+        identity = np.identity(3, dtype=np.float32)
+        drrt = np.array([
+            r[0] + r[0], r[1], r[2], r[1], 0, 0, r[2], 0, 0, 0, r[0], 0, r[0],
+            r[1] + r[1], r[2], 0, r[2], 0, 0, 0, r[0], 0, 0, r[1], r[0], r[1],
+            r[2] + r[2]
+        ],
+                        dtype=np.float32).reshape((3, 9))
+        jacobi = np.zeros((3, 9), dtype=np.float32)
+        for i in range(3):
+            a = np.array([
+                -s * r[i], (s - 2 * c1 * itheta) * r[i], c1 * itheta,
+                (c - s * itheta) * r[i], s * itheta
+            ],
+                         dtype=np.float32).reshape((5, 1))
+            for j in range(3):
+                for k in range(3):
+                    jacobi[i, 3 * k + j] = (
+                        a[0] * identity[j, k] + a[1] * rrt[j, k] +
+                        a[2] * drrt[i, 3 * j + k] + a[3] * m_skew[j, k] +
+                        a[4] * d_skew[i, 3 * j + k])
+        return jacobi
diff --git a/xrmocap/utils/mvpose_utils.py b/xrmocap/utils/mvpose_utils.py
index b998b8f0..c7e02945 100644
--- a/xrmocap/utils/mvpose_utils.py
+++ b/xrmocap/utils/mvpose_utils.py
@@ -311,13 +311,22 @@ def add_campus_jaw_headtop(nose, kps3d_campus):
     for frame_idx in range(nose.shape[0]):
         for i, kps3d in enumerate(kps3d_campus[frame_idx]):
             add_kps3d = np.zeros((2, 3))
-            add_kps3d[0] = (kps3d[8] + kps3d[9]) / 2  # Use middle of shoulder
-            add_kps3d[1] = nose[frame_idx][i]  # use nose
-            add_kps3d[1] = add_kps3d[0] + (
-                add_kps3d[1] - add_kps3d[0]) * np.array([0.75, 0.75, 1.5])
-            add_kps3d[0] = add_kps3d[0] + (nose[frame_idx][i] -
-                                           add_kps3d[0]) * np.array(
-                                               [1. / 2., 1. / 2., 1. / 2.])
+
+            hip_center = (kps3d[2] + kps3d[3]) / 2
+            shoulder_center = (kps3d[8] + kps3d[9]) / 2
+            head_center = nose[frame_idx][i]
+            add_kps3d[0] = shoulder_center + (head_center -
+                                              shoulder_center) * 0.5
+            face_dir = np.cross(shoulder_center - hip_center,
+                                kps3d[8] - kps3d[9])
+            if np.isnan(face_dir).any():
+                add_kps3d[1] = shoulder_center + (
+                    head_center - shoulder_center) * np.array(
+                        [0.75, 0.75, 1.5])
+            else:
+                face_dir = face_dir / np.linalg.norm(face_dir)
+                z_dir = np.array([0., 0., 1.], dtype=np.float64)
+                add_kps3d[1] = add_kps3d[0] + face_dir * 0.125 + z_dir * 0.145
+
             kps3d[-2:] = add_kps3d
     return kps3d_campus
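Putting the new pieces together, a minimal sketch of the per-frame optimization loop. The associator that produces `limbs2d` for each frame is omitted here, and `cameras` and `frames` are placeholders for loaded camera parameters and associated 2D detections:

```python
from xrmocap.transform.keypoints3d.optim.builder import (
    build_keypoints3d_optimizer,
)

optimizer = build_keypoints3d_optimizer(
    dict(
        type='FourDAGOptimizer',
        triangulator=dict(type='JacobiTriangulator'),
        kps_convention='fourdag_19'))
optimizer.set_cameras(cameras)  # loaded FisheyeCameraParameter instances

for limbs2d in frames:  # {person_id: (3, n_views * n_kps) array} per frame
    limbs3d = optimizer.update(limbs2d)
    for person_id, limb in limbs3d.items():
        kps3d, mask = limb[:3].T, limb[3]  # (n_kps, 3) and validity mask
```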