add vitpose_wholebody #9282

Open · wants to merge 2 commits into base: release/2.8
10 changes: 5 additions & 5 deletions deploy/python/det_keypoint_unite_infer.py
@@ -24,14 +24,15 @@
from preprocess import decode_image
from infer import Detector, DetectorPicoDet, PredictConfig, print_arguments, get_test_images, bench_log
from keypoint_infer import KeyPointDetector, PredictConfig_KeyPoint
-from visualize import visualize_pose
+from visualize import visualize_pose, visualize_pose_point131
from benchmark_utils import PaddleInferBenchmark
from utils import get_current_memory_mb
from keypoint_postprocess import translate_to_ori_images

KEYPOINT_SUPPORT_MODELS = {
    'HigherHRNet': 'keypoint_bottomup',
-    'HRNet': 'keypoint_topdown'
+    'HRNet': 'keypoint_topdown',
+    'VitPose_TopDown_WholeBody': 'keypoint_topdown_wholebody'
}


@@ -178,7 +179,7 @@ def topdown_unite_predict_video(detector,

keypoint_res['keypoint'][0][0] = smooth_keypoints.tolist()

-            im = visualize_pose(
+            im = visualize_pose_point131(
                frame,
                keypoint_res,
                visual_thresh=FLAGS.keypoint_threshold,
@@ -329,8 +330,7 @@ def main():
        enable_mkldnn=FLAGS.enable_mkldnn,
        use_dark=FLAGS.use_dark)
    keypoint_arch = topdown_keypoint_detector.pred_config.arch
-    assert KEYPOINT_SUPPORT_MODELS[
-        keypoint_arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.'
+    assert KEYPOINT_SUPPORT_MODELS[keypoint_arch] in (
+        'keypoint_topdown', 'keypoint_topdown_wholebody'
+    ), 'Detection-Keypoint unite inference only supports topdown models.'

# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
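With this change the unified pipeline can be pointed at an exported VitPose_TopDown_WholeBody model. A minimal invocation sketch (the model directory names are placeholders; --det_model_dir, --keypoint_model_dir, --video_file, --device, and --keypoint_threshold are flags this script already exposes):

python deploy/python/det_keypoint_unite_infer.py \
    --det_model_dir=output_inference/picodet_s_320_pedestrian \
    --keypoint_model_dir=output_inference/vitpose_wholebody \
    --video_file=demo.mp4 \
    --device=GPU \
    --keypoint_threshold=0.3
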
2 changes: 1 addition & 1 deletion deploy/python/infer.py
@@ -34,7 +34,7 @@
from benchmark_utils import PaddleInferBenchmark
from picodet_postprocess import PicoDetPostProcess
from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize, WarpAffine, Pad, decode_image, CULaneResize
-from keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop
+from keypoint_preprocess import EvalAffine, TopDownEvalAffine, TopDownAffineImage, expand_crop
from clrnet_postprocess import CLRNetPostProcess
from visualize import visualize_box_mask, imshow_lanes
from utils import argsparser, Timer, get_current_memory_mb, multiclass_nms, coco_clsid2catid
49 changes: 48 additions & 1 deletion deploy/python/keypoint_infer.py
@@ -42,10 +42,40 @@
# Global dictionary
KEYPOINT_SUPPORT_MODELS = {
    'HigherHRNet': 'keypoint_bottomup',
-    'HRNet': 'keypoint_topdown'
+    'HRNet': 'keypoint_topdown',
+    'VitPose_TopDown_WholeBody': 'keypoint_topdown_wholebody'
}
}


def _box2cs(image_size, box):
    """Encode a bbox (x, y, w, h) into (center, scale).

    Args:
        image_size (list): [w, h] of the model input.
        box (list): [x, y, w, h] of the bounding box.

    Returns:
        tuple: A tuple containing center and scale.

        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """
    x, y, w, h = box[:4]
    aspect_ratio = image_size[0] / image_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    # pad the box to match the aspect ratio of the model input
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25

    return center, scale


class KeyPointDetector(Detector):
"""
Args:
@@ -137,6 +167,23 @@ def postprocess(self, inputs, result):
            imshape = inputs['im_shape'][:, ::-1]
            center = np.round(imshape / 2.)
            scale = imshape / 200.
            keypoint_postprocess = HRNetPostProcess(use_dark=self.use_dark)
            kpts, scores = keypoint_postprocess(np_heatmap, center, scale)
            results['keypoint'] = kpts
            results['score'] = scores
            return results
        elif KEYPOINT_SUPPORT_MODELS[
                self.pred_config.arch] == 'keypoint_topdown_wholebody':
            results = {}
            center = []
            scale = []
            for i in range(len(inputs['im_shape'])):
                # the input tensor is NCHW: [-1] is width, [-2] is height
                input_w = np.shape(inputs['image'])[-1]
                input_h = np.shape(inputs['image'])[-2]
                tmp_center, tmp_scale = _box2cs(
                    [input_w, input_h],
                    [0, 0, inputs['im_shape'][i][1], inputs['im_shape'][i][0]])
                center.append(tmp_center)
                scale.append(tmp_scale)

            keypoint_postprocess = HRNetPostProcess(use_dark=self.use_dark)
            kpts, scores = keypoint_postprocess(np_heatmap, center, scale)
            results['keypoint'] = kpts
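As a sanity check on the new _box2cs helper, a worked example (the 192x256 input size and 288x512 frame are assumptions for illustration):

# hypothetical 192x256 (w, h) model input, full-image box over a 288x512 frame
center, scale = _box2cs([192, 256], [0, 0, 288, 512])
# aspect_ratio = 192 / 256 = 0.75; since 288 < 0.75 * 512, w is padded to 384
# center -> [144., 256.]; scale -> [384/200, 512/200] * 1.25 = [2.4, 3.2]
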
77 changes: 77 additions & 0 deletions deploy/python/keypoint_preprocess.py
@@ -18,6 +18,83 @@
import numpy as np


def _box2cs(image_size, box):
    """Encode a bbox (x, y, w, h) into (center, scale).

    Args:
        image_size (list): [w, h] of the model input.
        box (list): [x, y, w, h] of the bounding box.

    Returns:
        tuple: A tuple containing center and scale.

        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """
    x, y, w, h = box[:4]
    aspect_ratio = image_size[0] / image_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    # pad the box to match the aspect ratio of the model input
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25

    return center, scale


class TopDownAffineImage(object):
    """Apply an affine transform to the image.

    Args:
        trainsize (list): [w, h], the standard size used to train.
        use_udp (bool): whether to use Unbiased Data Processing.
        use_box2cs (bool): whether to derive center/scale from the whole
            image via _box2cs instead of reading them from im_info.

    Returns:
        image (np.ndarray): the image after the affine transform.
        im_info (dict): meta information of the image, unchanged.
    """

    def __init__(self, trainsize, use_udp=False, use_box2cs=True):
        self.trainsize = trainsize
        self.use_udp = use_udp
        self.use_box2cs = use_box2cs

    def __call__(self, records, im_info):
        if self.use_box2cs:
            center, scale = _box2cs(
                self.trainsize,
                [0, 0, im_info['im_shape'][1], im_info['im_shape'][0]])
        else:
            imshape = im_info['im_shape'][::-1]
            center = im_info['center'] if 'center' in im_info else imshape / 2.
            scale = im_info['scale'] if 'scale' in im_info else imshape

        image = records
        # the optional rotation lives in im_info; records is the image array
        rot = im_info['rotate'] if 'rotate' in im_info else 0
        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                scale * 200.0)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
        else:
            trans = get_affine_transform(center, scale * 200, rot,
                                         self.trainsize)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
        return image, im_info


class EvalAffine(object):
def __init__(self, size, stride=64):
super(EvalAffine, self).__init__()
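A minimal usage sketch of the new op (run from deploy/python; the frame and sizes are hypothetical, and im_shape is [h, w] as decode_image records it upstream):

import numpy as np
from keypoint_preprocess import TopDownAffineImage

op = TopDownAffineImage(trainsize=[192, 256])    # [w, h] of the model input
frame = np.zeros((512, 288, 3), dtype=np.uint8)  # hypothetical HWC image
im_info = {'im_shape': np.array([512., 288.])}   # [h, w]
warped, im_info = op(frame, im_info)             # warped.shape == (256, 192, 3)
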
51 changes: 51 additions & 0 deletions deploy/python/visualize.py
@@ -20,6 +20,15 @@
import numpy as np
import PIL
from PIL import Image, ImageDraw, ImageFile
import json

from mmengine.structures import InstanceData
from mmpose.structures import PoseDataSample, merge_data_samples, split_instances
from mmpose.visualization import PoseLocalVisualizer

ImageFile.LOAD_TRUNCATED_IMAGES = True

def imagedraw_textsize_c(draw, text):
@@ -235,6 +244,48 @@ def get_color(idx):
return color


def visualize_pose_point131(imgfile,
                            results,
                            visual_thresh=0.3,
                            save_name='pose.jpg',
                            save_dir='output',
                            returnimg=False,
                            ids=None):
    pose_local_visualizer = PoseLocalVisualizer(
        vis_backends=[{'type': 'LocalVisBackend'}],
        name='visualizer',
        radius=3,
        alpha=0.8,
        line_width=1)
    with open("deploy/python/dataset_meta.json", 'r') as f:
        meta_data = json.load(f)

    pose_local_visualizer.set_dataset_meta(meta_data, skeleton_style="mmpose")
    image = cv2.imread(imgfile) if isinstance(imgfile, str) else imgfile

    skeletons, _ = results['keypoint']
    skeletons = np.array(skeletons)  # (N, 131, 3): x, y, confidence
    pred_instances = InstanceData()
    pred_instances.keypoints = skeletons[..., :2]
    # attach per-keypoint confidences so kpt_thr can filter low-score points
    pred_instances.keypoint_scores = skeletons[..., 2]

    pred_pose_data_sample = PoseDataSample()
    pred_pose_data_sample.pred_instances = pred_instances

    # the skeleton is drawn on a black canvas of the same size as the input
    blank_image = np.zeros(image.shape, dtype=np.uint8)
    pose_local_visualizer.add_datasample(
        'image',
        blank_image,
        data_sample=pred_pose_data_sample,
        draw_gt=False,
        draw_heatmap=False,
        draw_bbox=True,
        show_kpt_idx=False,
        skeleton_style='mmpose',
        show=False,
        wait_time=0,
        kpt_thr=visual_thresh)

    return pose_local_visualizer.get_image()


def visualize_pose(imgfile,
results,
visual_thresh=0.6,
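A hedged usage sketch of the new visualizer (requires mmengine/mmpose installed and deploy/python/dataset_meta.json on disk; results is the dict assembled by the keypoint pipeline above, with results['keypoint'] holding (kpts, scores)):

import cv2
from visualize import visualize_pose_point131

vis = visualize_pose_point131('demo.jpg', results, visual_thresh=0.3)
cv2.imwrite('output/pose.jpg', vis)
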
5 changes: 3 additions & 2 deletions ppdet/engine/export_utils.py
@@ -55,10 +55,11 @@
    'YOLOF': 40,
    'METRO_Body': 3,
    'DETR': 3,
-    'CLRNet': 3
+    'CLRNet': 3,
+    'VitPose_TopDown_WholeBody': 3
}

-KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
+KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet', 'VitPose_TopDown_WholeBody']
MOT_ARCH = ['JDE', 'FairMOT', 'DeepSORT', 'ByteTrack', 'CenterTrack']
LANE_ARCH = ['CLRNet']

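For context, registering the arch here is what lets tools/export_model.py treat it as a keypoint model. An export sketch (the config and weights paths are placeholders; this PR does not include the config file):

python tools/export_model.py \
    -c configs/keypoint/vitpose/vitpose_wholebody.yml \
    -o weights=output/vitpose_wholebody/best_model.pdparams
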
2 changes: 2 additions & 0 deletions ppdet/modeling/architectures/__init__.py
@@ -26,6 +26,7 @@
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from . import keypoint_vitpose
+from . import keypoint_vitpose_wholebody
from . import jde
from . import deepsort
from . import fairmot
@@ -60,6 +61,7 @@
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
from .keypoint_vitpose import *
+from .keypoint_vitpose_wholebody import *
from .jde import *
from .deepsort import *
from .fairmot import *