# ==========================================================================
#
# This file is a part of implementation for paper:
# DeepMOT: A Differentiable Framework for Training Multiple Object Trackers.
# This contribution is headed by Perception research team, INRIA.
#
# Contributor(s) : Yihong Xu
# INRIA contact : [email protected]
#
# ===========================================================================
import numpy as np
import copy
import torch
import cv2
# numpy version #
def xywh2xyxy(bbox):
"""
convert bbox from [x,y,w,h] to [x1, y1, x2, y2]
:param bbox: bbox in string [x, y, w, h], list
:return: bbox in float [x1, y1, x2, y2], list
"""
copy.deepcopy(bbox)
bbox[0] = float(bbox[0])
bbox[1] = float(bbox[1])
bbox[2] = float(bbox[2]) + bbox[0]
bbox[3] = float(bbox[3]) + bbox[1]
return bbox
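
# A minimal usage sketch for xywh2xyxy (illustrative values, not from the repo):
#   >>> xywh2xyxy(["10", "20", "30", "40"])
#   [10.0, 20.0, 40.0, 60.0]
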
def bb_fast_IOU_v1(boxA, boxB):
"""
Calculation of IOU, version numpy
:param boxA: numpy array [top left x, top left y, x2, y2]
:param boxB: numpy array of [top left x, top left y, x2, y2], shape = [num_bboxes, 4]
:return: IOU of two bounding boxes of shape [num_bboxes]
"""
    if isinstance(boxA, list):
boxA = np.array(copy.deepcopy(boxA), dtype=np.float32)[-4:]
boxB = np.array(copy.deepcopy(boxB), dtype=np.float32)[:, -4:]
# determine the (x, y)-coordinates of the intersection rectangle
xA = np.maximum(boxA[0], boxB[:, 0])
yA = np.maximum(boxA[1], boxB[:, 1])
xB = np.minimum(boxA[2], boxB[:, 2])
yB = np.minimum(boxA[3], boxB[:, 3])
# compute the area of intersection rectangle
interArea = np.maximum(0.0, xB - xA + 1) * np.maximum(0.0, yB - yA + 1)
# compute the area of both the prediction and ground-truth
# rectangles
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxBArea = (boxB[:, 2] - boxB[:, 0] + 1) * (boxB[:, 3] - boxB[:, 1] + 1)
# compute the intersection over union by taking the intersection
# area and dividing it by the sum of prediction + ground-truth
    # areas minus the intersection area
iou = interArea / (boxAArea + boxBArea - interArea)
return iou
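
# A hedged usage sketch for bb_fast_IOU_v1 (illustrative boxes, not from the
# repo). With the +1 pixel convention each box below has area 100, and the
# second pair overlaps on a 5x5 patch:
#   >>> iou = bb_fast_IOU_v1([0.0, 0.0, 9.0, 9.0],
#   ...                      np.array([[0.0, 0.0, 9.0, 9.0], [5.0, 5.0, 14.0, 14.0]]))
#   # -> approximately [1.0, 0.1429]  (25 / (100 + 100 - 25) for the second box)
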
def warpcoordinates(coordinates, warp_matrix):
"""
camera motion compensations
:param coordinates: np.darray of shape [num_bbox, 4], 4=[x,y,x,y] or [num_bbox, 2], 2=[x,y]
:param warp_matrix: numpy.darray of shape [2,3]
:return: warped coordinates: np.darray of shape [num_bbox, 4], 4=[x,y,x,y]
"""
if coordinates.shape[1] == 4:
split_tl = coordinates[:, 0:2].copy()
split_br = coordinates[:, 2:4].copy()
pad_ones = np.ones((split_tl.shape[0], 1))
split_tl = np.transpose(np.hstack([split_tl, pad_ones]))
split_br = np.transpose(np.hstack([split_br, pad_ones]))
warped_tl = np.transpose(np.dot(warp_matrix, split_tl))
warped_br = np.transpose(np.dot(warp_matrix, split_br))
return np.hstack([warped_tl, warped_br])
else:
pad_ones = np.ones((coordinates.shape[0], 1))
coordinates = np.transpose(np.hstack([coordinates, pad_ones]))
return np.transpose(np.dot(warp_matrix, coordinates))
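
# A minimal sketch for warpcoordinates: with an identity affine matrix the
# coordinates come back unchanged (values are illustrative):
#   >>> warpcoordinates(np.array([[10.0, 20.0, 30.0, 40.0]]), np.eye(2, 3))
#   array([[10., 20., 30., 40.]])
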
def getWarpMatrix(im1, im2):
"""
get warp matrix
:param im1: curr image, numpy array, (h, w, c)
:param im2: prev image, numpy array, (h, w, c)
:return: affine transformation matrix, numpy array, (h, w)
"""
im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im2Gray_ref = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
warp_mode = cv2.MOTION_AFFINE
warp_matrix = np.eye(2, 3, dtype=np.float32)
cc, warp_matrix = cv2.findTransformECC(im2Gray_ref, im1Gray, warp_matrix, warp_mode)
return warp_matrix
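
# A hedged usage sketch for getWarpMatrix; `prev_frame` and `curr_frame` are
# hypothetical BGR frames (e.g. consecutive frames from cv2.VideoCapture):
#   warp = getWarpMatrix(curr_frame, prev_frame)   # numpy array of shape (2, 3)
#   compensated = warpcoordinates(boxes, warp)     # motion-compensated boxes
# Note that cv2.findTransformECC may raise cv2.error if the estimation does
# not converge.
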
# torch version #
def IOUmask_fast(boxA, boxesB):
"""
get iou among boxA and many others boxes
:param boxA: [top left x, top left y, bottom right x, bottom right y], float torch tensor requiring gradients of shape (4,)!! not shape (1,4) !!
:param boxesB: [top left x, top left y, bottom right x, bottom right y], float torch tensor requiring gradients of shape (4,)!! not shape (1,4) !!
:return: iou
"""
boxesB = torch.FloatTensor(boxesB).cuda() # gt box
    # determine the (x, y)-coordinates of the intersection rectangle
xA = torch.max(boxA[0], boxesB[:, 0])
yA = torch.max(boxA[1], boxesB[:, 1])
xB = torch.min(boxA[2], boxesB[:, 2])
yB = torch.min(boxA[3], boxesB[:, 3])
# compute the area of intersection rectangle
interArea = torch.max(torch.zeros(1).cuda(), xB - xA + 1)\
* torch.max(torch.zeros(1).cuda(), yB - yA + 1)
# compute the area of both the prediction and ground-truth
# rectangles
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxesBArea = (boxesB[:, 2] - boxesB[:, 0] + 1) * (boxesB[:, 3] - boxesB[:, 1] + 1)
iou = interArea / (boxAArea + boxesBArea - interArea)
return iou.unsqueeze(0)
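
# A minimal sketch for IOUmask_fast (a CUDA device is assumed, since the
# function hard-codes .cuda(); boxes are illustrative):
#   >>> boxA = torch.tensor([0.0, 0.0, 9.0, 9.0], requires_grad=True).cuda()
#   >>> IOUmask_fast(boxA, [[0.0, 0.0, 9.0, 9.0], [5.0, 5.0, 14.0, 14.0]])
#   # -> tensor of shape (1, 2), approximately [[1.0, 0.1429]], with grad_fn set
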
def calculate_dist_fastV4_torch(bbox_det, bbox_gt, im_h, im_w):
"""
differentiable Version, normalized L2 distance
:param bbox_det: one detection bbox [x1, y1, x2, y2]
:param bbox_gt: list of ground truth bboxes [[x1, y1, x2, y2], ... ]
:param im_h: image height
:param im_w: image width
:return: normalized euclidean distance between detection and ground truth
"""
gt_box = torch.FloatTensor(bbox_gt).cuda() # gt box
    D = (float(im_h) ** 2 + float(im_w) ** 2) ** 0.5  # image diagonal, used as the normalizer
c_gt_x, c_gt_y = 0.5 * (gt_box[:, 0] + gt_box[:, 2]), 0.5 * (gt_box[:, 1] + gt_box[:, 3])
c_det_x, c_det_y = 0.5 * (bbox_det[0] + bbox_det[2]), 0.5 * (bbox_det[1] + bbox_det[3])
# add eps=1e-12 for gradient numerical stability
return (1.0 - torch.exp(-5.0*torch.sqrt(1e-12+((c_gt_x-c_det_x)/D)**2 + ((c_gt_y-c_det_y)/D)**2))).unsqueeze(0)
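
# A minimal sketch for calculate_dist_fastV4_torch (CUDA assumed, values
# illustrative): coincident centers give a distance near 0, while centers a
# full diagonal apart approach 1:
#   >>> det = torch.tensor([0.0, 0.0, 10.0, 10.0]).cuda()
#   >>> calculate_dist_fastV4_torch(det, [[0.0, 0.0, 10.0, 10.0]], 1080, 1920)
#   # -> tensor of shape (1, 1), approximately [[0.0]]
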
def make_single_matrix_torchV2_fast(gt_bboxes, track_bboxes, img_h, img_w):
"""
Version torch, differentiable
:param gt_bboxes: list of ground truth bboxes !! [x1, y1, x2, y2] !! from img of frameID
:param det_bboxes: detection/hypothesis bboxes !! [x1, y1, x2, y2] !! from img of frameID, torch tensor Variable, [num_dets,4]
:param frameID: ID of this frame
:param img_h: height of the image
:param img_w: width of the image
:param T: threshold
:return: matrix
"""
    # number of tracks = N = height of the cost matrix
N = track_bboxes.size(0)
gt = np.array(gt_bboxes, dtype=np.float32)
tmp = []
for i in range(N):
iou = IOUmask_fast(track_bboxes[i], gt[:, -4:])
l2_distance = calculate_dist_fastV4_torch(track_bboxes[i], gt[:, -4:], img_h, img_w)
        # cost per (track, gt) pair: average of the center-distance and (1 - IOU) terms
        tmp.append(0.5 * (l2_distance + (1.0 - iou)))
dist_mat = torch.cat(tmp, dim=0)
if gt.shape[1] == 5:
gt_ids = gt[:, 0].astype(np.int32).tolist()
else:
gt_ids = []
return gt_ids, dist_mat.unsqueeze(0)
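
# A hedged sketch for make_single_matrix_torchV2_fast (CUDA assumed). With gt
# rows of the form [id, x1, y1, x2, y2] it returns the gt ids and a cost
# matrix of shape [1, num_tracks, num_gt]; a perfect match costs ~0:
#   tracks = torch.tensor([[0.0, 0.0, 9.0, 9.0]]).cuda()
#   gt_ids, cost = make_single_matrix_torchV2_fast([[7, 0.0, 0.0, 9.0, 9.0]],
#                                                  tracks, 1080, 1920)
#   # gt_ids == [7], cost.shape == torch.Size([1, 1, 1])
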
def mix_track_detV2(iou_mat, det, track):
"""
refine track bounding boxes by detections
:param iou_mat: iou between dets and tracks, numpy array, [num_track, num_det]
:param det: detection bbox matrix, numpy array, [num_detections, 4]
:param track: prediction from trackers, numpy array, [num_tracks, 4]
:return: refined new tracks, numpy array, [num_tracks, 4]
"""
values, idx = torch.max(iou_mat, dim=1)
mask = torch.ones_like(values)
for i in range(iou_mat.shape[0]):
        # IOU <= 0.6: keep the original track prediction
if float(iou_mat[i, idx[i]]) <= 0.6:
mask[i] = 0.0
    values = mask * values
    # convex combination: blend each track with its best-matching detection, weighted by its IOU
    return (1.0 - values).view(-1, 1) * track + values.view(-1, 1) * torch.index_select(det, 0, idx)
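
# A minimal sketch for mix_track_detV2 (illustrative CPU tensors): with a best
# IOU of 0.8 (> 0.6) the refined track is the blend 0.2 * track + 0.8 * det:
#   >>> mix_track_detV2(torch.tensor([[0.8]]),
#   ...                 torch.tensor([[2.0, 2.0, 12.0, 12.0]]),
#   ...                 torch.tensor([[0.0, 0.0, 10.0, 10.0]]))
#   # -> approximately tensor([[ 1.6,  1.6, 11.6, 11.6]])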