-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_data_tsdf.py
413 lines (327 loc) · 17.8 KB
/
prepare_data_tsdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
# Copyright 2020 Magic Leap, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Originating Author: Zak Murez (zak.murez.com)
import argparse
import json
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import open3d as o3d
import numpy as np
import torch
import trimesh
from vPlaneRecover.data import SceneDataset, load_info_json
from vPlaneRecover.datasets.scannet import prepare_scannet_scene, prepare_scannet_splits
from vPlaneRecover.datasets.sample import prepare_sample_scene
import vPlaneRecover.transforms as transforms
from vPlaneRecover.tsdf import TSDFFusion, TSDF, coordinates, depth_to_world
from data_prep_util import *
from matplotlib.cm import get_cmap as colormap
from vPlaneRecover.transforms import NYU40_COLORMAP
# Shorthand for pi.
PI = np.pi

# Historical note on a per-voxel-size STD table that is no longer defined here
# (kept for reference; the table itself is disabled):
#   voxel size 16: 3 sigma spans 1 voxel on one side (3 sigma = 16 * 3)
#   voxel size  8: 3 sigma spans 2 voxels in total   (3 sigma = 5 * 8)
#   voxel size  4: 3 sigma spans 5 voxels in total   (3 sigma = 11 * 4)
#   STD = {16: 1, 8: 2, 4: 5}

# Distance threshold constant (units not stated in this file -- TODO confirm).
ADOPT_THRES = 0.03

# Cosine of a 30 degree angle.
_NORM_ANGLE_DEG = 30
NORM_THRES = np.cos(np.deg2rad(_NORM_ANGLE_DEG))
def fuse_scene(path, path_meta, scene, voxel_size, trunc_ratio=3, max_depth=3,
               vol_prcnt=.995, vol_margin=1.5, fuse_semseg=False, device=0,
               verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Args:
        path: not read by this function; kept so existing callers keep working
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF (divided by 100 before being handed to
            TSDFFusion -- presumably centimeters converted to meters)
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        vol_prcnt: for computing the bounding volume of the TSDF... ignore outliers
        vol_margin: padding for computing bounding volume of the TSDF
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (preferred method is to not accumulate and instead transfer labels
            from ground truth labeled mesh)
        device: index of the CUDA device to use
        verbose: how much logging to print

    Returns:
        Nothing. Writes a TSDF (.npz) file and a color mesh (.ply) into
        path_meta/scene and records the TSDF path in that scene's info.json.

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
    ground truth. However, this means some distant data is missing which can
    create artifacts. Nevertheless, we found we achieved the best 2d metrics
    with the less noisy ground truth.
    """
    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)

    # get the dataset
    transform = transforms.Compose([transforms.ResizeImage((640, 480)),
                                    transforms.ToTensor(),
                                    transforms.InstanceToSemseg('nyu40'),
                                    transforms.IntrinsicsPoseToProjection(),
                                    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=4)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames (at most 200, uniformly spaced) to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for frame in dataloader1:
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    # drop points whose first coordinate is not finite
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # per-coordinate lower / upper quantiles of the points, padded by vol_margin
    origin = torch.as_tensor(np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()
    # number of voxels along each coordinate
    vol_dim = ((vol_max - origin) / (float(voxel_size) / 100)).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim, float(voxel_size) / 100, origin,
                             trunc_ratio, device, label=fuse_semseg)

    # integrate frames
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 25 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene, 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene, 'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    meshes = tsdf.get_mesh(['color'])
    for key in meshes:
        # one file per mesh type: mesh_XX_<key>.ply
        meshes[key].export(file_name_mesh.replace('.ply', '_%s.ply' % key))

    # update info json; the context manager guarantees the file is flushed and closed
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    with open(info_file, 'w') as f:
        json.dump(data, f)
# use labeled mesh to label surface voxels in tsdf
def label_scene(path_meta, scene, voxel_size, dist_thresh=.05, verbose=2, trunc_ratio=3, device=0):
    """ Transfer instance and plane labels from ground truth meshes to the TSDF

    For each voxel with |tsdf| < 1, find the nearest vertex of the ground
    truth mesh (for instance ids) and of the plane mesh (for plane instance
    ids) and copy the label if the vertex is close enough. Plane labels are
    then cleared for voxels that lie too far from their plane's equation.

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF to process
        dist_thresh: currently not read by this function; the thresholds
            used are derived from the TSDF voxel size instead
        verbose: how much logging to print
        trunc_ratio: scales both the plane nearest-neighbor threshold and the
            point-to-plane distance filter
        device: index of the CUDA device to use

    Returns:
        Nothing. Updates the TSDF (.npz) file with the 'instance', 'plane_ins'
        and 'plane_norm' volumes and exports visualization meshes / a colored
        point cloud next to it.
    """
    device = torch.device('cuda', device)

    if verbose > 0:
        print('labeling', scene)

    info_file = os.path.join(path_meta, scene, 'info.json')
    data = load_info_json(info_file)

    # each vertex in gt mesh indexes a seg group
    with open(data['file_name_seg_indices'], 'r') as f:
        segIndices = json.load(f)['segIndices']

    # maps seg groups to instances
    with open(data['file_name_seg_groups'], 'r') as f:
        segGroups = json.load(f)['segGroups']
    mapping = {ind: group['id'] + 1 for group in segGroups for ind in group['segments']}

    # get per vertex instance ids (0 is unknown, [1,...] are objects)
    instance_verts = torch.zeros(len(segIndices), dtype=torch.long)
    for vert_idx, seg in enumerate(segIndices):
        if seg in mapping:
            instance_verts[vert_idx] = mapping[seg]

    # load vertex locations
    # (process=False so trimesh does not merge vertices or drop NaN values)
    mesh = trimesh.load(data['file_name_mesh_gt'], process=False)
    verts = mesh.vertices
    n_verts = verts.shape[0]

    # the plane information is encoded in the colors of the plane mesh; extra
    # vertices appended after the first n_verts are dropped
    mesh_plane = trimesh.load(data['file_name_plane_mesh'], process=False)
    plane_verts = mesh_plane.vertices[:n_verts]
    assert verts.shape == plane_verts.shape, "plane mesh must have same vertices with orignal mesh"
    colors = mesh_plane.visual.vertex_colors.view(np.ndarray)[:n_verts]

    # decode plane info
    plane_param = np.load(data['file_name_plane_param'])
    plane_ins, plane_id2param, new_plane_verts = map_planes(plane_verts, colors, plane_param, angle_interval=30)

    # construct kdtrees of vertices for fast nn lookup
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(verts)
    kdtree = o3d.geometry.KDTreeFlann(pcd)

    pcd2 = o3d.geometry.PointCloud()
    pcd2.points = o3d.utility.Vector3dVector(new_plane_verts)
    kdtree_pln = o3d.geometry.KDTreeFlann(pcd2)

    # load tsdf volume
    tsdf = TSDF.load(data['file_name_vol_%02d' % voxel_size])
    coords_voxel = coordinates(tsdf.tsdf_vol.size(), device=torch.device('cpu'))
    coords_mesh = coords_voxel.type(torch.float) * tsdf.voxel_size + tsdf.origin.T
    # only voxels with |tsdf| < 1 are labeled
    mask = tsdf.tsdf_vol.abs().view(-1) < 1

    # transfer vertex labels to voxels near the surface
    instance_vol = torch.zeros(len(mask), dtype=torch.long)
    normal_vol = get_normal(tsdf.tsdf_vol)
    planeIns_vol = torch.zeros(len(mask), dtype=torch.long).to(device)

    # NOTE(review): Open3D's KD-tree search is documented to return squared
    # distances -- confirm the two thresholds below are meant in those units.
    for i in mask.nonzero():
        _, inds, dist = kdtree.search_knn_vector_3d(coords_mesh[:, i], 1)
        if dist[0] < tsdf.voxel_size + 0.1:
            instance_vol[i] = instance_verts[inds[0]]

        _, inds, dist = kdtree_pln.search_knn_vector_3d(coords_mesh[:, i], 1)
        # deliberately generous; wrong assignments are removed by the
        # point-to-plane filter below
        if dist[0] < (tsdf.voxel_size * 3) * trunc_ratio:
            planeIns_vol[i] = plane_ins[inds[0]]

    # remove inaccurate plane instances: clear voxels whose distance to the
    # plane (in units of trunc_ratio * voxel_size) is >= 1
    for plane_id in range(1, plane_id2param.shape[0]):
        cur_plane_param = torch.from_numpy(plane_id2param[plane_id]).float()
        # Flat indices of this plane's voxels. Writing through explicit
        # indices is required: `vol[mask_a][mask_b] = 0` only modifies a
        # temporary copy and leaves `vol` untouched.
        member_idx = (planeIns_vol == plane_id).nonzero().view(-1)
        if member_idx.numel() == 0:
            continue
        member_coords = coords_mesh[:, member_idx.cpu()].float().to(device)
        plane_dist = (cur_plane_param.to(device) @ member_coords - 1).abs() / \
                     cur_plane_param.norm().to(device) / trunc_ratio / tsdf.voxel_size
        planeIns_vol[member_idx[plane_dist >= 1]] = 0

    planeIns_vol = planeIns_vol.cpu()

    vol_shape = list(tsdf.tsdf_vol.size())
    tsdf.attribute_vols['instance'] = instance_vol.view(vol_shape)
    tsdf.attribute_vols['plane_ins'] = planeIns_vol.view(vol_shape)
    tsdf.attribute_vols['plane_norm'] = normal_vol
    tsdf.save(data['file_name_vol_%02d' % voxel_size],
              addtional_info={'plane_id2param': torch.from_numpy(plane_id2param).float()})

    # viz: convert instances to semseg and export the label meshes
    key = 'vol_%02d' % voxel_size
    temp_data = {key: tsdf, 'instances': data['instances'], 'dataset': data['dataset']}
    tsdf = transforms.InstanceToSemseg('nyu40')(temp_data)[key]
    meshes = tsdf.get_mesh(['semseg', 'plane_ins'])
    fname = data['file_name_vol_%02d' % voxel_size]
    mesh_base = fname.replace('tsdf', 'mesh')
    mesh_suffix = {
        'semseg': '_semseg.ply',
        'plane_cls': '_planeCls.ply',
        'norm': '_normal.ply',
        'plane_ins': '_planeIns.ply',
    }
    for mesh_key in meshes:
        if mesh_key in mesh_suffix:
            meshes[mesh_key].export(mesh_base.replace('.npz', mesh_suffix[mesh_key]))

    # colored point cloud of all voxels carrying a plane instance id
    base_cmap = np.array(NYU40_COLORMAP)
    plane_ids = planeIns_vol.view(-1).numpy()
    plane_mask = plane_ids > 0
    point_colors = np.zeros([coords_mesh.shape[1], 4])
    n_plane = int(plane_ids.max()) + 1
    n_extra = n_plane - 41
    if n_extra > 0:
        # more ids than the first 41 palette slots: extend with shuffled 'jet' colors
        extra_cmap = (colormap('jet')(np.linspace(0, 1, n_extra))[:, :3] * 255).astype(np.uint8)
        extra_cmap = extra_cmap[np.random.permutation(n_extra), :]
        plane_color = np.concatenate([base_cmap, extra_cmap], axis=0)
    else:
        plane_color = base_cmap

    rgba = np.concatenate([plane_color[plane_ids, :],
                           np.ones([point_colors.shape[0], 1]) * 255], axis=1)
    point_colors[plane_mask] += rgba[plane_mask]
    valid_pnt_mask = point_colors.sum(axis=1) > 0
    pld = trimesh.points.PointCloud(vertices=coords_mesh.numpy()[:, valid_pnt_mask].T,
                                    colors=point_colors[valid_pnt_mask].clip(0, 255))
    pld.export(mesh_base.replace('.npz', '_planeIns_pld.ply'))
def _scene_fully_processed(path_meta, scene):
    """Return True when the plane-instance meshes for voxel sizes 4, 8 and 16 all exist."""
    if not os.path.isdir(path_meta):
        return False
    return all(os.path.isfile(path_meta + '/%s/mesh_%02d_planeIns.ply' % (scene, size))
               for size in (4, 8, 16))


def prepare_scannet(path, plane_path, path_meta, i=0, n=1, max_depth=3):
    """ Create all derived data need for the Scannet dataset

    For each scene an info.json file is created containing all meta data
    required by the dataloaders. We also create the ground truth TSDFs by
    fusing the ground truth depth maps and add semantic / plane labels.

    Scenes are read from `path/scans`, restricted to the names listed in
    'meta_file/filtered_scenes.txt' (resolved against the current working
    directory). Scenes whose three plane-instance meshes already exist are
    skipped.

    Args:
        path: path to the scannet dataset
        plane_path: path to plane fitting result
        path_meta: path to save all the derived data
            (we recommend creating a parallel directory structure so that
            we don't modify the original dataset)
        i: process id (used for parallel processing)
            (this process operates on scenes [i::n]; also selects the GPU
            as i % 8)
        n: number of processes
        max_depth: mask out large depth values since they are noisy

    Returns:
        Nothing. Writes files to path_meta.
    """
    with open('meta_file/filtered_scenes.txt') as f:
        valid_scenes = {line.strip() for line in f}

    _scenes = sorted(os.path.join('scans', scene)
                     for scene in os.listdir(os.path.join(path, 'scans'))
                     if scene in valid_scenes)

    # only the first worker writes the split files
    if i == 0:
        prepare_scannet_splits(path, path_meta, _scenes)

    with open('meta_file/scannet_val.txt') as vf:
        val_list = [os.path.basename(os.path.dirname(line.strip())) for line in vf]
    base_val = {x[:-3] for x in val_list}

    scene_to_process = []
    for scene in _scenes:
        if _scene_fully_processed(path_meta, scene):
            continue
        # For validation scenes keep only the scan ending in '00'.
        # NOTE(review): `scene` carries a 'scans/' prefix while the entries of
        # base_val are bare directory names, so this membership test looks
        # like it can never match -- confirm the intended behavior.
        base_scene = scene[:-3]
        if base_scene in base_val and scene[-2:] != '00':
            continue
        scene_to_process.append(scene)

    # every worker takes each n-th scene, starting at its own index
    scenes = scene_to_process[i::n]

    for scene in scenes:
        if _scene_fully_processed(path_meta, scene):
            continue  # if the scene is fully processed, ignore
        print('========processing {} device {}==========='.format(scene, i))
        prepare_scannet_scene(scene, path, plane_path, path_meta, val_list)
        for voxel_size in [4, 8, 16]:
            fuse_scene(path, path_meta, scene, voxel_size, device=i % 8, max_depth=max_depth, trunc_ratio=3)
            if scene.split('/')[0] == 'scans':
                label_scene(path_meta, scene, voxel_size, trunc_ratio=3, device=i % 8)
# Command-line entry point: parse the arguments, validate the worker index and
# run the preparation for the selected dataset.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Fuse ground truth tsdf on Scannet')
    parser.add_argument("--path", default= '/data/ScanNet/ScanNet_raw_data/', metavar="DIR", help="path to raw scannet dataset")
    parser.add_argument("--plane_path", default='/data/ScanNet/ScanNet_raw_data/video_plane_fitting/merged_err_thres=0.6/', metavar="DIR",
                        help="path to plane fitting results")
    parser.add_argument("--path_meta", default='/data/ScanNet/planeMVS_data2/', metavar="DIR", help="path to store processed (derived) dataset")
    parser.add_argument("--dataset", default='scannet', type=str, help="which dataset to prepare")
    parser.add_argument('--i', default=0, type=int, help='index of part for parallel processing')
    parser.add_argument('--n', default=1, type=int, help='number of parts to devide data into for parallel processing')
    parser.add_argument('--max_depth', default=3., type=float, help='mask out large depth values since they are noisy')
    args = parser.parse_args()

    i = args.i
    n = args.n
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not 0 <= i < n:
        parser.error('--i must satisfy 0 <= i < n (got i=%d, n=%d)' % (i, n))

    # exist_ok avoids a crash when several parallel workers start at the same
    # time and all try to create the output directory.
    os.makedirs(args.path_meta, exist_ok=True)

    if args.dataset == 'scannet':
        prepare_scannet(
            os.path.join(args.path, 'scannet'),
            args.plane_path,
            os.path.join(args.path_meta, 'scannet'),
            i,
            n,
            args.max_depth
        )
    else:
        raise NotImplementedError('unknown dataset %s'%args.dataset)