-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdtu_yao.py
173 lines (145 loc) · 6.65 KB
/
dtu_yao.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from torch.utils.data import Dataset
import numpy as np
import os
from PIL import Image
from datasets.data_io import *
# the DTU dataset preprocessed by Yao Yao (only for training)
class MVSDataset(Dataset):
    """DTU dataset preprocessed by Yao Yao (training layout).

    Every sample is described by a ``(scan, light_idx, ref_view, src_views)``
    tuple built from the scan list file and ``Cameras/pair.txt``.  Files are
    looked up under ``datapath`` as::

        Rectified/{scan}_train/rect_{view+1:03}_{light}_r5000.png
        Depths/{scan}_train/depth_visual_{view:04}.png
        Depths/{scan}_train/depth_map_{view:04}.pfm
        Cameras/train/{view:08}_cam.txt

    Args:
        datapath: root directory of the preprocessed dataset.
        listfile: text file with one scan name per line.
        mode: one of ``"train"``, ``"val"`` or ``"test"``.
        nviews: number of views per sample (reference view included).
        ndepths: number of depth hypotheses generated per sample.
        interval_scale: factor applied to the depth interval read from the
            camera file.
        **kwargs: accepted and ignored.
    """

    def __init__(self, datapath, listfile, mode, nviews, ndepths=192, interval_scale=1.06, **kwargs):
        super(MVSDataset, self).__init__()
        self.datapath = datapath
        self.listfile = listfile
        self.mode = mode
        self.nviews = nviews
        self.ndepths = ndepths
        self.interval_scale = interval_scale

        assert self.mode in ["train", "val", "test"]
        self.metas = self.build_list()

    def _read_pair_file(self):
        """Parse ``Cameras/pair.txt`` into a list of ``(ref_view, src_views)``."""
        pair_path = os.path.join(self.datapath, "Cameras/pair.txt")
        pairs = []
        with open(pair_path) as f:
            num_viewpoint = int(f.readline())
            # viewpoints (49)
            for _ in range(num_viewpoint):
                ref_view = int(f.readline().rstrip())
                # The line is "<count> <id> <score> <id> <score> ...": keep the ids.
                src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
                pairs.append((ref_view, src_views))
        return pairs

    def build_list(self):
        """Return one meta tuple per (scan, viewpoint, light condition)."""
        with open(self.listfile) as f:
            # Skip blank lines so that e.g. a trailing empty line does not
            # produce a sample with an empty scan name.
            scans = [name for name in (line.rstrip() for line in f) if name]

        # The pair file does not depend on the scan, so it is parsed once
        # instead of once per scan (and not at all when there are no scans).
        pairs = self._read_pair_file() if scans else []

        # light conditions 0-6
        metas = [
            (scan, light_idx, ref_view, src_views)
            for scan in scans
            for ref_view, src_views in pairs
            for light_idx in range(7)
        ]
        print("dataset", self.mode, "metas:", len(metas))
        return metas

    def __len__(self):
        return len(self.metas)

    def read_cam_file(self, filename):
        """Read a camera text file.

        Returns:
            ``(intrinsics, extrinsics, depth_min, depth_interval)`` where
            ``intrinsics`` is a 3x3 and ``extrinsics`` a 4x4 float32 array and
            ``depth_interval`` is already multiplied by ``interval_scale``.
        """
        with open(filename) as f:
            lines = [line.rstrip() for line in f.readlines()]
        # extrinsics: line [1,5), 4x4 matrix
        extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ').reshape((4, 4))
        # intrinsics: line [7-10), 3x3 matrix
        intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ').reshape((3, 3))
        # depth_min & depth_interval: line 11
        depth_fields = lines[11].split()
        depth_min = float(depth_fields[0])
        depth_interval = float(depth_fields[1]) * self.interval_scale
        return intrinsics, extrinsics, depth_min, depth_interval

    def read_img(self, filename):
        """Load an image as a float32 array scaled from 0~255 to 0~1."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the array.
        with Image.open(filename) as img:
            np_img = np.array(img, dtype=np.float32) / 255.
        return np_img

    def read_depth(self, filename):
        """Load a pfm depth file as a float32 array."""
        # read_pfm is not defined in this file; presumably it comes from the
        # star import of datasets.data_io. Only its first return value is used.
        return np.array(read_pfm(filename)[0], dtype=np.float32)

    def __getitem__(self, idx):
        """Load the sample described by ``self.metas[idx]``.

        Returns:
            dict with keys ``"imgs"`` (stacked views with the last image axis
            moved to position 1), ``"proj_matrices"`` (one 4x4 matrix per
            view), and ``"depth"``, ``"depth_values"`` and ``"mask"`` taken
            from the reference view.
        """
        scan, light_idx, ref_view, src_views = self.metas[idx]
        # use only the reference view and first nviews-1 source views
        view_ids = [ref_view] + src_views[:self.nviews - 1]

        imgs = []
        proj_matrices = []
        mask = None
        depth = None
        depth_values = None

        for i, vid in enumerate(view_ids):
            # NOTE that the id in image file names is from 1 to 49 (not 0~48)
            img_filename = os.path.join(
                self.datapath,
                'Rectified/{}_train/rect_{:0>3}_{}_r5000.png'.format(scan, vid + 1, light_idx))
            # Format the relative part only, so braces in datapath are left alone.
            proj_mat_filename = os.path.join(self.datapath, 'Cameras/train/{:0>8}_cam.txt'.format(vid))

            imgs.append(self.read_img(img_filename))
            intrinsics, extrinsics, depth_min, depth_interval = self.read_cam_file(proj_mat_filename)

            # multiply intrinsics and extrinsics to get projection matrix
            proj_mat = extrinsics.copy()
            proj_mat[:3, :4] = np.matmul(intrinsics, proj_mat[:3, :4])
            proj_matrices.append(proj_mat)

            if i == 0:  # reference view
                mask_filename = os.path.join(
                    self.datapath, 'Depths/{}_train/depth_visual_{:0>4}.png'.format(scan, vid))
                depth_filename = os.path.join(
                    self.datapath, 'Depths/{}_train/depth_map_{:0>4}.pfm'.format(scan, vid))
                # ndepths hypotheses, evenly spaced in inverse depth between
                # depth_min and depth_min + depth_interval * ndepths.
                depth_values = 1.0 / np.linspace(1.0 / depth_min,
                                                 1.0 / (depth_interval * self.ndepths + depth_min),
                                                 self.ndepths,
                                                 dtype=np.float32)
                mask = self.read_img(mask_filename)
                depth = self.read_depth(depth_filename)

        imgs = np.stack(imgs).transpose([0, 3, 1, 2])
        proj_matrices = np.stack(proj_matrices)

        return {
            "imgs": imgs,
            "proj_matrices": proj_matrices,
            "depth": depth,
            "depth_values": depth_values,
            "mask": mask,
        }
if __name__ == "__main__":
    # some testing code, just IGNORE it
    # Build each split in turn and fetch sample 50; the sample from the last
    # split (5 views) is the one used for the warping check below.
    for split_list, split_mode, split_nviews in (
        ('../lists/dtu/train.txt', 'train', 3),
        ('../lists/dtu/val.txt', 'val', 3),
        ('../lists/dtu/test.txt', 'test', 5),
    ):
        dataset = MVSDataset("/home/xyguo/dataset/dtu_mvs/processed/mvs_training/dtu/", split_list, split_mode,
                             split_nviews, 128)
        item = dataset[50]

    # test homography here
    print(item.keys())
    for key in ("imgs", "depth", "depth_values", "mask"):
        print(key, item[key].shape)

    # Move the axis at position 0 of each view to the end and keep every
    # 4th row and column.
    views = [item["imgs"][i].transpose([1, 2, 0])[::4, ::4] for i in range(5)]
    ref_img = views[0]
    src_imgs = views[1:]
    ref_proj_mat = item["proj_matrices"][0]
    src_proj_mats = [item["proj_matrices"][i] for i in range(1, 5)]
    mask = item["mask"]
    depth = item["depth"]

    height, width = ref_img.shape[:2]
    cols, rows = np.meshgrid(np.arange(0, width), np.arange(0, height))
    print("yy", rows.max(), rows.min())
    rows = rows.reshape([-1])
    cols = cols.reshape([-1])

    # Homogeneous pixel coordinates scaled by depth, with a row of ones appended.
    pixels = np.vstack((cols, rows, np.ones_like(cols)))
    flat_depth = depth.reshape([-1])
    print("X", "D", pixels.shape, flat_depth.shape)
    points = np.vstack((pixels * flat_depth, np.ones_like(cols)))

    # Undo the reference projection, apply the first source projection, then
    # divide by the third row.
    points = np.matmul(np.linalg.inv(ref_proj_mat), points)
    points = np.matmul(src_proj_mats[0], points)
    points /= points[2]
    map_x = points[0].reshape([height, width]).astype(np.float32)
    map_y = points[1].reshape([height, width]).astype(np.float32)

    import cv2

    warped = cv2.remap(src_imgs[0], map_x, map_y, interpolation=cv2.INTER_LINEAR)
    warped[mask[:, :] < 0.5] = 0

    # Reverse the last axis and rescale by 255 before writing.
    for out_path, picture in (
        ('../tmp0.png', ref_img),
        ('../tmp1.png', warped),
        ('../tmp2.png', src_imgs[0]),
    ):
        cv2.imwrite(out_path, picture[:, :, ::-1] * 255)