Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisiting #1504 and fix colmap world coordinate transform #2793

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ class ColmapDataParserConfig(DataParserConfig):
"""The method to use to center the poses."""
auto_scale_poses: bool = True
"""Whether to automatically scale the poses to fit in +/- 1 bounding box."""
assume_colmap_world_coordinate_convention: bool = True
"""COLMAP's optimized world frame often has the y direction of the first camera pointing downward,
while nerfstudio's world frame uses the z direction as up for the viewer. Therefore, we usually need to apply
an extra transform when orientation_method=none. This parameter has no effect if orientation_method is set to
anything other than none. When this parameter is set to False, no extra transform is applied when reading data
from COLMAP.
"""
eval_mode: Literal["fraction", "filename", "interval", "all"] = "interval"
"""
The method to use for splitting the dataset into train and eval.
Expand Down Expand Up @@ -154,10 +160,10 @@ def _get_all_images_and_cameras(self, recon_dir: Path):
c2w = np.linalg.inv(w2c)
# Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
c2w[0:3, 1:3] *= -1
# Why do we want to flip Z with a handedness transform?
# See https://github.com/nerfstudio-project/nerfstudio/issues/1504
c2w = c2w[np.array([1, 0, 2, 3]), :]
c2w[2, :] *= -1
if self.config.assume_colmap_world_coordinate_convention:
# world coordinate transform: map colmap gravity guess (-y) to nerfstudio convention (+z)
c2w = c2w[np.array([0, 2, 1, 3]), :]
c2w[2, :] *= -1

frame = {
"file_path": (self.config.data / self.config.images_path / im_data.name).as_posix(),
Expand All @@ -181,12 +187,12 @@ def _get_all_images_and_cameras(self, recon_dir: Path):

out = {}
out["frames"] = frames
# Why do we want to flip Z with a handedness transform?
# See https://github.com/nerfstudio-project/nerfstudio/issues/1504
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([1, 0, 2]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
if self.config.assume_colmap_world_coordinate_convention:
# world coordinate transform: map colmap gravity guess (-y) to nerfstudio convention (+z)
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([0, 2, 1]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
out["camera_model"] = camera_model
assert len(frames) > 0, "No images found in the colmap model"
return out
Expand Down Expand Up @@ -304,7 +310,6 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
if self.config.auto_scale_poses:
scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3])))
scale_factor *= self.config.scale_factor

poses[:, :3, 3] *= scale_factor

# Choose image_filenames and poses based on split, but after auto orient and scaling the poses.
Expand Down
19 changes: 12 additions & 7 deletions nerfstudio/process_data/colmap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ def colmap_to_json(
camera_mask_path: Optional[Path] = None,
image_id_to_depth_path: Optional[Dict[int, Path]] = None,
image_rename_map: Optional[Dict[str, str]] = None,
keep_original_world_coordinate: bool = False,
) -> int:
"""Converts COLMAP's cameras.bin and images.bin to a JSON file.

Expand All @@ -401,7 +402,9 @@ def colmap_to_json(
camera_mask_path: Path to the camera mask.
image_id_to_depth_path: When including sfm-based depth, embed these depth file paths in the exported json
image_rename_map: Use these image names instead of the names embedded in the COLMAP db

keep_original_world_coordinate: If True, no extra transform is applied to the world coordinate frame.
COLMAP's optimized world frame often has the y direction of the first camera pointing downward,
while nerfstudio's world frame uses the z direction as up for the viewer.
Returns:
The number of registered images.
"""
Expand Down Expand Up @@ -430,8 +433,9 @@ def colmap_to_json(
c2w = np.linalg.inv(w2c)
# Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
c2w[0:3, 1:3] *= -1
c2w = c2w[np.array([1, 0, 2, 3]), :]
c2w[2, :] *= -1
if not keep_original_world_coordinate:
c2w = c2w[np.array([0, 2, 1, 3]), :]
c2w[2, :] *= -1

name = im_data.name
if image_rename_map is not None:
Expand All @@ -455,10 +459,11 @@ def colmap_to_json(
out = parse_colmap_camera_params(cam_id_to_camera[1])
out["frames"] = frames

applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([1, 0, 2]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
if not keep_original_world_coordinate:
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([0, 2, 1]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()

with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
json.dump(out, f, indent=4)
Expand Down
Loading