Skip to content

Commit

Permalink
Revisiting #1504 and fix colmap world coordinate transform (#2793)
Browse files Browse the repository at this point in the history
fix colmap world coordinate transform
  • Loading branch information
jb-ye authored Jan 19, 2024
1 parent 46847ab commit 25a00a0
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
27 changes: 16 additions & 11 deletions nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ class ColmapDataParserConfig(DataParserConfig):
"""The method to use to center the poses."""
auto_scale_poses: bool = True
"""Whether to automatically scale the poses to fit in +/- 1 bounding box."""
assume_colmap_world_coordinate_convention: bool = True
"""The world frame optimized by Colmap often has the y direction of the first camera pointing downward,
while the nerfstudio world uses the z direction as the up direction for the viewer. Therefore, we usually need to apply an extra
transform when orientation_method=none. This parameter has no effect if orientation_method is set to anything other than none.
When this parameter is set to False, no extra transform is applied when reading data from colmap.
"""
eval_mode: Literal["fraction", "filename", "interval", "all"] = "interval"
"""
The method to use for splitting the dataset into train and eval.
Expand Down Expand Up @@ -154,10 +160,10 @@ def _get_all_images_and_cameras(self, recon_dir: Path):
c2w = np.linalg.inv(w2c)
# Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
c2w[0:3, 1:3] *= -1
# Why do we want to flip Z with a handedness transform?
# See https://github.com/nerfstudio-project/nerfstudio/issues/1504
c2w = c2w[np.array([1, 0, 2, 3]), :]
c2w[2, :] *= -1
if self.config.assume_colmap_world_coordinate_convention:
# world coordinate transform: map colmap gravity guess (-y) to nerfstudio convention (+z)
c2w = c2w[np.array([0, 2, 1, 3]), :]
c2w[2, :] *= -1

frame = {
"file_path": (self.config.data / self.config.images_path / im_data.name).as_posix(),
Expand All @@ -181,12 +187,12 @@ def _get_all_images_and_cameras(self, recon_dir: Path):

out = {}
out["frames"] = frames
# Why do we want to flip Z with a handedness transform?
# See https://github.com/nerfstudio-project/nerfstudio/issues/1504
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([1, 0, 2]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
if self.config.assume_colmap_world_coordinate_convention:
# world coordinate transform: map colmap gravity guess (-y) to nerfstudio convention (+z)
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([0, 2, 1]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
out["camera_model"] = camera_model
assert len(frames) > 0, "No images found in the colmap model"
return out
Expand Down Expand Up @@ -304,7 +310,6 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
if self.config.auto_scale_poses:
scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3])))
scale_factor *= self.config.scale_factor

poses[:, :3, 3] *= scale_factor

# Choose image_filenames and poses based on split, but after auto orient and scaling the poses.
Expand Down
19 changes: 12 additions & 7 deletions nerfstudio/process_data/colmap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ def colmap_to_json(
camera_mask_path: Optional[Path] = None,
image_id_to_depth_path: Optional[Dict[int, Path]] = None,
image_rename_map: Optional[Dict[str, str]] = None,
keep_original_world_coordinate: bool = False,
) -> int:
"""Converts COLMAP's cameras.bin and images.bin to a JSON file.
Expand All @@ -401,7 +402,9 @@ def colmap_to_json(
camera_mask_path: Path to the camera mask.
image_id_to_depth_path: When including sfm-based depth, embed these depth file paths in the exported json
image_rename_map: Use these image names instead of the names embedded in the COLMAP db
keep_original_world_coordinate: If True, no extra transform will be applied to the world coordinates.
The world frame optimized by Colmap often has the y direction of the first camera pointing downward,
while the nerfstudio world uses the z direction as the up direction for the viewer.
Returns:
The number of registered images.
"""
Expand Down Expand Up @@ -430,8 +433,9 @@ def colmap_to_json(
c2w = np.linalg.inv(w2c)
# Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
c2w[0:3, 1:3] *= -1
c2w = c2w[np.array([1, 0, 2, 3]), :]
c2w[2, :] *= -1
if not keep_original_world_coordinate:
c2w = c2w[np.array([0, 2, 1, 3]), :]
c2w[2, :] *= -1

name = im_data.name
if image_rename_map is not None:
Expand All @@ -455,10 +459,11 @@ def colmap_to_json(
out = parse_colmap_camera_params(cam_id_to_camera[1])
out["frames"] = frames

applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([1, 0, 2]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()
if not keep_original_world_coordinate:
applied_transform = np.eye(4)[:3, :]
applied_transform = applied_transform[np.array([0, 2, 1]), :]
applied_transform[2, :] *= -1
out["applied_transform"] = applied_transform.tolist()

with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
json.dump(out, f, indent=4)
Expand Down

0 comments on commit 25a00a0

Please sign in to comment.