Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Undistortion should not occur for already-undistorted images #2726

Merged
merged 11 commits into from
Jan 10, 2024
4 changes: 2 additions & 2 deletions nerfstudio/cameras/cameras.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,5 +986,5 @@ def rescale_output_resolution(
self.fy = self.fy * scaling_factor
self.cx = self.cx * scaling_factor
self.cy = self.cy * scaling_factor
self.height = (self.height * scaling_factor).to(torch.int64)
self.width = (self.width * scaling_factor).to(torch.int64)
self.height = torch.round(self.height * scaling_factor).to(torch.int64)
self.width = torch.round(self.width * scaling_factor).to(torch.int64)
62 changes: 29 additions & 33 deletions nerfstudio/data/datamanagers/full_images_datamanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,20 @@ def cache_images(self, cache_images_option):
continue
distortion_params = camera.distortion_params.numpy()
image = data["image"].numpy()

if camera.camera_type.item() == CameraType.PERSPECTIVE.value:
distortion_params = np.array(
[
distortion_params[0],
distortion_params[1],
distortion_params[4],
distortion_params[5],
distortion_params[2],
distortion_params[3],
0,
0,
]
)
distortion_params = np.array(
[
distortion_params[0],
distortion_params[1],
distortion_params[4],
distortion_params[5],
distortion_params[2],
distortion_params[3],
0,
0,
]
)

if camera.camera_type.item() == CameraType.PERSPECTIVE.value and np.any(distortion_params):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change wouldn't be compatible with other features (mask, depth_image). I would suggest checking np.any(distortion_params) and, if it is False, returning the original K and image without triggering the cv2 functions. Something like the code below should work:

if np.any(distortion_params):
    newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0)
    image = cv2.undistort(image, K, distortion_params, None, newK)  # type: ignore
else:
    newK = K
    roi = 0, 0, image.shape[1], image.shape[0]

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, I found that the following lines are problematic: roi was calculated for the undistorted image, not for the original image/mask. They should probably be deleted.

                if "mask" in data:
                    data["mask"] = data["mask"][y : y + h, x : x + w]
                if "depth_image" in data:
                    data["depth_image"] = data["depth_image"][y : y + h, x : x + w]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch. Fixed.
Per the other comment: I removed the lines for the mask, because that handling does indeed occur twice in the code. I left the depth lines as they are.

newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0)
image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore
# crop the image and update the intrinsics accordingly
Expand Down Expand Up @@ -184,10 +184,7 @@ def cache_images(self, cache_images_option):
mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK)
data["mask"] = torch.from_numpy(mask).bool()
K = newK
else:
raise NotImplementedError("Only perspective and fisheye cameras are supported")
data["image"] = torch.from_numpy(image)

cached_train.append(data)

self.train_dataset.cameras.fx[i] = float(K[0, 0])
Expand All @@ -206,20 +203,20 @@ def cache_images(self, cache_images_option):
continue
distortion_params = camera.distortion_params.numpy()
image = data["image"].numpy()

if camera.camera_type.item() == CameraType.PERSPECTIVE.value:
distortion_params = np.array(
[
distortion_params[0],
distortion_params[1],
distortion_params[4],
distortion_params[5],
distortion_params[2],
distortion_params[3],
0,
0,
]
)
distortion_params = np.array(
[
distortion_params[0],
distortion_params[1],
distortion_params[4],
distortion_params[5],
distortion_params[2],
distortion_params[3],
0,
0,
]
)

if camera.camera_type.item() == CameraType.PERSPECTIVE.value and np.any(distortion_params):
newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0)
image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore
# crop the image and update the intrinsics accordingly
Expand Down Expand Up @@ -254,8 +251,7 @@ def cache_images(self, cache_images_option):
mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK)
data["mask"] = torch.from_numpy(mask).bool()
K = newK
else:
raise NotImplementedError("Only perspective and fisheye cameras are supported")

data["image"] = torch.from_numpy(image)

cached_eval.append(data)
Expand Down
2 changes: 1 addition & 1 deletion nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def _downscale_images(self, paths, get_fname, downscale_factor: int, nearest_nei
path_out.parent.mkdir(parents=True, exist_ok=True)
ffmpeg_cmd = [
f'ffmpeg -y -noautorotate -i "{path}" ',
f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ",
f"-q:v 2 -vf scale=round(iw/{downscale_factor}):round(ih/{downscale_factor}){nn_flag} ",
f'"{path_out}"',
]
ffmpeg_cmd = " ".join(ffmpeg_cmd)
Expand Down
6 changes: 3 additions & 3 deletions nerfstudio/process_data/process_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def convert_video_to_images(

spacing = num_frames // num_frames_target

downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
downscale_chains = [f"[t{i}]scale=round(iw/{2**i}):round(ih/{2**i})[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]

Expand Down Expand Up @@ -280,7 +280,7 @@ def copy_images_list(
copied_image_paths.append(copied_image_path)

nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i}]" for i in range(num_downscales + 1)]
downscale_chains = [f"[t{i}]scale=round(iw/{2**i}):round(ih/{2**i}){nn_flag}[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]

for dir in downscale_dirs:
Expand Down Expand Up @@ -462,7 +462,7 @@ def downscale_images(
nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
ffmpeg_cmd = [
f'ffmpeg -y -noautorotate -i "{image_dir / filename}" ',
f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ",
f"-q:v 2 -vf scale=round(iw/{downscale_factor}):round(ih/{downscale_factor}){nn_flag} ",
f'"{downscale_dir / filename}"',
]
ffmpeg_cmd = " ".join(ffmpeg_cmd)
Expand Down
Loading