From ead9b37c658708af7971871839c6fbf613d8092d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Mon, 11 Sep 2023 09:50:18 +0200 Subject: [PATCH 001/101] Allow colmap parser to load 3D points (#2408) --- .../data/dataparsers/colmap_dataparser.py | 78 ++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index a8a993d305..046f6d5d35 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -71,6 +71,10 @@ class ColmapDataParserConfig(DataParserConfig): """Path to depth maps directory. If not set, depths are not loaded.""" colmap_path: Path = Path("sparse/0") """Path to the colmap reconstruction directory relative to the data path.""" + load_3D_points: bool = True + """Whether to load the 3D points from the colmap reconstruction.""" + max_2D_matches_per_3D_point: int = -1 + """Maximum number of 2D matches per 3D point. If set to -1, all 2D matches are loaded. If set to 0, no 2D matches are loaded.""" class ColmapDataParser(DataParser): @@ -202,7 +206,7 @@ def _get_image_indices(self, image_filenames, split): raise ValueError(f"Unknown dataparser split {split}") return indices - def _generate_dataparser_outputs(self, split: str = "train"): + def _generate_dataparser_outputs(self, split: str = "train", **kwargs): assert self.config.data.exists(), f"Data directory {self.config.data} does not exist." colmap_path = self.config.data / self.config.colmap_path assert colmap_path.exists(), f"Colmap path {colmap_path} does not exist." @@ -328,6 +332,11 @@ def _generate_dataparser_outputs(self, split: str = "train"): applied_scale = float(meta["applied_scale"]) scale_factor *= applied_scale + metadata = {} + if self.config.load_3D_points: + # Load 3D points + metadata.update(self._load_3D_points(colmap_path, transform_matrix, scale_factor)) + dataparser_outputs = DataparserOutputs( image_filenames=image_filenames, cameras=cameras, @@ -338,10 +347,77 @@ def _generate_dataparser_outputs(self, split: str = "train"): metadata={ "depth_filenames": depth_filenames if len(depth_filenames) > 0 else None, "depth_unit_scale_factor": self.config.depth_unit_scale_factor, + **metadata, }, ) return dataparser_outputs + def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, scale_factor: float): + if (colmap_path / "points3D.bin").exists(): + colmap_points = colmap_utils.read_points3D_binary(colmap_path / "points3D.bin") + elif (colmap_path / "points3D.txt").exists(): + colmap_points = colmap_utils.read_points3D_text(colmap_path / "points3D.txt") + else: + raise ValueError(f"Could not find points3D.txt or points3D.bin in {colmap_path}") + points3D = torch.from_numpy(np.array([p.xyz for p in colmap_points.values()], dtype=np.float32)) + points3D = ( + torch.cat( + ( + points3D, + torch.ones_like(points3D[..., :1]), + ), + -1, + ) + @ transform_matrix.T + ) + points3D *= scale_factor + + # Load point colours + points3D_rgb = torch.from_numpy(np.array([p.rgb for p in colmap_points.values()], dtype=np.uint8)) + points3D_num_points = torch.tensor([len(p.image_ids) for p in colmap_points.values()], dtype=torch.int64) + out = { + "points3D_xyz": points3D, + "points3D_rgb": points3D_rgb, + "points3D_error": torch.from_numpy(np.array([p.error for p in colmap_points.values()], dtype=np.float32)), + "points3D_num_points": points3D_num_points, + } + if self.config.max_2D_matches_per_3D_point 
!= 0: + if (colmap_path / "images.txt").exists(): + im_id_to_image = colmap_utils.read_images_text(colmap_path / "images.txt") + elif (colmap_path / "images.bin").exists(): + im_id_to_image = colmap_utils.read_images_binary(colmap_path / "images.bin") + else: + raise ValueError(f"Could not find images.txt or images.bin in {colmap_path}") + downscale_factor = self._downscale_factor + max_num_points = int(torch.max(points3D_num_points).item()) + if self.config.max_2D_matches_per_3D_point > 0: + max_num_points = min(max_num_points, self.config.max_2D_matches_per_3D_point) + points3D_image_ids = [] + points3D_image_xy = [] + for p in colmap_points.values(): + nids = np.array(p.image_ids, dtype=np.int64) + nxy_ids = np.array(p.point2D_idxs, dtype=np.int32) + if self.config.max_2D_matches_per_3D_point != -1: + # Randomly sample 2D matches + idxs = np.argsort(p.error)[: self.config.max_2D_matches_per_3D_point] + nids = nids[idxs] + nxy_ids = nxy_ids[idxs] + nxy = [im_id_to_image[im_id].xys[pt_idx] for im_id, pt_idx in zip(nids, nxy_ids)] + nxy = torch.from_numpy(np.stack(nxy).astype(np.float32)) + nids = torch.from_numpy(nids) + assert len(nids.shape) == 1 + assert len(nxy.shape) == 2 + points3D_image_ids.append( + torch.cat((nids, torch.full((max_num_points - len(nids),), -1, dtype=torch.int64))) + ) + points3D_image_xy.append( + torch.cat((nxy, torch.full((max_num_points - len(nxy), nxy.shape[-1]), 0, dtype=torch.float32))) + / downscale_factor + ) + out["points3D_image_ids"] = torch.stack(points3D_image_ids, dim=0) + out["points3D_image_xy"] = torch.stack(points3D_image_xy, dim=0) + return out + def _setup_downscale_factor( self, image_filenames: List[Path], mask_filenames: List[Path], depth_filenames: List[Path] ): From 570ccf3c0a298160caad72745d13db0c23d45a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Mon, 11 Sep 2023 18:00:44 +0200 Subject: [PATCH 002/101] Rename COLMAP 3D point features (#2417) --- nerfstudio/data/dataparsers/colmap_dataparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index 046f6d5d35..2c15258d6d 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -379,7 +379,7 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca "points3D_xyz": points3D, "points3D_rgb": points3D_rgb, "points3D_error": torch.from_numpy(np.array([p.error for p in colmap_points.values()], dtype=np.float32)), - "points3D_num_points": points3D_num_points, + "points3D_num_points2D": points3D_num_points, } if self.config.max_2D_matches_per_3D_point != 0: if (colmap_path / "images.txt").exists(): @@ -415,7 +415,7 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca / downscale_factor ) out["points3D_image_ids"] = torch.stack(points3D_image_ids, dim=0) - out["points3D_image_xy"] = torch.stack(points3D_image_xy, dim=0) + out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0) return out def _setup_downscale_factor( From cc98fb6bd4eb5d14a2a75bfa3dc46a6bc0d2666e Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Mon, 11 Sep 2023 20:07:52 +0200 Subject: [PATCH 003/101] Fix benchmarking: apply get_background_color in renderer and set profiler to none (#2397) * apply get_background_color and set profiler to none * Apply override first --------- Co-authored-by: Brent Yi --- 
nerfstudio/model_components/renderers.py | 17 +++++++---------- .../benchmarking/launch_train_blender.sh | 6 +++--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 20a506b1ea..1fde0d693c 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -102,21 +102,18 @@ def combine_rgb( comp_rgb = torch.sum(weights * rgb, dim=-2) accumulated_weight = torch.sum(weights, dim=-2) if BACKGROUND_COLOR_OVERRIDE is not None: - # This case must be before the others or the override is not properly applied - background_color = cls.get_background_color( - BACKGROUND_COLOR_OVERRIDE, shape=comp_rgb.shape, device=comp_rgb.device - ) - elif background_color == "random": + background_color = BACKGROUND_COLOR_OVERRIDE + if background_color == "random": # If background color is random, the predicted color is returned without blending, # as if the background color was black. return comp_rgb - elif background_color == "last_sample": # Note, this is only supported for non-packed samples. background_color = rgb[..., -1, :] + background_color = cls.get_background_color(background_color, shape=comp_rgb.shape, device=comp_rgb.device) assert isinstance(background_color, torch.Tensor) - comp_rgb = comp_rgb + background_color.to(comp_rgb.device) * (1.0 - accumulated_weight) + comp_rgb = comp_rgb + background_color * (1.0 - accumulated_weight) return comp_rgb @classmethod @@ -124,13 +121,13 @@ def get_background_color( cls, background_color: BackgroundColor, shape: Tuple[int, ...], device: torch.device ) -> Union[Float[Tensor, "3"], Float[Tensor, "*bs 3"]]: """Returns the RGB background color for a specified background color. - Note: This function CANNOT be called for background_color being either "last_sample" or "random". Args: - rgb: RGB for each sample. - background_color: The background color specification. + background_color: The background color specification. If a string is provided, it must be a valid color name. + shape: Shape of the output tensor. + device: Device on which to create the tensor. Returns: Background color as RGB. diff --git a/nerfstudio/scripts/benchmarking/launch_train_blender.sh b/nerfstudio/scripts/benchmarking/launch_train_blender.sh index cbdd7d754a..7b9db727db 100644 --- a/nerfstudio/scripts/benchmarking/launch_train_blender.sh +++ b/nerfstudio/scripts/benchmarking/launch_train_blender.sh @@ -17,7 +17,7 @@ while getopts "m:v:s" opt; do m ) method_name="$OPTARG" ;; v ) vis="$OPTARG" ;; s ) single=true ;; - ? ) helpFunction ;; + ? ) helpFunction ;; esac done @@ -83,12 +83,12 @@ for dataset in "${DATASETS[@]}"; do --steps-per-save=1000 \ --max-num-iterations=16500 \ --logging.local-writer.enable=False \ - --logging.enable-profiler=False \ + --logging.profiler="none" \ --vis "${vis}" \ --timestamp "$timestamp" \ ${dataparser} & GPU_PID[$idx]=$! 
echo "Launched ${method_name} ${dataset} on gpu ${GPU_IDX[$idx]}, ${tag}" - + # update gpu ((idx=(idx+1)%len)) done From e9330d515d6c7c81905e0b61ca3bd1e018d7ecdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Wed, 13 Sep 2023 19:55:24 +0200 Subject: [PATCH 004/101] Colmap parser: fix downscale for complex paths (#2425) --- .../data/dataparsers/colmap_dataparser.py | 89 ++++++++++++++----- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index 2c15258d6d..851a30367f 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -19,6 +19,7 @@ import sys from dataclasses import dataclass, field from pathlib import Path +from functools import partial from typing import List, Literal, Optional, Type import numpy as np @@ -32,8 +33,8 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.data.utils import colmap_parsing_utils as colmap_utils from nerfstudio.process_data.colmap_utils import parse_colmap_camera_params -from nerfstudio.process_data.process_data_utils import downscale_images -from nerfstudio.utils.rich_utils import CONSOLE +from nerfstudio.utils.scripts import run_command +from nerfstudio.utils.rich_utils import CONSOLE, status MAX_AUTO_RESOLUTION = 1600 @@ -139,15 +140,19 @@ def _get_all_images_and_cameras(self, recon_dir: Path): c2w[2, :] *= -1 frame = { - "file_path": (self.config.images_path / im_data.name).as_posix(), + "file_path": (self.config.data / self.config.images_path / im_data.name).as_posix(), "transform_matrix": c2w, "colmap_im_id": im_id, } frame.update(cameras[im_data.camera_id]) if self.config.masks_path is not None: - frame["mask_path"] = ((self.config.masks_path / im_data.name).with_suffix(".png").as_posix(),) + frame["mask_path"] = ( + (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix(), + ) if self.config.depths_path is not None: - frame["depth_path"] = ((self.config.depths_path / im_data.name).with_suffix(".png").as_posix(),) + frame["depth_path"] = ( + (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix(), + ) frames.append(frame) if camera_model is not None: assert camera_model == frame["camera_model"], "Multiple camera models are not supported" @@ -175,7 +180,7 @@ def _get_image_indices(self, image_filenames, split): with (self.config.data / f"{split}_list.txt").open("r", encoding="utf8") as f: filenames = f.read().splitlines() # Validate split first - split_filenames = set(self.config.images_path / x for x in filenames) + split_filenames = set(self.config.data / self.config.images_path / x for x in filenames) unmatched_filenames = split_filenames.difference(image_filenames) if unmatched_filenames: raise RuntimeError( @@ -418,6 +423,25 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0) return out + def _downscale_images(self, paths, get_fname, downscale_factor: int, nearest_neighbor: bool = False): + with status(msg="[bold yellow]Downscaling images...", spinner="growVertical"): + assert downscale_factor > 1 + assert isinstance(downscale_factor, int) + # Using %05d ffmpeg commands appears to be unreliable (skips images). 
+ for path in paths: + nn_flag = "" if not nearest_neighbor else ":flags=neighbor" + path_out = get_fname(path) + path_out.parent.mkdir(parents=True, exist_ok=True) + ffmpeg_cmd = [ + f'ffmpeg -y -noautorotate -i "{path}" ', + f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ", + f'"{path_out}"', + ] + ffmpeg_cmd = " ".join(ffmpeg_cmd) + run_command(ffmpeg_cmd) + + CONSOLE.log("[bold green]:tada: Done downscaling images.") + def _setup_downscale_factor( self, image_filenames: List[Path], mask_filenames: List[Path], depth_filenames: List[Path] ): @@ -425,17 +449,16 @@ def _setup_downscale_factor( Setup the downscale factor for the dataset. This is used to downscale the images and cameras. """ - def get_fname(filepath: Path) -> Path: + def get_fname(parent: Path, filepath: Path) -> Path: """Returns transformed file name when downscale factor is applied""" - parts = list(filepath.parts) - parts[-2] += f"_{self._downscale_factor}" - filepath = Path(*parts) - return self.config.data / filepath + rel_part = filepath.relative_to(parent) + base_part = parent.parent / (str(parent.name) + f"_{self._downscale_factor}") + return base_part / rel_part filepath = next(iter(image_filenames)) if self._downscale_factor is None: if self.config.downscale_factor is None: - test_img = Image.open(self.config.data / filepath) + test_img = Image.open(filepath) h, w = test_img.size max_res = max(h, w) df = 0 @@ -448,7 +471,9 @@ def get_fname(filepath: Path) -> Path: CONSOLE.log(f"Using image downscale factor of {self._downscale_factor}") else: self._downscale_factor = self.config.downscale_factor - if self._downscale_factor > 1 and not all(get_fname(fp).parent.exists() for fp in image_filenames): + if self._downscale_factor > 1 and not all( + get_fname(self.config.data / self.config.images_path, fp).parent.exists() for fp in image_filenames + ): # Downscaled images not found # Ask if user wants to downscale the images automatically here CONSOLE.print( @@ -456,23 +481,39 @@ def get_fname(filepath: Path) -> Path: ) if Confirm.ask("\nWould you like to downscale the images now?", default=False, console=CONSOLE): # Install the method - image_dir = self.config.data / image_filenames[0].parent - num_downscales = int(math.log2(self._downscale_factor)) - assert 2**num_downscales == self._downscale_factor, "Downscale factor must be a power of 2" - downscale_images(image_dir, num_downscales, folder_name=image_dir.name, nearest_neighbor=False) + self._downscale_images( + image_filenames, + partial(get_fname, self.config.data / self.config.images_path), + self._downscale_factor, + nearest_neighbor=False, + ) if len(mask_filenames) > 0: - mask_dir = mask_filenames[0].parent - downscale_images(mask_dir, num_downscales, folder_name=mask_dir.name, nearest_neighbor=True) + assert self.config.masks_path is not None + self._downscale_images( + mask_filenames, + partial(get_fname, self.config.data / self.config.masks_path), + self._downscale_factor, + nearest_neighbor=True, + ) if len(depth_filenames) > 0: - depth_dir = depth_filenames[0].parent - downscale_images(depth_dir, num_downscales, folder_name=depth_dir.name, nearest_neighbor=False) + assert self.config.depths_path is not None + self._downscale_images( + depth_filenames, + partial(get_fname, self.config.data / self.config.depths_path), + self._downscale_factor, + nearest_neighbor=True, + ) else: sys.exit(1) # Return transformed filenames if self._downscale_factor > 1: - image_filenames = [get_fname(fp) for fp in image_filenames] - mask_filenames = 
[get_fname(fp) for fp in mask_filenames] - depth_filenames = [get_fname(fp) for fp in depth_filenames] + image_filenames = [get_fname(self.config.data / self.config.images_path, fp) for fp in image_filenames] + if len(mask_filenames) > 0: + assert self.config.masks_path is not None + mask_filenames = [get_fname(self.config.data / self.config.masks_path, fp) for fp in mask_filenames] + if len(depth_filenames) > 0: + assert self.config.depths_path is not None + depth_filenames = [get_fname(self.config.data / self.config.depths_path, fp) for fp in depth_filenames] assert isinstance(self._downscale_factor, int) return image_filenames, mask_filenames, depth_filenames, self._downscale_factor From 805e6e702d328bf6fc2bf9721cbb0a4eedf03a35 Mon Sep 17 00:00:00 2001 From: anc2001 <71151378+anc2001@users.noreply.github.com> Date: Thu, 14 Sep 2023 09:11:15 -0700 Subject: [PATCH 005/101] fix pixel sampling with masks issue when data is list (#2369) Co-authored-by: Brent Yi --- nerfstudio/data/pixel_samplers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index b852c31b19..463d08294c 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -215,7 +215,7 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int, num_rays_in_batch = num_rays_per_batch - (num_images - 1) * num_rays_in_batch indices = self.sample_method( - num_rays_in_batch, 1, image_height, image_width, mask=batch["mask"][i], device=device + num_rays_in_batch, 1, image_height, image_width, mask=batch["mask"][i].unsqueeze(0), device=device ) indices[:, 0] = i all_indices.append(indices) From 7269a7ee1639cdb6d287988a648f53075ca8e1ea Mon Sep 17 00:00:00 2001 From: Sid Mehta Date: Fri, 15 Sep 2023 09:21:09 -0700 Subject: [PATCH 006/101] Add Comet Logging (#2431) * initial commit * fixed naming * added comet_ml as depedency * added comet to docs * Update custom_gui.md * Update models.md * Update benchmarking.md * added Comet to ReadMe * added decorator * fixed link and added comet examples * fixed link and added comet example * fixed format * fixed type ignore error * fixed readme typo * fixed typo --- README.md | 2 +- .../debugging_tools/benchmarking.md | 2 +- docs/developer_guides/pipelines/models.md | 2 +- docs/developer_guides/viewer/custom_gui.md | 2 +- docs/quickstart/first_nerf.md | 4 +- nerfstudio/configs/experiment_config.py | 15 ++++--- nerfstudio/engine/trainer.py | 11 ++--- nerfstudio/utils/decorators.py | 2 +- nerfstudio/utils/writer.py | 42 ++++++++++++++++--- pyproject.toml | 1 + 10 files changed, 57 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index aa5825a23a..b6f05f9be0 100644 --- a/README.md +++ b/README.md @@ -274,7 +274,7 @@ ns-train nerfacto --help ### Tensorboard / WandB / Viewer -We support three different methods to track training progress, using the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), and [Weights and Biases](https://wandb.ai/site). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. 
+We support four different methods to track training progress, using the viewer[tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and ,[Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=github). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, comet viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. # Learn More diff --git a/docs/developer_guides/debugging_tools/benchmarking.md b/docs/developer_guides/debugging_tools/benchmarking.md index b38c09277c..bf97db4b5c 100644 --- a/docs/developer_guides/debugging_tools/benchmarking.md +++ b/docs/developer_guides/debugging_tools/benchmarking.md @@ -16,7 +16,7 @@ Simply replace the arguments in brackets with the correct arguments. - `-m {METHOD_NAME}`: Name of the method you want to benchmark (e.g. `nerfacto`, `mipnerf`). - `-s`: Launch a single job per GPU. -- `-v {VIS}`: Use another visualization than wandb, which is the default. Only other option is tensorboard. +- `-v {VIS}`: Use another visualization than wandb, which is the default. Other options are comet & tensorboard. - `{GPU_LIST}`: (optional) Specify the list of gpus you want to use on your machine space separated. for instance, if you want to use GPU's 0-3, you will need to pass in `0 1 2 3`. If left empty, the script will automatically find available GPU's and distribute training jobs on the available GPUs. :::{admonition} Tip diff --git a/docs/developer_guides/pipelines/models.md b/docs/developer_guides/pipelines/models.md index 8afa76d3a2..4fb1cb3f24 100644 --- a/docs/developer_guides/pipelines/models.md +++ b/docs/developer_guides/pipelines/models.md @@ -55,7 +55,7 @@ class Model: """Process a RayBundle object and return RayOutputs describing quanties for each ray.""" def get_metrics_dict(self, outputs, batch): - """Returns metrics dictionary which will be plotted with wandb or tensorboard.""" + """Returns metrics dictionary which will be plotted with comet, wandb or tensorboard.""" def get_loss_dict(self, outputs, batch, metrics_dict=None): """Returns a dictionary of losses to be summed which will be your loss.""" diff --git a/docs/developer_guides/viewer/custom_gui.md b/docs/developer_guides/viewer/custom_gui.md index e11381d020..2ae41fa6ac 100644 --- a/docs/developer_guides/viewer/custom_gui.md +++ b/docs/developer_guides/viewer/custom_gui.md @@ -59,7 +59,7 @@ class MyModel(Model): **Writing to the element** -You can write to a viewer element in Python, which provides a convenient way to track values in your code without the need for wandb/tensorboard or relying on `print` statements. +You can write to a viewer element in Python, which provides a convenient way to track values in your code without the need for comet/wandb/tensorboard or relying on `print` statements. 
```python self.custom_value.value = x diff --git a/docs/quickstart/first_nerf.md b/docs/quickstart/first_nerf.md index a88183d9e4..137bd0b235 100644 --- a/docs/quickstart/first_nerf.md +++ b/docs/quickstart/first_nerf.md @@ -96,9 +96,9 @@ ns-train nerfacto nerfstudio-data --help Each script will have some other minor quirks (like the training script dataparser subcommand needing to come after the model subcommand), read up on them [here](../reference/cli/index.md). -## Tensorboard / WandB / Viewer +## Comet / Tensorboard / WandB / Viewer -We support three different methods to track training progress, using the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), and [Weights and Biases](https://wandb.ai/site). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. +We support four different methods to track training progress, using the viewer [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. 
## Evaluating Runs diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py index 1fa3598f6d..0da8893ed1 100644 --- a/nerfstudio/configs/experiment_config.py +++ b/nerfstudio/configs/experiment_config.py @@ -22,13 +22,7 @@ from typing import Any, Dict, Literal, Optional import yaml - -from nerfstudio.configs.base_config import ( - InstantiateConfig, - LoggingConfig, - MachineConfig, - ViewerConfig, -) +from nerfstudio.configs.base_config import InstantiateConfig, LoggingConfig, MachineConfig, ViewerConfig from nerfstudio.configs.config_utils import to_immutable_dict from nerfstudio.engine.optimizers import OptimizerConfig from nerfstudio.engine.schedulers import SchedulerConfig @@ -68,7 +62,9 @@ class ExperimentConfig(InstantiateConfig): } ) """Dictionary of optimizer groups and their schedulers""" - vis: Literal["viewer", "wandb", "tensorboard", "viewer+wandb", "viewer+tensorboard", "viewer_beta"] = "wandb" + vis: Literal[ + "viewer", "wandb", "tensorboard", "comet", "viewer+wandb", "viewer+tensorboard", "viewer+comet", "viewer_beta" + ] = "wandb" """Which visualizer to use.""" data: Optional[Path] = None """Alias for --pipeline.datamanager.data""" @@ -95,6 +91,9 @@ def is_tensorboard_enabled(self) -> bool: """Checks if tensorboard is enabled.""" return ("tensorboard" == self.vis) | ("viewer+tensorboard" == self.vis) + def is_comet_enabled(self) -> bool: + return ("comet" == self.vis) | ("viewer+comet" == self.vis) + def set_timestamp(self) -> None: """Dynamically set the experiment timestamp""" if self.timestamp == "{timestamp}": diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index 94256809cf..c8ae500433 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -27,11 +27,6 @@ from typing import Dict, List, Literal, Optional, Tuple, Type, cast import torch -from rich import box, style -from rich.panel import Panel -from rich.table import Table -from torch.cuda.amp.grad_scaler import GradScaler - from nerfstudio.configs.experiment_config import ExperimentConfig from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation @@ -44,6 +39,10 @@ from nerfstudio.utils.writer import EventName, TimeWriter from nerfstudio.viewer.server.viewer_state import ViewerState from nerfstudio.viewer_beta.viewer import Viewer as ViewerBetaState +from rich import box, style +from rich.panel import Panel +from rich.table import Table +from torch.cuda.amp.grad_scaler import GradScaler TRAIN_INTERATION_OUTPUT = Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]] TORCH_DEVICE = str @@ -197,6 +196,7 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None: writer.setup_event_writer( self.config.is_wandb_enabled(), self.config.is_tensorboard_enabled(), + self.config.is_comet_enabled(), log_dir=writer_log_path, experiment_name=self.config.experiment_name, project_name=self.config.project_name, @@ -326,6 +326,7 @@ def _check_viewer_warnings(self) -> None: (self.config.is_viewer_enabled() or self.config.is_viewer_beta_enabled()) and not self.config.is_tensorboard_enabled() and not self.config.is_wandb_enabled() + and not self.config.is_comet_enabled() ): string: str = ( "[NOTE] Not running eval iterations since only viewer is enabled.\n" diff --git a/nerfstudio/utils/decorators.py b/nerfstudio/utils/decorators.py index 06f28f60a6..d6fd3f4179 100644 --- 
a/nerfstudio/utils/decorators.py +++ b/nerfstudio/utils/decorators.py @@ -66,7 +66,7 @@ def check_eval_enabled(func: Callable) -> Callable: def wrapper(self, *args, **kwargs): ret = None - if self.config.is_wandb_enabled() or self.config.is_tensorboard_enabled(): + if self.config.is_wandb_enabled() or self.config.is_tensorboard_enabled() or self.config.is_comet_enabled(): ret = func(self, *args, **kwargs) return ret diff --git a/nerfstudio/utils/writer.py b/nerfstudio/utils/writer.py index 8d3b07b1a1..59e6c5a713 100644 --- a/nerfstudio/utils/writer.py +++ b/nerfstudio/utils/writer.py @@ -24,16 +24,16 @@ from time import time from typing import Any, Dict, List, Optional, Union +import comet_ml import torch import wandb from jaxtyping import Float -from torch import Tensor -from torch.utils.tensorboard import SummaryWriter - from nerfstudio.configs import base_config as cfg from nerfstudio.utils.decorators import check_main_thread, decorate_all from nerfstudio.utils.printing import human_format from nerfstudio.utils.rich_utils import CONSOLE +from torch import Tensor +from torch.utils.tensorboard import SummaryWriter def to8b(x): @@ -150,7 +150,7 @@ def put_time(name: str, duration: float, step: int, avg_over_steps: bool = True, put_scalar(name, duration, step) if update_eta: - ## NOTE: eta should be called with avg train iteration time + # NOTE: eta should be called with avg train iteration time remain_iter = GLOBAL_BUFFER["max_iter"] - step remain_time = remain_iter * GLOBAL_BUFFER["events"][name]["avg"] put_scalar(EventName.ETA, remain_time, step) @@ -185,7 +185,7 @@ def setup_local_writer(config: cfg.LoggingConfig, max_iter: int, banner_messages else: CONSOLE.log("disabled local writer") - ## configure all the global buffer basic information + # configure all the global buffer basic information GLOBAL_BUFFER["max_iter"] = max_iter GLOBAL_BUFFER["max_buffer_size"] = config.max_buffer_size GLOBAL_BUFFER["steps_per_log"] = config.steps_per_log @@ -203,6 +203,7 @@ def is_initialized(): def setup_event_writer( is_wandb_enabled: bool, is_tensorboard_enabled: bool, + is_comet_enabled: bool, log_dir: Path, experiment_name: str, project_name: str = "nerfstudio-project", @@ -214,6 +215,11 @@ def setup_event_writer( banner_messages: list of messages to always display at bottom of screen """ using_event_writer = False + + if is_comet_enabled: + curr_writer = CometWriter(log_dir=log_dir, experiment_name=experiment_name, project_name=project_name) + EVENT_WRITERS.append(curr_writer) + using_event_writer = True if is_wandb_enabled: curr_writer = WandbWriter(log_dir=log_dir, experiment_name=experiment_name, project_name=project_name) EVENT_WRITERS.append(curr_writer) @@ -225,7 +231,7 @@ def setup_event_writer( if using_event_writer: string = f"logging events to: {log_dir}" else: - string = "Disabled tensorboard/wandb event writers" + string = "Disabled comet/tensorboard/wandb event writers" CONSOLE.print(f"[bold yellow]{string}") @@ -346,6 +352,30 @@ def write_config(self, name: str, config_dict: Dict[str, Any], step: int): self.tb_writer.add_text("config", str(config_dict)) +@decorate_all([check_main_thread]) +class CometWriter(Writer): + """Comet_ML Writer Class""" + + def __init__(self, log_dir: Path, experiment_name: str, project_name: str = "nerfstudio-project"): + self.experiment = comet_ml.Experiment(project_name=project_name) + if experiment_name != "unnamed": + self.experiment.set_name(experiment_name) + + def write_image(self, name: str, image: Float[Tensor, "H W C"], step: int) -> None: + 
self.experiment.log_image(image, name, step=step) + + def write_scalar(self, name: str, scalar: Union[float, torch.Tensor], step: int) -> None: + self.experiment.log_metric(name, scalar, step) + + def write_config(self, name: str, config_dict: Dict[str, Any], step: int): + """Function that writes out the config to Comet + + Args: + config: config dictionary to write out + """ + self.experiment.log_parameters(config_dict, step=step) + + def _cursorup(x: int): """utility tool to move the cursor up on the terminal diff --git a/pyproject.toml b/pyproject.toml index 1347f954b2..add21cf6d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ classifiers = [ dependencies = [ "appdirs>=1.4", "av>=9.2.0", + "comet_ml>=3.33.8", "cryptography>=38", "tyro>=0.5.3", "gdown>=4.6.0", From ef2fd3dbe1d5b6f35781716b747ea142fe21f5e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Fri, 15 Sep 2023 23:18:23 +0200 Subject: [PATCH 007/101] Fix import error when tcnn installed but CUDA runtime not available (#2429) --- Dockerfile | 6 ++- nerfstudio/field_components/encodings.py | 8 +--- nerfstudio/field_components/mlp.py | 8 +--- nerfstudio/fields/sdf_field.py | 7 +-- nerfstudio/utils/external.py | 58 ++++++++++++++++++++++++ tests/field_components/test_fields.py | 7 ++- 6 files changed, 68 insertions(+), 26 deletions(-) create mode 100644 nerfstudio/utils/external.py diff --git a/Dockerfile b/Dockerfile index b48b0dd872..d73c487343 100644 --- a/Dockerfile +++ b/Dockerfile @@ -168,6 +168,8 @@ RUN cd nerfstudio && \ # Change working directory WORKDIR /workspace -# Install nerfstudio cli auto completion and enter shell if no command was provided. -CMD ns-install-cli --mode install && /bin/bash +# Install nerfstudio cli auto completion +RUN ns-install-cli --mode install +# Bash as default entrypoint. 
+CMD /bin/bash -l diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index 7145ae5996..437ee805fe 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -29,13 +29,7 @@ from nerfstudio.field_components.base_field_component import FieldComponent from nerfstudio.utils.math import components_from_spherical_harmonics, expected_sin from nerfstudio.utils.printing import print_tcnn_speed_warning - -try: - import tinycudann as tcnn - - TCNN_EXISTS = True -except ModuleNotFoundError: - TCNN_EXISTS = False +from nerfstudio.utils.external import tcnn, TCNN_EXISTS class Encoding(FieldComponent): diff --git a/nerfstudio/field_components/mlp.py b/nerfstudio/field_components/mlp.py index 52eabd29df..2585a49e4e 100644 --- a/nerfstudio/field_components/mlp.py +++ b/nerfstudio/field_components/mlp.py @@ -25,13 +25,7 @@ from nerfstudio.utils.printing import print_tcnn_speed_warning from nerfstudio.utils.rich_utils import CONSOLE - -try: - import tinycudann as tcnn - - TCNN_EXISTS = True -except ModuleNotFoundError: - TCNN_EXISTS = False +from nerfstudio.utils.external import TCNN_EXISTS, tcnn def activation_to_tcnn_string(activation: Union[nn.Module, None]) -> str: diff --git a/nerfstudio/fields/sdf_field.py b/nerfstudio/fields/sdf_field.py index 2f730d2c54..ed936d4b2b 100644 --- a/nerfstudio/fields/sdf_field.py +++ b/nerfstudio/fields/sdf_field.py @@ -33,12 +33,7 @@ from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.field_components.spatial_distortions import SpatialDistortion from nerfstudio.fields.base_field import Field, FieldConfig - -try: - import tinycudann as tcnn -except ModuleNotFoundError: - # tinycudann module doesn't exist - pass +from nerfstudio.utils.external import tcnn class LearnedVariance(nn.Module): diff --git a/nerfstudio/utils/external.py b/nerfstudio/utils/external.py new file mode 100644 index 0000000000..f14219bd56 --- /dev/null +++ b/nerfstudio/utils/external.py @@ -0,0 +1,58 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + + +class _LazyError: + def __init__(self, data): + self.__data = data # pylint: disable=unused-private-member + + class LazyErrorObj: + def __init__(self, data): + self.__data = data # pylint: disable=unused-private-member + + def __call__(self, *args, **kwds): + name, exc = object.__getattribute__(self, "__data") + raise RuntimeError(f"Could not load package {name}.") from exc + + def __getattr__(self, __name: str): + name, exc = object.__getattribute__(self, "__data") + raise RuntimeError(f"Could not load package {name}") from exc + + def __getattr__(self, __name: str): + return _LazyError.LazyErrorObj(object.__getattribute__(self, "__data")) + + +TCNN_EXISTS = False +tcnn_import_exception = None +tcnn = None +try: + import tinycudann + + tcnn = tinycudann + del tinycudann + TCNN_EXISTS = True +except ModuleNotFoundError as _exp: + tcnn_import_exception = _exp +except ImportError as _exp: + tcnn_import_exception = _exp +except EnvironmentError as _exp: + if "Unknown compute capability" not in _exp.args[0]: + raise _exp + print("Could not load tinycudann: " + str(_exp), file=sys.stderr) + tcnn_import_exception = _exp + +if tcnn_import_exception is not None: + tcnn = _LazyError(tcnn_import_exception) diff --git a/tests/field_components/test_fields.py b/tests/field_components/test_fields.py index 74d7d6c3af..fd5332776e 100644 --- a/tests/field_components/test_fields.py +++ b/tests/field_components/test_fields.py @@ -5,15 +5,14 @@ from nerfstudio.cameras.rays import Frustums, RaySamples from nerfstudio.fields.nerfacto_field import NerfactoField +from nerfstudio.utils.external import TCNN_EXISTS, tcnn_import_exception def test_nerfacto_field(): """Test the Nerfacto field""" - try: - import tinycudann as tcnn # noqa: F401 - except ModuleNotFoundError as e: + if not TCNN_EXISTS: # tinycudann module doesn't exist - print(e) + print(tcnn_import_exception) return device = torch.device("cuda" if torch.cuda.is_available() else "cpu") aabb_scale = 1.0 From b0e700137bd8bde21fbc7867f433dd36b2905e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=8F=E3=83=86=E3=82=A3=E3=80=80?= =?UTF-8?q?=E4=BB=81?= Date: Tue, 19 Sep 2023 13:13:48 +0900 Subject: [PATCH 008/101] Allow the option for timestamp to be generated for `ns-train --load-config` (#2442) * Fix timestamp for ns-train --load-config * Remove whitespace --- nerfstudio/scripts/train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nerfstudio/scripts/train.py b/nerfstudio/scripts/train.py index 9ca18c71a7..fde0a5c37a 100644 --- a/nerfstudio/scripts/train.py +++ b/nerfstudio/scripts/train.py @@ -226,7 +226,6 @@ def launch( def main(config: TrainerConfig) -> None: """Main function.""" - config.set_timestamp() if config.data: CONSOLE.log("Using --data alias for --data.pipeline.datamanager.data") config.pipeline.datamanager.data = config.data @@ -239,6 +238,8 @@ def main(config: TrainerConfig) -> None: CONSOLE.log(f"Loading pre-set config from: {config.load_config}") config = yaml.load(config.load_config.read_text(), Loader=yaml.Loader) + config.set_timestamp() + # print and save config config.print_to_terminal() config.save_config() From 3e1d4af8217134fc34b42690345c5847147bc732 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Tue, 19 Sep 2023 16:22:55 -0700 Subject: [PATCH 009/101] Add share link support for beta viewer (#2445) * Add share link support for beta viewer * More consistent CLI experience * fix viewer jitter by adding atomic update on camera pose * change to make_share_url * fix * remove 
incorrect assert --------- Co-authored-by: Justin Kerr --- nerfstudio/configs/base_config.py | 2 ++ nerfstudio/configs/experiment_config.py | 10 ++++++++-- nerfstudio/engine/trainer.py | 1 + nerfstudio/scripts/viewer/run_viewer.py | 8 ++++++-- nerfstudio/viewer_beta/viewer.py | 25 ++++++++++++++----------- pyproject.toml | 2 +- 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py index 315659acbb..fc906fae9e 100644 --- a/nerfstudio/configs/base_config.py +++ b/nerfstudio/configs/base_config.py @@ -146,3 +146,5 @@ class ViewerConfig(PrintableConfig): """Image format viewer should use; jpeg is lossy compression, while png is lossless.""" jpeg_quality: int = 90 """Quality tradeoff to use for jpeg compression.""" + make_share_url: bool = False + """Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored.""" diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py index 0da8893ed1..42cc4399ec 100644 --- a/nerfstudio/configs/experiment_config.py +++ b/nerfstudio/configs/experiment_config.py @@ -22,7 +22,13 @@ from typing import Any, Dict, Literal, Optional import yaml -from nerfstudio.configs.base_config import InstantiateConfig, LoggingConfig, MachineConfig, ViewerConfig + +from nerfstudio.configs.base_config import ( + InstantiateConfig, + LoggingConfig, + MachineConfig, + ViewerConfig, +) from nerfstudio.configs.config_utils import to_immutable_dict from nerfstudio.engine.optimizers import OptimizerConfig from nerfstudio.engine.schedulers import SchedulerConfig @@ -73,7 +79,7 @@ class ExperimentConfig(InstantiateConfig): relative_model_dir: Path = Path("nerfstudio_models/") """Relative path to save all checkpoints.""" load_scheduler: bool = True - """Whether to load the scheduler state_dict to resume training, if exists""" + """Whether to load the scheduler state_dict to resume training, if it exists.""" def is_viewer_enabled(self) -> bool: """Checks if a viewer is enabled.""" diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index c8ae500433..04fa8e05b3 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -177,6 +177,7 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None: pipeline=self.pipeline, trainer=self, train_lock=self.train_lock, + share=self.config.viewer.make_share_url, ) banner_messages = [f"Viewer Beta at: {self.viewer_state.viewer_url}"] self._check_viewer_warnings() diff --git a/nerfstudio/scripts/viewer/run_viewer.py b/nerfstudio/scripts/viewer/run_viewer.py index 56a712026f..de210a30d8 100644 --- a/nerfstudio/scripts/viewer/run_viewer.py +++ b/nerfstudio/scripts/viewer/run_viewer.py @@ -55,6 +55,8 @@ class RunViewer: """Viewer configuration""" vis: Literal["viewer", "viewer_beta"] = "viewer" """Type of viewer""" + make_share_url: bool = False + """Viewer beta feature: print a shareable URL. 
`vis` must be set to viewer_beta; this flag is otherwise ignored.""" def main(self) -> None: """Main function.""" @@ -66,6 +68,7 @@ def main(self) -> None: num_rays_per_chunk = config.viewer.num_rays_per_chunk assert self.viewer.num_rays_per_chunk == -1 config.vis = self.vis + config.viewer.make_share_url = self.make_share_url config.viewer = self.viewer.as_viewer_config() config.viewer.num_rays_per_chunk = num_rays_per_chunk @@ -103,6 +106,7 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int): log_filename=viewer_log_path, datapath=base_dir, pipeline=pipeline, + share=config.viewer.make_share_url, ) banner_messages = [f"Viewer Beta at: {viewer_state.viewer_url}"] @@ -126,11 +130,11 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int): def entrypoint(): """Entrypoint for use with pyproject scripts.""" tyro.extras.set_accent_color("bright_yellow") - tyro.cli(RunViewer).main() + tyro.cli(tyro.conf.FlagConversionOff[RunViewer]).main() if __name__ == "__main__": entrypoint() # For sphinx docs -get_parser_fn = lambda: tyro.extras.get_parser(RunViewer) # noqa +get_parser_fn = lambda: tyro.extras.get_parser(tyro.conf.FlagConversionOff[RunViewer]) # noqa diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 9e2314ad41..d2a53b198f 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -26,6 +26,8 @@ import viser import viser.theme import viser.transforms as vtf + +from nerfstudio.cameras.camera_optimizers import CameraOptimizer from nerfstudio.configs import base_config as cfg from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.models.base_model import Model @@ -40,8 +42,6 @@ from nerfstudio.viewer_beta.utils import CameraState, parse_object from nerfstudio.viewer_beta.viewer_elements import ViewerControl, ViewerElement -from nerfstudio.cameras.camera_optimizers import CameraOptimizer - if TYPE_CHECKING: from nerfstudio.engine.trainer import Trainer @@ -59,6 +59,7 @@ class Viewer: datapath: path to data pipeline: pipeline object to use trainer: trainer object to use + share: print a shareable URL Attributes: viewer_url: url to open viewer @@ -77,6 +78,7 @@ def __init__( pipeline: Pipeline, trainer: Optional[Trainer] = None, train_lock: Optional[threading.Lock] = None, + share: bool = False, ): self.config = config self.trainer = trainer @@ -103,7 +105,7 @@ def __init__( self._prev_train_state: Literal["training", "paused", "completed"] = "training" self.client: Optional[viser.ClientHandle] = None - self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port) + self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) buttons = ( viser.theme.TitlebarButton( text="Getting Started", @@ -191,14 +193,15 @@ def handle_new_client(self, client: viser.ClientHandle) -> None: @client.camera.on_update def _(cam: viser.CameraHandle) -> None: assert self.client is not None - self.last_move_time = time.time() - R = vtf.SO3(wxyz=self.client.camera.wxyz) - R = R @ vtf.SO3.from_x_radians(np.pi) - R = torch.tensor(R.as_matrix()) - pos = torch.tensor(self.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO - c2w = torch.concatenate([R, pos[:, None]], dim=1) - self.camera_state = CameraState(fov=self.client.camera.fov, aspect=self.client.camera.aspect, c2w=c2w) - self.render_statemachine.action(RenderAction("move", self.camera_state)) + with client.atomic(): + self.last_move_time = time.time() 
+ R = vtf.SO3(wxyz=self.client.camera.wxyz) + R = R @ vtf.SO3.from_x_radians(np.pi) + R = torch.tensor(R.as_matrix()) + pos = torch.tensor(self.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO + c2w = torch.concatenate([R, pos[:, None]], dim=1) + self.camera_state = CameraState(fov=self.client.camera.fov, aspect=self.client.camera.aspect, c2w=c2w) + self.render_statemachine.action(RenderAction("move", self.camera_state)) def set_camera_visibility(self, visible: bool) -> None: """Toggle the visibility of the training cameras.""" diff --git a/pyproject.toml b/pyproject.toml index add21cf6d1..c641526755 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.2", + "viser==0.1.3", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", From e0d8f0426752dc0f97bd332fd6b84e95e8377210 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 19 Sep 2023 16:48:54 -0700 Subject: [PATCH 010/101] Render oriented crops in viewer beta (#2447) * add obb to the rendering pipeline * useless line * lint * remove todo * pyright * more pyright --- nerfstudio/engine/trainer.py | 5 +- nerfstudio/scripts/render.py | 33 +++--- nerfstudio/viewer_beta/render_panel.py | 135 +++++++++++++++++++++---- nerfstudio/viewer_beta/viewer.py | 4 +- 4 files changed, 139 insertions(+), 38 deletions(-) diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index 04fa8e05b3..b4ccba9eea 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -170,10 +170,13 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None: ) banner_messages = [f"Viewer at: {self.viewer_state.viewer_url}"] if self.config.is_viewer_beta_enabled() and self.local_rank == 0: + datapath = self.config.data + if datapath is None: + datapath = self.base_dir self.viewer_state = ViewerBetaState( self.config.viewer, log_filename=viewer_log_path, - datapath=self.base_dir, + datapath=datapath, pipeline=self.pipeline, trainer=self, train_lock=self.train_lock, diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 5a3ca78bd0..99a7cae01d 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -54,7 +54,7 @@ ) from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager -from nerfstudio.data.scene_box import SceneBox +from nerfstudio.data.scene_box import OrientedBox from nerfstudio.model_components import renderers from nerfstudio.pipelines.base_pipeline import Pipeline from nerfstudio.utils import colormaps, install_checks @@ -127,12 +127,10 @@ def _render_trajectory_video( with progress: for camera_idx in progress.track(range(cameras.size), description=""): - aabb_box = None + obb_box = None if crop_data is not None: - bounding_box_min = crop_data.center - crop_data.scale / 2.0 - bounding_box_max = crop_data.center + crop_data.scale / 2.0 - aabb_box = SceneBox(torch.stack([bounding_box_min, bounding_box_max]).to(pipeline.device)) - camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, aabb_box=aabb_box) + obb_box = crop_data.obb + camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box) if crop_data is not None: with renderers.background_color_override_context( @@ -289,10 +287,17 @@ class CropData: background_color: Float[Tensor, "3"] = torch.Tensor([0.0, 0.0, 0.0]) """background color""" - center: 
Float[Tensor, "3"] = torch.Tensor([0.0, 0.0, 0.0]) - """center of the crop""" - scale: Float[Tensor, "3"] = torch.Tensor([2.0, 2.0, 2.0]) - """scale of the crop""" + obb: OrientedBox = OrientedBox(R=torch.eye(3), T=torch.zeros(3), S=torch.ones(3) * 2) + """Oriented box representing the crop region""" + + # properties for backwards-compatibility interface + @property + def center(self): + return self.obb.T + + @property + def scale(self): + return self.obb.S def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]: @@ -305,13 +310,13 @@ def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]: """ if "crop" not in camera_json or camera_json["crop"] is None: return None - bg_color = camera_json["crop"]["crop_bg_color"] - + center = camera_json["crop"]["crop_center"] + scale = camera_json["crop"]["crop_scale"] + rot = (0.0, 0.0, 0.0) if "crop_rot" not in camera_json["crop"] else tuple(camera_json["crop"]["crop_rot"]) return CropData( background_color=torch.Tensor([bg_color["r"] / 255.0, bg_color["g"] / 255.0, bg_color["b"] / 255.0]), - center=torch.Tensor(camera_json["crop"]["crop_center"]), - scale=torch.Tensor(camera_json["crop"]["crop_scale"]), + obb=OrientedBox.from_params(center, rot, scale), ) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 02d4d6ce07..7342b0f388 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -13,17 +13,19 @@ # limitations under the License. from __future__ import annotations - +from pathlib import Path import colorsys import dataclasses import threading import time from typing import Dict, List, Optional, Tuple - +import datetime +from nerfstudio.viewer_beta.control_panel import ControlPanel import numpy as onp import splines import splines.quaternion import viser +import json import viser.transforms as tf @@ -256,7 +258,11 @@ def update_spline(self) -> None: ) -def populate_render_tab(server: viser.ViserServer) -> None: +def populate_render_tab( + server: viser.ViserServer, config_path: Path, datapath: Path, control_panel: ControlPanel +) -> None: + from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO + fov_degrees = server.add_gui_slider( "FOV", initial_value=90.0, @@ -528,6 +534,11 @@ def _(_) -> None: play_button.visible = True pause_button.visible = False + # set the initial value to the current date-time string + now = datetime.datetime.now() + render_name_text = server.add_gui_text( + "Render Name", initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"), hint="Name of the render" + ) render_button = server.add_gui_button( "Generate Command", color="green", @@ -537,10 +548,107 @@ def _(_) -> None: @render_button.on_click def _(event: viser.GuiEvent) -> None: - """TODO: write the render JSON and show the render command.""" assert event.client is not None - with event.client.add_gui_modal("TODO") as modal: - event.client.add_gui_markdown("TODO") + num_frames = int(framerate_slider.value * duration_number.value) + json_data = {} + # json data has the properties: + # keyframes: list of keyframes with + # matrix : flattened 4x4 matrix + # fov: float in degrees + # aspect: float + # camera_type: string of camera type + # render_height: int + # render_width: int + # fps: int + # seconds: float + # is_cycle: bool + # smoothness_value: float + # camera_path: list of frames with properties + # camera_to_world: flattened 4x4 matrix + # fov: float in degrees + # aspect: float + # first populate the keyframes: + keyframes = [] + 
for keyframe, dummy in camera_path._keyframes.values(): + pose = tf.SE3.from_rotation_and_translation( + tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(onp.pi), + keyframe.position / VISER_NERFSTUDIO_SCALE_RATIO, + ) + keyframes.append( + { + "matrix": pose.as_matrix().flatten().tolist(), + "fov": onp.rad2deg(keyframe.override_fov_value) + if keyframe.override_fov_enabled + else fov_degrees.value, + "aspect": keyframe.aspect, + } + ) + json_data["keyframes"] = keyframes + json_data["camera_type"] = "perspective" + json_data["render_height"] = resolution.value[1] + json_data["render_width"] = resolution.value[0] + json_data["fps"] = framerate_slider.value + json_data["seconds"] = duration_number.value + json_data["is_cycle"] = loop.value + json_data["smoothness_value"] = smoothness.value + # now populate the camera path: + camera_path_list = [] + for i in range(num_frames): + maybe_pose_and_fov = camera_path.interpolate_pose_and_fov(i / num_frames) + if maybe_pose_and_fov is None: + return + pose, fov = maybe_pose_and_fov + # rotate the axis of the camera 180 about x axis + pose = tf.SE3.from_rotation_and_translation( + pose.rotation() @ tf.SO3.from_x_radians(onp.pi), + pose.translation() / VISER_NERFSTUDIO_SCALE_RATIO, + ) + camera_path_list.append( + { + "camera_to_world": pose.as_matrix().flatten().tolist(), + "fov": onp.rad2deg(fov), + "aspect": resolution.value[0] / resolution.value[1], + } + ) + json_data["camera_path"] = camera_path_list + # finally add crop data if crop is enabled + if control_panel.crop_viewport: + obb = control_panel.crop_obb + rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians() + color = control_panel.background_color + json_data["crop"] = { + "crop_center": obb.T.tolist(), + "crop_scale": obb.S.tolist(), + "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw], + "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]}, + } + + # now write the json file + json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json" + with open(json_outfile.absolute(), "w") as outfile: + json.dump(json_data, outfile) + # now show the command + with event.client.add_gui_modal("Render Command") as modal: + dataname = datapath.name + command = " ".join( + [ + "ns-render camera-path", + f"--load-config {config_path}", + f"--camera-path-filename {json_outfile.absolute()}", + f"--output-path renders/{dataname}/{render_name_text.value}.mp4", + ] + ) + event.client.add_gui_markdown( + "\n".join( + [ + "To render the trajectory, run the following from the command line:", + "", + "```", + command, + "```", + ] + ) + ) close_button = event.client.add_gui_button("Close") @close_button.on_click @@ -551,18 +659,3 @@ def _(_) -> None: camera_path.default_fov = fov_degrees.value / 180.0 * onp.pi transform_controls: List[viser.SceneNodeHandle] = [] - - -def main() -> None: - """Launch a GUI with just the render panel, for development purposes.""" - server = viser.ViserServer() - server.configure_theme(dark_mode=True, control_layout="collapsible") - server.world_axes.visible = True - - populate_render_tab(server) - while True: - time.sleep(10.0) - - -if __name__ == "__main__": - main() diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index d2a53b198f..6e6e02a294 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -154,11 +154,11 @@ def __init__( self._toggle_training_state, self.set_camera_visibility, ) + config_path = self.log_filename.parents[0] / "config.yml" with tabs.add_tab("Render", viser.Icon.CAMERA): - 
populate_render_tab(self.viser_server) + populate_render_tab(self.viser_server, config_path, self.datapath, self.control_panel) with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): - config_path = self.log_filename.parents[0] / "config.yml" populate_export_tab(self.viser_server, self.control_panel, config_path) def nested_folder_install(folder_labels: List[str], element: ViewerElement): From 76f8bcf953dbc001fe3db32839db1448bcd6ad85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Thu, 21 Sep 2023 22:51:35 +0200 Subject: [PATCH 011/101] Disable loading colmap points by default (#2451) --- nerfstudio/data/dataparsers/colmap_dataparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index 851a30367f..c68a9cf6a9 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -72,7 +72,7 @@ class ColmapDataParserConfig(DataParserConfig): """Path to depth maps directory. If not set, depths are not loaded.""" colmap_path: Path = Path("sparse/0") """Path to the colmap reconstruction directory relative to the data path.""" - load_3D_points: bool = True + load_3D_points: bool = False """Whether to load the 3D points from the colmap reconstruction.""" max_2D_matches_per_3D_point: int = -1 """Maximum number of 2D matches per 3D point. If set to -1, all 2D matches are loaded. If set to 0, no 2D matches are loaded.""" From c87ebe34ba8b11172971ce48e44b6a8e8eb7a6fc Mon Sep 17 00:00:00 2001 From: Matthew Tancik Date: Thu, 21 Sep 2023 18:38:22 -0700 Subject: [PATCH 012/101] v0.3.4 (#2452) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c641526755..c041858ba3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "nerfstudio" -version = "0.3.3" +version = "0.3.4" description = "All-in-one repository for state-of-the-art NeRFs" readme = "README.md" license = { text="Apache 2.0"} From 2fd8b59d20e73f55106012623d188b227a976df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=8F=E3=83=86=E3=82=A3=E3=80=80?= =?UTF-8?q?=E4=BB=81?= Date: Fri, 22 Sep 2023 23:40:20 +0900 Subject: [PATCH 013/101] Add `background_color` config to keep consistent with other models (#2455) * add backgorund_color config * removed unused color import --- nerfstudio/models/generfacto.py | 7 ++++--- nerfstudio/models/mipnerf.py | 4 ++-- nerfstudio/models/tensorf.py | 4 +++- nerfstudio/models/vanilla_nerf.py | 8 +++++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/nerfstudio/models/generfacto.py b/nerfstudio/models/generfacto.py index bf34eb3357..c07f904c7d 100644 --- a/nerfstudio/models/generfacto.py +++ b/nerfstudio/models/generfacto.py @@ -57,7 +57,7 @@ from nerfstudio.model_components.scene_colliders import AABBBoxCollider, SphereCollider from nerfstudio.model_components.shaders import LambertianShader, NormalsShader from nerfstudio.models.base_model import Model, ModelConfig -from nerfstudio.utils import colormaps, colors, math, misc +from nerfstudio.utils import colormaps, math, misc @dataclass @@ -68,7 +68,8 @@ class GenerfactoModelConfig(ModelConfig): """target class to instantiate""" prompt: str = "a high quality photo of a ripe pineapple" """prompt for stable dreamfusion""" - + background_color: Literal["random", "last_sample", "black", "white"] = 
"white" + """Whether to randomize the background color.""" orientation_loss_mult: Tuple[float, float] = (0.001, 10.0) """Orientation loss multipier on computed normals.""" orientation_loss_mult_range: Tuple[int, int] = (0, 15000) @@ -251,7 +252,7 @@ def update_schedule(step): ) # renderers - self.renderer_rgb = RGBRenderer(background_color=colors.WHITE) + self.renderer_rgb = RGBRenderer(background_color=self.config.background_color) self.renderer_accumulation = AccumulationRenderer() self.renderer_depth = DepthRenderer() self.renderer_normals = NormalsRenderer() diff --git a/nerfstudio/models/mipnerf.py b/nerfstudio/models/mipnerf.py index a3a7834615..67ca632514 100644 --- a/nerfstudio/models/mipnerf.py +++ b/nerfstudio/models/mipnerf.py @@ -38,7 +38,7 @@ ) from nerfstudio.models.base_model import Model from nerfstudio.models.vanilla_nerf import VanillaModelConfig -from nerfstudio.utils import colormaps, colors, misc +from nerfstudio.utils import colormaps, misc class MipNerfModel(Model): @@ -81,7 +81,7 @@ def populate_modules(self): self.sampler_pdf = PDFSampler(num_samples=self.config.num_importance_samples, include_original=False) # renderers - self.renderer_rgb = RGBRenderer(background_color=colors.WHITE) + self.renderer_rgb = RGBRenderer(background_color=self.config.background_color) self.renderer_accumulation = AccumulationRenderer() self.renderer_depth = DepthRenderer() diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py index a2cd9901d9..a0380d42e4 100644 --- a/nerfstudio/models/tensorf.py +++ b/nerfstudio/models/tensorf.py @@ -89,6 +89,8 @@ class TensoRFModelConfig(ModelConfig): tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm" regularization: Literal["none", "l1", "tv"] = "l1" """Regularization method used in tensorf paper""" + background_color: Literal["random", "last_sample", "black", "white"] = "white" + """Whether to randomize the background color.""" class TensoRFModel(Model): @@ -234,7 +236,7 @@ def populate_modules(self): self.sampler_pdf = PDFSampler(num_samples=self.config.num_samples, single_jitter=True, include_original=False) # renderers - self.renderer_rgb = RGBRenderer(background_color=colors.WHITE) + self.renderer_rgb = RGBRenderer(background_color=self.config.background_color) self.renderer_accumulation = AccumulationRenderer() self.renderer_depth = DepthRenderer() diff --git a/nerfstudio/models/vanilla_nerf.py b/nerfstudio/models/vanilla_nerf.py index ea5344fb87..e95c8dfa49 100644 --- a/nerfstudio/models/vanilla_nerf.py +++ b/nerfstudio/models/vanilla_nerf.py @@ -19,7 +19,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Dict, List, Tuple, Type +from typing import Any, Dict, List, Tuple, Type, Literal import torch from torch.nn import Parameter @@ -41,7 +41,7 @@ RGBRenderer, ) from nerfstudio.models.base_model import Model, ModelConfig -from nerfstudio.utils import colormaps, colors, misc +from nerfstudio.utils import colormaps, misc @dataclass @@ -58,6 +58,8 @@ class VanillaModelConfig(ModelConfig): """Specifies whether or not to include ray warping based on time.""" temporal_distortion_params: Dict[str, Any] = to_immutable_dict({"kind": TemporalDistortionKind.DNERF}) """Parameters to instantiate temporal distortion with""" + background_color: Literal["random", "last_sample", "black", "white"] = "white" + """Whether to randomize the background color.""" class NeRFModel(Model): @@ -110,7 +112,7 @@ def populate_modules(self): self.sampler_pdf = 
PDFSampler(num_samples=self.config.num_importance_samples) # renderers - self.renderer_rgb = RGBRenderer(background_color=colors.WHITE) + self.renderer_rgb = RGBRenderer(background_color=self.config.background_color) self.renderer_accumulation = AccumulationRenderer() self.renderer_depth = DepthRenderer() From 52f26e82b3adb9ba89d61e7ca938a00faa7994dd Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Fri, 22 Sep 2023 17:36:51 +0200 Subject: [PATCH 014/101] Do not materialize mlp_head if no field_heads (#2457) In case it is used in a proposal network we avoid initializing the head. Co-authored-by: Matthew Tancik --- nerfstudio/fields/vanilla_nerf_field.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nerfstudio/fields/vanilla_nerf_field.py b/nerfstudio/fields/vanilla_nerf_field.py index c9d2bbb78b..22e11a25b8 100644 --- a/nerfstudio/fields/vanilla_nerf_field.py +++ b/nerfstudio/fields/vanilla_nerf_field.py @@ -74,15 +74,15 @@ def __init__( skip_connections=skip_connections, out_activation=nn.ReLU(), ) - - self.mlp_head = MLP( - in_dim=self.mlp_base.get_out_dim() + self.direction_encoding.get_out_dim(), - num_layers=head_mlp_num_layers, - layer_width=head_mlp_layer_width, - out_activation=nn.ReLU(), - ) - self.field_output_density = DensityFieldHead(in_dim=self.mlp_base.get_out_dim()) + + if field_heads: + self.mlp_head = MLP( + in_dim=self.mlp_base.get_out_dim() + self.direction_encoding.get_out_dim(), + num_layers=head_mlp_num_layers, + layer_width=head_mlp_layer_width, + out_activation=nn.ReLU(), + ) self.field_heads = nn.ModuleList([field_head() for field_head in field_heads] if field_heads else []) # type: ignore for field_head in self.field_heads: field_head.set_in_dim(self.mlp_head.get_out_dim()) # type: ignore From 8812d7a5b092beb822ddd2b191c5e82cd3f66ed5 Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Fri, 22 Sep 2023 19:45:05 +0200 Subject: [PATCH 015/101] .vscode settings match move to ms-python.black-formatter (#2456) Co-authored-by: Brent Yi --- .vscode/settings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4c3a7447af..8b0f1d7501 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -33,7 +33,7 @@ "editor.formatOnSave": true, "python.envFile": "${workspaceFolder}/.env", "python.formatting.provider": "none", - "python.formatting.blackArgs": ["--line-length=120"], + "black-formatter.args": ["--line-length=120"], "python.linting.pylintEnabled": false, "python.linting.flake8Enabled": false, "python.linting.enabled": true, From e0f0cd05aa06bdd148a20ed62271d49ff0a6ac9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ruilong=20Li=28=E6=9D=8E=E7=91=9E=E9=BE=99=29?= Date: Fri, 22 Sep 2023 23:04:02 -0700 Subject: [PATCH 016/101] Migrate doc from RTD to gh-pages (#2460) * gh-pages workflow * update doc.yml * update * remove RTD * change action to main * test fail on warning * fail on warnings * get back .readthedocs.yml, a bit cleanup * revert SHA of index.md file --- .github/workflows/doc.yml | 33 +++++++++++++++++++++++++++++++++ docs/Makefile | 2 +- docs/make.bat | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/doc.yml diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml new file mode 100644 index 0000000000..51d07554db --- /dev/null +++ b/.github/workflows/doc.yml @@ -0,0 +1,33 @@ +name: Docs +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +permissions: + 
contents: write +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Install dependencies + run: | + pip install .[docs] + - name: Sphinx build + # fail on warnings + run: | + sphinx-build docs _build -W --keep-going + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + publish_branch: gh-pages + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: _build/ + force_orphan: true + # cname: docs.nerf.studio diff --git a/docs/Makefile b/docs/Makefile index d4bb2cbb9e..5f83226609 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -W --keep-going # build fail on warning SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build diff --git a/docs/make.bat b/docs/make.bat index 32bb24529f..008ca8c48c 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -9,6 +9,7 @@ if "%SPHINXBUILD%" == "" ( ) set SOURCEDIR=. set BUILDDIR=_build +set SPHINXOPTS="-W --keep-going" %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( From 53037ba8a790a7aa089c8a4983c2aa210e01111c Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Sun, 24 Sep 2023 12:06:16 +0200 Subject: [PATCH 017/101] Allow custom sampler in ProposalNetworkSampler (#2464) * Allow custom sampler in ProposalNetworkSampler This allows custom (pdf)samplers in distinct repositories while still using the base ProposalNetworkSampler. * sampler should inherit from pdf sampler * docstring --- nerfstudio/model_components/ray_samplers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nerfstudio/model_components/ray_samplers.py b/nerfstudio/model_components/ray_samplers.py index e18c680ae3..7a2052b639 100644 --- a/nerfstudio/model_components/ray_samplers.py +++ b/nerfstudio/model_components/ray_samplers.py @@ -530,6 +530,7 @@ class ProposalNetworkSampler(Sampler): single_jitter: Use a same random jitter for all samples along a ray. update_sched: A function that takes the iteration number of steps between updates. initial_sampler: Sampler to use for the first iteration. Uses UniformLinDispPiecewise if not set. + pdf_sampler: PDFSampler to use after the first iteration. Uses PDFSampler if not set. 
""" def __init__( @@ -540,6 +541,7 @@ def __init__( single_jitter: bool = False, update_sched: Callable = lambda x: 1, initial_sampler: Optional[Sampler] = None, + pdf_sampler: Optional[PDFSampler] = None, ) -> None: super().__init__() self.num_proposal_samples_per_ray = num_proposal_samples_per_ray @@ -554,7 +556,10 @@ def __init__( self.initial_sampler = UniformLinDispPiecewiseSampler(single_jitter=single_jitter) else: self.initial_sampler = initial_sampler - self.pdf_sampler = PDFSampler(include_original=False, single_jitter=single_jitter) + if pdf_sampler is None: + self.pdf_sampler = PDFSampler(include_original=False, single_jitter=single_jitter) + else: + self.pdf_sampler = pdf_sampler self._anneal = 1.0 self._steps_since_update = 0 From 2864c52618fa9511639015fd390f50a38b00cd61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Mon, 25 Sep 2023 17:37:08 +0200 Subject: [PATCH 018/101] Fix COLMAP dataparser masks and depthmaps (#2467) --- nerfstudio/data/dataparsers/colmap_dataparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index c68a9cf6a9..071a11e1ea 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -147,11 +147,11 @@ def _get_all_images_and_cameras(self, recon_dir: Path): frame.update(cameras[im_data.camera_id]) if self.config.masks_path is not None: frame["mask_path"] = ( - (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix(), + (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix() ) if self.config.depths_path is not None: frame["depth_path"] = ( - (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix(), + (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix() ) frames.append(frame) if camera_model is not None: From 4cdeaa6eec186afe8fd6e1293cecd582215f1ff4 Mon Sep 17 00:00:00 2001 From: Chung Min Kim Date: Mon, 25 Sep 2023 13:14:51 -0700 Subject: [PATCH 019/101] Track all viser GUI folders to avoid folder duplicates (viewer-beta) (#2466) * Track all viser GUI folders to avoid folder duplicates * lint --------- Co-authored-by: Justin Kerr --- nerfstudio/viewer_beta/viewer.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 6e6e02a294..d4fbf973b3 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -161,22 +161,40 @@ def __init__( with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): populate_export_tab(self.viser_server, self.control_panel, config_path) - def nested_folder_install(folder_labels: List[str], element: ViewerElement): + # Keep track of the pointers to generated GUI folders, because each generated folder holds a unique ID. 
+ viewer_gui_folders = dict() + + def nested_folder_install(folder_labels: List[str], prev_labels: List[str], element: ViewerElement): if len(folder_labels) == 0: element.install(self.viser_server) # also rewire the hook to rerender prev_cb = element.cb_hook element.cb_hook = lambda element: [prev_cb(element), self._interrupt_render(element)] else: - with self.viser_server.add_gui_folder(folder_labels[0]): - nested_folder_install(folder_labels[1:], element) + # recursively create folders + # If the folder name is "Custom Elements/a/b", then: + # in the beginning: folder_path will be + # "/".join([] + ["Custom Elements"]) --> "Custom Elements" + # later, folder_path will be + # "/".join(["Custom Elements"] + ["a"]) --> "Custom Elements/a" + # "/".join(["Custom Elements", "a"] + ["b"]) --> "Custom Elements/a/b" + # --> the element will be installed in the folder "Custom Elements/a/b" + # + # Note that the gui_folder is created only when the folder is not in viewer_gui_folders, + # and we use the folder_path as the key to check if the folder is already created. + # Otherwise, use the existing folder as context manager. + folder_path = "/".join(prev_labels + [folder_labels[0]]) + if folder_path not in viewer_gui_folders: + viewer_gui_folders[folder_path] = self.viser_server.add_gui_folder(folder_labels[0]) + with viewer_gui_folders[folder_path]: + nested_folder_install(folder_labels[1:], prev_labels + [folder_labels[0]], element) with control_tab: self.viewer_elements = [] self.viewer_elements.extend(parse_object(pipeline, ViewerElement, "Custom Elements")) for param_path, element in self.viewer_elements: folder_labels = param_path.split("/")[:-1] - nested_folder_install(folder_labels, element) + nested_folder_install(folder_labels, [], element) # scrape the trainer/pipeline for any ViewerControl objects to initialize them self.viewer_controls: List[ViewerControl] = [ From 593950efec18bdfb28297f1c9d17248f9a5e2ac0 Mon Sep 17 00:00:00 2001 From: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com> Date: Wed, 27 Sep 2023 10:45:58 -0700 Subject: [PATCH 020/101] Update colmap installation instructions with conda (#2473) --- docs/quickstart/custom_dataset.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index c826979360..723a97c91e 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -57,10 +57,16 @@ There are many ways to install COLMAP, unfortunately it can sometimes be a bit f ::::::{tab-set} :::::{tab-item} Linux -We recommend trying `apt`: +We recommend trying `conda`: ``` -sudo apt install colmap +conda install -c conda-forge colmap +``` + +Check that COLMAP 3.8 with CUDA is successfully installed: + +``` +colmap -h ``` If that doesn't work, you can try VKPG: From 169030199ee9e8c7e47a7f30bf542b749a4bdf06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ruilong=20Li=28=E6=9D=8E=E7=91=9E=E9=BE=99=29?= Date: Thu, 28 Sep 2023 18:14:40 -0700 Subject: [PATCH 021/101] Update doc.yml (#2479) --- .github/workflows/doc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 51d07554db..0878bdea99 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -30,4 +30,4 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: _build/ force_orphan: true - # cname: docs.nerf.studio + cname: docs.nerf.studio From 88d5b03a802c7ec6b7dc21078ec8237e942a268e Mon Sep 17 00:00:00 2001 From: 
Matthew Tancik Date: Thu, 28 Sep 2023 18:15:35 -0700 Subject: [PATCH 022/101] Remove rtd (#2478) --- .readthedocs.yaml | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index d66aefd16a..0000000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# .readthedocs.yaml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the version of Python and other tools you might need -build: - os: ubuntu-20.04 - tools: - python: '3.9' - # You can also specify other tool versions: - # nodejs: "16" - # rust: "1.55" - # golang: "1.17" - -# Build documentation in the docs/ directory with Sphinx -sphinx: - fail_on_warning: true - configuration: docs/conf.py - -# If using Sphinx, optionally build your docs in additional formats such as PDF -# formats: -# - pdf - -# Optionally declare the Python requirements required to build your docs -python: - install: - # Equivalent to 'pip install .' - - method: pip - path: . - # Equivalent to 'pip install .[docs]' - - method: pip - path: . - extra_requirements: - - docs From bbe31736f31ae3246579df9e3c256c1e81bef9af Mon Sep 17 00:00:00 2001 From: Matthew Tancik Date: Fri, 29 Sep 2023 10:48:51 -0700 Subject: [PATCH 023/101] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b6f05f9be0..7e1a0ad210 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ - + Documentation Status From c93b374d08b2c1e4f95032f61d55b5744e384dfd Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Fri, 29 Sep 2023 12:33:19 -0700 Subject: [PATCH 024/101] Don't deploy docs from pull requests (#2482) --- .github/workflows/doc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 0878bdea99..7b46b3429e 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -31,3 +31,4 @@ jobs: publish_dir: _build/ force_orphan: true cname: docs.nerf.studio + if: github.event_name != 'pull_request' From 7856aa006011404e0f0b4b77a6252f1ad5e8cc28 Mon Sep 17 00:00:00 2001 From: pierremerriaux-leddartech <42007976+pierremerriaux-leddartech@users.noreply.github.com> Date: Fri, 29 Sep 2023 19:49:20 -0400 Subject: [PATCH 025/101] fix:correction of PairPixelSampler in case of using masks with depth-nerfacto (#2477) fix:correction of PairPixelSampler in case of using masks --- nerfstudio/data/pixel_samplers.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index 463d08294c..d7e76806d8 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -399,10 +399,11 @@ def sample_method( # pylint: disable=no-self-use mask: Optional[Tensor] = None, device: Union[torch.device, str] = "cpu", ) -> Int[Tensor, "batch_size 3"]: + rays_to_sample = self.rays_to_sample if isinstance(mask, Tensor): m = erode_mask(mask.permute(0, 3, 1, 2).float(), pixel_radius=self.radius) nonzero_indices = torch.nonzero(m[:, 0], as_tuple=False).to(device) - chosen_indices = random.sample(range(len(nonzero_indices)), k=self.rays_to_sample) + chosen_indices = random.sample(range(len(nonzero_indices)), k=rays_to_sample) indices = nonzero_indices[chosen_indices] else: rays_to_sample = self.rays_to_sample @@ -418,12 +419,12 @@ def sample_method( # pylint: 
disable=no-self-use ws = torch.randint(self.radius, image_width - self.radius, s, dtype=torch.long, device=device) indices = torch.concat((ns, hs, ws), dim=1) - pair_indices = torch.hstack( - ( - torch.zeros(rays_to_sample, 1, device=device, dtype=torch.long), - torch.randint(-self.radius, self.radius, (rays_to_sample, 2), device=device, dtype=torch.long), - ) + pair_indices = torch.hstack( + ( + torch.zeros(rays_to_sample, 1, device=device, dtype=torch.long), + torch.randint(-self.radius, self.radius, (rays_to_sample, 2), device=device, dtype=torch.long), ) - pair_indices += indices - indices = torch.hstack((indices, pair_indices)).view(rays_to_sample * 2, 3) + ) + pair_indices += indices + indices = torch.hstack((indices, pair_indices)).view(rays_to_sample * 2, 3) return indices From b78d0faa1289c234f644ff296ae1f5f64ad3a05d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Sun, 1 Oct 2023 22:12:48 +0200 Subject: [PATCH 026/101] ns-render - render all images (#2459) * ns-render - render all images * Fix commonprefix->commonpath * Fix raw rendering --- nerfstudio/scripts/render.py | 208 ++++++++++++++++++++++++++++++++- nerfstudio/utils/eval_utils.py | 7 +- 2 files changed, 211 insertions(+), 4 deletions(-) diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 99a7cae01d..ecb7961cc4 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -23,7 +23,8 @@ import struct import shutil import sys -from contextlib import ExitStack +import gzip +from contextlib import ExitStack, contextmanager from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Literal, Optional, Union @@ -52,8 +53,11 @@ get_path_from_json, get_spiral_path, ) -from nerfstudio.cameras.cameras import Cameras, CameraType -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager +from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle +from nerfstudio.data.datasets.base_dataset import Dataset +from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig +from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader +from nerfstudio.engine.trainer import TrainerConfig from nerfstudio.data.scene_box import OrientedBox from nerfstudio.model_components import renderers from nerfstudio.pipelines.base_pipeline import Pipeline @@ -585,11 +589,209 @@ def main(self) -> None: ) +@contextmanager +def _disable_datamanager_setup(cls): + """ + Disables setup_train or setup_eval for faster initialization. + """ + old_setup_train = getattr(cls, "setup_train") + old_setup_eval = getattr(cls, "setup_eval") + setattr(cls, "setup_train", lambda *args, **kwargs: None) + setattr(cls, "setup_eval", lambda *args, **kwargs: None) + yield cls + setattr(cls, "setup_train", old_setup_train) + setattr(cls, "setup_eval", old_setup_eval) + + +@dataclass +class DatasetRender(BaseRender): + """Render all images in the dataset.""" + + output_path: Path = Path("renders") + """Path to output video file.""" + data: Optional[Path] = None + """Override path to the dataset.""" + downscale_factor: Optional[float] = None + """Scaling factor to apply to the camera image resolution.""" + split: Literal["train", "val", "test", "train+test"] = "test" + """Split to render.""" + rendered_output_names: Optional[List[str]] = field(default_factory=lambda: None) + """Name of the renderer outputs to use. rgb, depth, raw-depth, gt-rgb etc. 
By default all outputs are rendered.""" + + def main(self): + config: TrainerConfig + + def update_config(config: TrainerConfig) -> TrainerConfig: + data_manager_config = config.pipeline.datamanager + assert isinstance(data_manager_config, VanillaDataManagerConfig) + data_manager_config.eval_image_indices = None + data_manager_config.eval_num_images_to_sample_from = -1 + data_manager_config.eval_num_times_to_repeat_images = -1 + data_manager_config.train_num_images_to_sample_from = -1 + data_manager_config.train_num_times_to_repeat_images = -1 + data_manager_config.data = self.data + if self.downscale_factor is not None: + assert hasattr(data_manager_config.dataparser, "downscale_factor") + setattr(data_manager_config.dataparser, "downscale_factor", self.downscale_factor) + return config + + config, pipeline, _, _ = eval_setup( + self.load_config, + eval_num_rays_per_chunk=self.eval_num_rays_per_chunk, + test_mode="inference", + update_config_callback=update_config, + ) + data_manager_config = config.pipeline.datamanager + assert isinstance(data_manager_config, VanillaDataManagerConfig) + + for split in self.split.split("+"): + datamanager: VanillaDataManager + dataset: Dataset + if split == "train": + with _disable_datamanager_setup(data_manager_config._target): # pylint: disable=protected-access + datamanager = data_manager_config.setup(test_mode="test", device=pipeline.device) + + dataset = datamanager.train_dataset + dataparser_outputs = getattr(dataset, "_dataparser_outputs", datamanager.train_dataparser_outputs) + else: + with _disable_datamanager_setup(data_manager_config._target): # pylint: disable=protected-access + datamanager = data_manager_config.setup(test_mode=split, device=pipeline.device) + + dataset = datamanager.eval_dataset + dataparser_outputs = getattr(dataset, "_dataparser_outputs", None) + if dataparser_outputs is None: + dataparser_outputs = datamanager.dataparser.get_dataparser_outputs(split=datamanager.test_split) + dataloader = FixedIndicesEvalDataloader( + input_dataset=dataset, + device=datamanager.device, + num_workers=datamanager.world_size * 4, + ) + images_root = Path(os.path.commonpath(dataparser_outputs.image_filenames)) + with Progress( + TextColumn(f":movie_camera: Rendering split {split} :movie_camera:"), + BarColumn(), + TaskProgressColumn( + text_format="[progress.percentage]{task.completed}/{task.total:>.0f}({task.percentage:>3.1f}%)", + show_speed=True, + ), + ItersPerSecColumn(suffix="fps"), + TimeRemainingColumn(elapsed_when_finished=False, compact=False), + TimeElapsedColumn(), + ) as progress: + for camera_idx, (ray_bundle, batch) in enumerate(progress.track(dataloader, total=len(dataset))): + ray_bundle: RayBundle + with torch.no_grad(): + outputs = pipeline.model.get_outputs_for_camera_ray_bundle(ray_bundle) + + gt_batch = batch.copy() + gt_batch["rgb"] = gt_batch.pop("image") + all_outputs = ( + list(outputs.keys()) + + [f"raw-{x}" for x in outputs.keys()] + + [f"gt-{x}" for x in gt_batch.keys()] + + [f"raw-gt-{x}" for x in gt_batch.keys()] + ) + rendered_output_names = self.rendered_output_names + if rendered_output_names is None: + rendered_output_names = ["gt-rgb"] + list(outputs.keys()) + for rendered_output_name in rendered_output_names: + if rendered_output_name not in all_outputs: + CONSOLE.rule("Error", style="red") + CONSOLE.print( + f"Could not find {rendered_output_name} in the model outputs", justify="center" + ) + CONSOLE.print( + f"Please set --rendered-output-name to one of: {all_outputs}", justify="center" + ) + sys.exit(1) + 
+ is_raw = False + is_depth = rendered_output_name.find("depth") != -1 + image_name = f"{camera_idx:05d}" + + # Try to get the original filename + image_name = ( + dataparser_outputs.image_filenames[camera_idx].with_suffix("").relative_to(images_root) + ) + + output_path = self.output_path / split / rendered_output_name / image_name + output_path.parent.mkdir(exist_ok=True, parents=True) + + output_name = rendered_output_name + if output_name.startswith("raw-"): + output_name = output_name[4:] + is_raw = True + if output_name.startswith("gt-"): + output_name = output_name[3:] + output_image = gt_batch[output_name] + else: + output_image = outputs[output_name] + if is_depth: + # Divide by the dataparser scale factor + output_image.div_(dataparser_outputs.dataparser_scale) + else: + if output_name.startswith("gt-"): + output_name = output_name[3:] + output_image = gt_batch[output_name] + else: + output_image = outputs[output_name] + del output_name + + # Map to color spaces / numpy + if is_raw: + output_image = output_image.cpu().numpy() + elif is_depth: + output_image = ( + colormaps.apply_depth_colormap( + output_image, + accumulation=outputs["accumulation"], + near_plane=self.depth_near_plane, + far_plane=self.depth_far_plane, + colormap_options=self.colormap_options, + ) + .cpu() + .numpy() + ) + else: + output_image = ( + colormaps.apply_colormap( + image=output_image, + colormap_options=self.colormap_options, + ) + .cpu() + .numpy() + ) + + # Save to file + if is_raw: + with gzip.open(output_path.with_suffix(".npy.gz"), "wb") as f: + np.save(f, output_image) + elif self.image_format == "png": + media.write_image(output_path.with_suffix(".png"), output_image, fmt="png") + elif self.image_format == "jpeg": + media.write_image( + output_path.with_suffix(".jpg"), output_image, fmt="jpeg", quality=self.jpeg_quality + ) + else: + raise ValueError(f"Unknown image format {self.image_format}") + + table = Table( + title=None, + show_header=False, + box=box.MINIMAL, + title_style=style.Style(bold=True), + ) + for split in self.split.split("+"): + table.add_row(f"Outputs {split}", str(self.output_path / split)) + CONSOLE.print(Panel(table, title="[bold][green]:tada: Render on split {} Complete :tada:[/bold]", expand=False)) + + Commands = tyro.conf.FlagConversionOff[ Union[ Annotated[RenderCameraPath, tyro.conf.subcommand(name="camera-path")], Annotated[RenderInterpolated, tyro.conf.subcommand(name="interpolate")], Annotated[SpiralRender, tyro.conf.subcommand(name="spiral")], + Annotated[DatasetRender, tyro.conf.subcommand(name="dataset")], ] ] diff --git a/nerfstudio/utils/eval_utils.py b/nerfstudio/utils/eval_utils.py index 13948678bb..7e04005368 100644 --- a/nerfstudio/utils/eval_utils.py +++ b/nerfstudio/utils/eval_utils.py @@ -20,7 +20,7 @@ import os import sys from pathlib import Path -from typing import Literal, Optional, Tuple +from typing import Literal, Optional, Tuple, Callable import torch import yaml @@ -69,6 +69,7 @@ def eval_setup( config_path: Path, eval_num_rays_per_chunk: Optional[int] = None, test_mode: Literal["test", "val", "inference"] = "test", + update_config_callback: Optional[Callable[[TrainerConfig], TrainerConfig]] = None, ) -> Tuple[TrainerConfig, Pipeline, Path, int]: """Shared setup for loading a saved pipeline for evaluation. 
@@ -79,6 +80,7 @@ def eval_setup( 'val': loads train/val datasets into memory 'test': loads train/test dataset into memory 'inference': does not load any dataset into memory + update_config_callback: Callback to update the config before loading the pipeline Returns: @@ -92,6 +94,9 @@ def eval_setup( if eval_num_rays_per_chunk: config.pipeline.model.eval_num_rays_per_chunk = eval_num_rays_per_chunk + if update_config_callback is not None: + config = update_config_callback(config) + # load checkpoints from wherever they were saved # TODO: expose the ability to choose an arbitrary checkpoint config.load_dir = config.get_checkpoint_dir() From c2f5e68d548b66114a3dab0ad7707906a47126c9 Mon Sep 17 00:00:00 2001 From: Reza Kermani Date: Sun, 1 Oct 2023 23:31:02 -0400 Subject: [PATCH 027/101] fixing broken links in README (#2483) fixing broken links --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7e1a0ad210..a853192ad4 100644 --- a/README.md +++ b/README.md @@ -242,15 +242,15 @@ Using an existing dataset is great, but likely you want to use your own data! We | Data | Capture Device | Requirements | `ns-process-data` Speed | | ---------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- | -| πŸ“· [Images](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#images-and-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| πŸ“Ή [Video](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#images-and-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| 🌎 [360 Data](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#360_data) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| πŸ“± [Polycam](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | -| πŸ“± [KIRI Engine](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#kiri-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | -| πŸ“± [Record3D](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | -| πŸ–₯ [Metashape](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | -| πŸ–₯ [RealityCapture](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | -| πŸ›  [Custom](https://docs.nerf.studio/en/latest/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | +| πŸ“· [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| πŸ“Ή [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| 🌎 [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| πŸ“± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | +| πŸ“± [KIRI 
Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | +| πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | +| πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | +| πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | +| πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | ## 5. Advanced Options From 0d746b136347ed7b2eae6b9bd44df66f9b8edf34 Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Mon, 2 Oct 2023 23:07:34 +0200 Subject: [PATCH 028/101] Fourier Feature encodings and polyhedron encodings (#2463) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fourier Feature encodings and polyhedron encodings Rework RFFEncoding to be a subclass of the more general Fourier Feature Encodings and introduce Polyhedron encodings as introduced in mipnerf360. * b_matrix -> basis * Add typing * fighting pyright * use scale argument * continue the fight * ignore * ignore em all * add docstring and rename generate_basis to generate_polyhedron_basis * Try to please pyright with assert * Immediately allocate tensor on correct device * private functions and docstrings update * doc fix continued --------- Co-authored-by: JonΓ‘Ε‘ KulhΓ‘nek --- nerfstudio/field_components/encodings.py | 98 +++++++++++--- nerfstudio/utils/math.py | 161 ++++++++++++++++++++++- 2 files changed, 237 insertions(+), 22 deletions(-) diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index 437ee805fe..4ba28f0c4e 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -27,9 +27,13 @@ from torch import Tensor, nn from nerfstudio.field_components.base_field_component import FieldComponent -from nerfstudio.utils.math import components_from_spherical_harmonics, expected_sin +from nerfstudio.utils.external import TCNN_EXISTS, tcnn +from nerfstudio.utils.math import ( + components_from_spherical_harmonics, + expected_sin, + generate_polyhedron_basis, +) from nerfstudio.utils.printing import print_tcnn_speed_warning -from nerfstudio.utils.external import tcnn, TCNN_EXISTS class Encoding(FieldComponent): @@ -153,7 +157,7 @@ def pytorch_fwd( Output values will be between -1 and 1 """ scaled_in_tensor = 2 * torch.pi * in_tensor # scale to [0, 2pi] - freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies).to(in_tensor.device) + freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies, device=in_tensor.device) scaled_inputs = scaled_in_tensor[..., None] * freqs # [..., "input_dim", "num_scales"] scaled_inputs = scaled_inputs.view(*scaled_inputs.shape[:-2], -1) # [..., "input_dim" * "num_scales"] @@ -178,34 +182,40 @@ def forward( return self.pytorch_fwd(in_tensor, covs) -class RFFEncoding(Encoding): - """Random Fourier Feature encoding. Supports integrated encodings. +class FFEncoding(Encoding): + """Fourier Feature encoding. Supports integrated encodings. Args: in_dim: Input dimension of tensor - num_frequencies: Number of encoding frequencies - scale: Std of Gaussian to sample frequencies. 
Must be greater than zero + basis: Basis matrix from which to construct the Fourier features. + num_frequencies: Number of encoded frequencies per axis + min_freq_exp: Minimum frequency exponent + max_freq_exp: Maximum frequency exponent include_input: Append the input coordinate to the encoding """ - def __init__(self, in_dim: int, num_frequencies: int, scale: float, include_input: bool = False) -> None: + def __init__( + self, + in_dim: int, + basis: Float[Tensor, "M N"], + num_frequencies: int, + min_freq_exp: float, + max_freq_exp: float, + include_input: bool = False, + ) -> None: super().__init__(in_dim) - self.num_frequencies = num_frequencies - if not scale > 0: - raise ValueError("RFF encoding scale should be greater than zero") - self.scale = scale - if self.in_dim is None: - raise ValueError("Input dimension has not been set") - b_matrix = torch.normal(mean=0, std=self.scale, size=(self.in_dim, self.num_frequencies)) - self.register_buffer(name="b_matrix", tensor=b_matrix) + self.min_freq = min_freq_exp + self.max_freq = max_freq_exp + self.register_buffer(name="b_matrix", tensor=basis) self.include_input = include_input def get_out_dim(self) -> int: - out_dim = self.num_frequencies * 2 + if self.in_dim is None: + raise ValueError("Input dimension has not been set") + assert isinstance(self.b_matrix, Tensor) + out_dim = self.b_matrix.shape[1] * self.num_frequencies * 2 if self.include_input: - if self.in_dim is None: - raise ValueError("Input dimension has not been set") out_dim += self.in_dim return out_dim @@ -214,7 +224,7 @@ def forward( in_tensor: Float[Tensor, "*bs input_dim"], covs: Optional[Float[Tensor, "*bs input_dim input_dim"]] = None, ) -> Float[Tensor, "*bs output_dim"]: - """Calculates RFF encoding. If covariances are provided the encodings will be integrated as proposed + """Calculates FF encoding. If covariances are provided the encodings will be integrated as proposed in mip-NeRF. Args: @@ -226,11 +236,16 @@ def forward( """ scaled_in_tensor = 2 * torch.pi * in_tensor # scale to [0, 2pi] scaled_inputs = scaled_in_tensor @ self.b_matrix # [..., "num_frequencies"] + freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies, device=in_tensor.device) + scaled_inputs = scaled_inputs[..., None] * freqs # [..., "input_dim", "num_scales"] + scaled_inputs = scaled_inputs.view(*scaled_inputs.shape[:-2], -1) # [..., "input_dim" * "num_scales"] if covs is None: encoded_inputs = torch.sin(torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1)) else: input_var = torch.sum((covs @ self.b_matrix) * self.b_matrix, -2) + input_var = input_var[..., :, None] * freqs[None, :] ** 2 + input_var = input_var.reshape((*input_var.shape[:-2], -1)) encoded_inputs = expected_sin( torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1), torch.cat(2 * [input_var], dim=-1) ) @@ -241,6 +256,49 @@ def forward( return encoded_inputs +class RFFEncoding(FFEncoding): + """Random Fourier Feature encoding. Supports integrated encodings. + + Args: + in_dim: Input dimension of tensor + num_frequencies: Number of encoding frequencies + scale: Std of Gaussian to sample frequencies. 
Must be greater than zero + include_input: Append the input coordinate to the encoding + """ + + def __init__(self, in_dim: int, num_frequencies: int, scale: float, include_input: bool = False) -> None: + if not scale > 0: + raise ValueError("RFF encoding scale should be greater than zero") + + b_matrix = torch.normal(mean=0, std=scale, size=(in_dim, num_frequencies)) + super().__init__(in_dim, b_matrix, 1, 0.0, 0.0, include_input) + + +class PolyhedronFFEncoding(FFEncoding): + """Fourier Feature encoding using polyhedron basis as proposed by mip-NeRF360. Supports integrated encodings. + + Args: + num_frequencies: Number of encoded frequencies per axis + min_freq_exp: Minimum frequency exponent + max_freq_exp: Maximum frequency exponent + basis_shape: Shape of polyhedron basis. Either "octahedron" or "icosahedron" + basis_subdivisions: Number of times to tesselate the polyhedron. + include_input: Append the input coordinate to the encoding + """ + + def __init__( + self, + num_frequencies: int, + min_freq_exp: float, + max_freq_exp: float, + basis_shape: Literal["octahedron", "icosahedron"] = "octahedron", + basis_subdivisions: int = 1, + include_input: bool = False, + ) -> None: + basis_t = generate_polyhedron_basis(basis_shape, basis_subdivisions).T + super().__init__(3, basis_t, num_frequencies, min_freq_exp, max_freq_exp, include_input) + + class HashEncoding(Encoding): """Hash encoding diff --git a/nerfstudio/utils/math.py b/nerfstudio/utils/math.py index 8567a8b289..0ba9e6a51c 100644 --- a/nerfstudio/utils/math.py +++ b/nerfstudio/utils/math.py @@ -14,6 +14,8 @@ """ Math Helper Functions """ +import itertools +import math from dataclasses import dataclass from typing import Literal, Tuple @@ -195,7 +197,6 @@ def expected_sin(x_means: torch.Tensor, x_vars: torch.Tensor) -> torch.Tensor: Returns: torch.Tensor: The expected value of sin. """ - return torch.exp(-0.5 * x_vars) * torch.sin(x_means) @@ -360,4 +361,160 @@ def normalized_depth_scale_and_shift( shift[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid] return scale, shift - return scale, shift + + +def columnwise_squared_l2_distance( + x: Float[Tensor, "*M N"], + y: Float[Tensor, "*M N"], +) -> Float[Tensor, "N N"]: + """Compute the squared Euclidean distance between all pairs of columns. + Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py + + Args: + x: tensor of floats, with shape [M, N]. + y: tensor of floats, with shape [M, N]. + Returns: + sq_dist: tensor of floats, with shape [N, N]. + """ + # Use the fact that ||x - y||^2 == ||x||^2 + ||y||^2 - 2 x^T y. + sq_norm_x = torch.sum(x**2, 0) + sq_norm_y = torch.sum(y**2, 0) + sq_dist = sq_norm_x[:, None] + sq_norm_y[None, :] - 2 * x.T @ y + return sq_dist + + +def _compute_tesselation_weights(v: int) -> Tensor: + """Tesselate the vertices of a triangle by a factor of `v`. + Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py + + Args: + v: int, the factor of the tesselation (v==1 is a no-op to the triangle). + + Returns: + weights: tesselated weights. + """ + if v < 1: + raise ValueError(f"v {v} must be >= 1") + int_weights = [] + for i in range(v + 1): + for j in range(v + 1 - i): + int_weights.append((i, j, v - (i + j))) + int_weights = torch.FloatTensor(int_weights) + weights = int_weights / v # Barycentric weights. 
+ return weights + + +def _tesselate_geodesic( + vertices: Float[Tensor, "N 3"], faces: Float[Tensor, "M 3"], v: int, eps: float = 1e-4 +) -> Tensor: + """Tesselate the vertices of a geodesic polyhedron. + + Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py + + Args: + vertices: tensor of floats, the vertex coordinates of the geodesic. + faces: tensor of ints, the indices of the vertices of base_verts that + constitute eachface of the polyhedra. + v: int, the factor of the tesselation (v==1 is a no-op). + eps: float, a small value used to determine if two vertices are the same. + + Returns: + verts: a tensor of floats, the coordinates of the tesselated vertices. + """ + tri_weights = _compute_tesselation_weights(v) + + verts = [] + for face in faces: + new_verts = torch.matmul(tri_weights, vertices[face, :]) + new_verts /= torch.sqrt(torch.sum(new_verts**2, 1, keepdim=True)) + verts.append(new_verts) + verts = torch.concatenate(verts, 0) + + sq_dist = columnwise_squared_l2_distance(verts.T, verts.T) + assignment = torch.tensor([torch.min(torch.argwhere(d <= eps)) for d in sq_dist]) + unique = torch.unique(assignment) + verts = verts[unique, :] + return verts + + +def generate_polyhedron_basis( + basis_shape: Literal["icosahedron", "octahedron"], + angular_tesselation: int, + remove_symmetries: bool = True, + eps: float = 1e-4, +) -> Tensor: + """Generates a 3D basis by tesselating a geometric polyhedron. + Basis is used to construct Fourier features for positional encoding. + See Mip-Nerf360 paper: https://arxiv.org/abs/2111.12077 + Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py + + Args: + base_shape: string, the name of the starting polyhedron, must be either + 'icosahedron' or 'octahedron'. + angular_tesselation: int, the number of times to tesselate the polyhedron, + must be >= 1 (a value of 1 is a no-op to the polyhedron). + remove_symmetries: bool, if True then remove the symmetric basis columns, + which is usually a good idea because otherwise projections onto the basis + will have redundant negative copies of each other. + eps: float, a small number used to determine symmetries. + + Returns: + basis: a matrix with shape [3, n]. + """ + if basis_shape == "icosahedron": + a = (math.sqrt(5) + 1) / 2 + verts = torch.FloatTensor( + [ + (-1, 0, a), + (1, 0, a), + (-1, 0, -a), + (1, 0, -a), + (0, a, 1), + (0, a, -1), + (0, -a, 1), + (0, -a, -1), + (a, 1, 0), + (-a, 1, 0), + (a, -1, 0), + (-a, -1, 0), + ] + ) / math.sqrt(a + 2) + faces = torch.tensor( + [ + (0, 4, 1), + (0, 9, 4), + (9, 5, 4), + (4, 5, 8), + (4, 8, 1), + (8, 10, 1), + (8, 3, 10), + (5, 3, 8), + (5, 2, 3), + (2, 7, 3), + (7, 10, 3), + (7, 6, 10), + (7, 11, 6), + (11, 0, 6), + (0, 1, 6), + (6, 1, 10), + (9, 0, 11), + (9, 11, 2), + (9, 2, 5), + (7, 2, 11), + ] + ) + verts = _tesselate_geodesic(verts, faces, angular_tesselation) + elif basis_shape == "octahedron": + verts = torch.FloatTensor([(0, 0, -1), (0, 0, 1), (0, -1, 0), (0, 1, 0), (-1, 0, 0), (1, 0, 0)]) + corners = torch.FloatTensor(list(itertools.product([-1, 1], repeat=3))) + pairs = torch.argwhere(columnwise_squared_l2_distance(corners.T, verts.T) == 2) + faces, _ = torch.sort(torch.reshape(pairs[:, 1], [3, -1]).T, 1) + verts = _tesselate_geodesic(verts, faces, angular_tesselation) + + if remove_symmetries: + # Remove elements of `verts` that are reflections of each other. 
+ match = columnwise_squared_l2_distance(verts.T, -verts.T) < eps + verts = verts[torch.any(torch.triu(match), 1), :] + + basis = verts.flip(-1) + return basis From afcc8434d031d85e609c5a6491998cc333d55138 Mon Sep 17 00:00:00 2001 From: Rohan Mathur Date: Tue, 3 Oct 2023 00:20:58 -0700 Subject: [PATCH 029/101] fixed link to viser to be functional link cause previous 404ed (#2486) --- docs/developer_guides/viewer/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer_guides/viewer/index.md b/docs/developer_guides/viewer/index.md index 93204a2035..9df5c60d5a 100644 --- a/docs/developer_guides/viewer/index.md +++ b/docs/developer_guides/viewer/index.md @@ -16,7 +16,7 @@ local_viewer We thank the authors and contributors to the following repos, which we've started, used, and modified for our use-cases. -- [Viser](https://github.com/brentyi/viser/tree/main/viser) - made by [Brent Yi](https://github.com/brentyi) +- [Viser](https://github.com/brentyi/viser/) - made by [Brent Yi](https://github.com/brentyi) - [meshcat-python](https://github.com/rdeits/meshcat-python) - made by [Robin Deits](https://github.com/rdeits) - [meshcat](https://github.com/rdeits/meshcat) - made by [Robin Deits](https://github.com/rdeits) - [ThreeJS](https://threejs.org/) From a484d255b4f71c55915afcfc52d90ec88963779f Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Wed, 4 Oct 2023 21:02:46 +0200 Subject: [PATCH 030/101] Add support of using both Comet and Viewer at the same time (#2488) --- nerfstudio/configs/experiment_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py index 42cc4399ec..d4b3d4de66 100644 --- a/nerfstudio/configs/experiment_config.py +++ b/nerfstudio/configs/experiment_config.py @@ -83,7 +83,7 @@ class ExperimentConfig(InstantiateConfig): def is_viewer_enabled(self) -> bool: """Checks if a viewer is enabled.""" - return ("viewer" == self.vis) | ("viewer+wandb" == self.vis) | ("viewer+tensorboard" == self.vis) + return self.vis in ("viewer", "viewer+wandb", "viewer+tensorboard", "viewer+comet") def is_viewer_beta_enabled(self) -> bool: """Checks if a viewer beta is enabled.""" From 242a1cf4fe460df4d0ed9a47b2376134eaa43a00 Mon Sep 17 00:00:00 2001 From: Ji Shi Date: Mon, 9 Oct 2023 03:59:58 +0800 Subject: [PATCH 031/101] Fix DatasetRender to override dataset path only when provided (#2494) --- nerfstudio/scripts/render.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index ecb7961cc4..7f6102428f 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -629,7 +629,8 @@ def update_config(config: TrainerConfig) -> TrainerConfig: data_manager_config.eval_num_times_to_repeat_images = -1 data_manager_config.train_num_images_to_sample_from = -1 data_manager_config.train_num_times_to_repeat_images = -1 - data_manager_config.data = self.data + if self.data is not None: + data_manager_config.data = self.data if self.downscale_factor is not None: assert hasattr(data_manager_config.dataparser, "downscale_factor") setattr(data_manager_config.dataparser, "downscale_factor", self.downscale_factor) From d325593e0229ae741920416ebe1812fc0ac4af50 Mon Sep 17 00:00:00 2001 From: Matthew Tancik Date: Mon, 9 Oct 2023 04:35:08 +0200 Subject: [PATCH 032/101] Remove unused config param (#2495) Remove unused config. 
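The `DatasetRender` entry point introduced in the ns-render patch above is normally invoked as the `ns-render dataset` subcommand, but the dataclass can also be driven from Python. The sketch below is illustrative only: the run directory and config path are hypothetical placeholders, and it assumes `load_config` is inherited from `BaseRender` exactly as it is used inside `DatasetRender.main`.

```python
from pathlib import Path

from nerfstudio.scripts.render import DatasetRender

# Render every train/test view of a trained model to disk.
# The config path below is a placeholder for a real training run.
job = DatasetRender(
    load_config=Path("outputs/my-scene/nerfacto/2023-10-01_120000/config.yml"),
    output_path=Path("renders"),
    split="train+test",
    rendered_output_names=["gt-rgb", "rgb", "depth"],
)
job.main()
```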
--- docs/developer_guides/pipelines/datamanagers.md | 2 -- nerfstudio/data/datamanagers/base_datamanager.py | 2 -- nerfstudio/scripts/render.py | 1 - nerfstudio/utils/eval_utils.py | 3 --- tests/data/configs/test_config1.yml | 2 -- tests/data/configs/test_config2.yml | 2 -- 6 files changed, 12 deletions(-) diff --git a/docs/developer_guides/pipelines/datamanagers.md b/docs/developer_guides/pipelines/datamanagers.md index 32c55b63e9..78aec242da 100644 --- a/docs/developer_guides/pipelines/datamanagers.md +++ b/docs/developer_guides/pipelines/datamanagers.md @@ -62,8 +62,6 @@ class VanillaDataManagerConfig(InstantiateConfig): """number of rays per batch to use per eval iteration""" eval_num_images_to_sample_from: int = -1 """number of images to sample during eval iteration""" - eval_image_indices: Optional[Tuple[int, ...]] = (0,) - """specifies the image indices to use during eval; if None, uses all""" camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig() """specifies the camera pose optimizer used during training""" ``` diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 52fd0af0ff..cb63202dff 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -335,8 +335,6 @@ class VanillaDataManagerConfig(DataManagerConfig): eval_num_times_to_repeat_images: int = -1 """When not evaluating on all images, number of iterations before picking new images. If -1, never pick new images.""" - eval_image_indices: Optional[Tuple[int, ...]] = (0,) - """Specifies the image indices to use during eval; if None, uses all.""" camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig() """Specifies the camera pose optimizer used during training. 
Helpful if poses are noisy, such as for data from Record3D.""" diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 7f6102428f..307bb5adba 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -624,7 +624,6 @@ def main(self): def update_config(config: TrainerConfig) -> TrainerConfig: data_manager_config = config.pipeline.datamanager assert isinstance(data_manager_config, VanillaDataManagerConfig) - data_manager_config.eval_image_indices = None data_manager_config.eval_num_images_to_sample_from = -1 data_manager_config.eval_num_times_to_repeat_images = -1 data_manager_config.train_num_images_to_sample_from = -1 diff --git a/nerfstudio/utils/eval_utils.py b/nerfstudio/utils/eval_utils.py index 7e04005368..0b7306791c 100644 --- a/nerfstudio/utils/eval_utils.py +++ b/nerfstudio/utils/eval_utils.py @@ -26,7 +26,6 @@ import yaml from nerfstudio.configs.method_configs import all_methods -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManagerConfig from nerfstudio.engine.trainer import TrainerConfig from nerfstudio.pipelines.base_pipeline import Pipeline from nerfstudio.utils.rich_utils import CONSOLE @@ -100,8 +99,6 @@ def eval_setup( # load checkpoints from wherever they were saved # TODO: expose the ability to choose an arbitrary checkpoint config.load_dir = config.get_checkpoint_dir() - if isinstance(config.pipeline.datamanager, VanillaDataManagerConfig): - config.pipeline.datamanager.eval_image_indices = None # setup pipeline (which includes the DataManager) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") diff --git a/tests/data/configs/test_config1.yml b/tests/data/configs/test_config1.yml index 3cf0147603..67ce18cda1 100644 --- a/tests/data/configs/test_config1.yml +++ b/tests/data/configs/test_config1.yml @@ -111,8 +111,6 @@ pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConf - data - varun - cheezit-scaled - eval_image_indices: !!python/tuple - - 0 eval_num_images_to_sample_from: -1 eval_num_rays_per_batch: 4096 eval_num_times_to_repeat_images: -1 diff --git a/tests/data/configs/test_config2.yml b/tests/data/configs/test_config2.yml index c1ccda3791..90737789fd 100644 --- a/tests/data/configs/test_config2.yml +++ b/tests/data/configs/test_config2.yml @@ -105,8 +105,6 @@ pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConf - data - varun - cheezit-scaled - eval_image_indices: !!python/tuple - - 0 eval_num_images_to_sample_from: -1 eval_num_rays_per_batch: 4096 eval_num_times_to_repeat_images: -1 From 79297c7c61749e1a8e27966f90106bab4aa53efc Mon Sep 17 00:00:00 2001 From: Ruoyu Wang Date: Mon, 9 Oct 2023 12:22:53 +0800 Subject: [PATCH 033/101] (minor) Align upsampling in TensorCPEncoding with TensorVMEncoding (#2493) * (minor) Align upsampling in TensorCPEncoding with TensorVMEncoding * Fix black --------- Co-authored-by: Matthew Tancik Co-authored-by: Matthew Tancik --- nerfstudio/field_components/encodings.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index 4ba28f0c4e..ee2aac93c8 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -482,9 +482,8 @@ def upsample_grid(self, resolution: int) -> None: resolution: Target resolution. 
""" - self.line_coef.data = F.interpolate( - self.line_coef.data, size=(resolution, 1), mode="bilinear", align_corners=True - ) + line_coef = F.interpolate(self.line_coef.data, size=(resolution, 1), mode="bilinear", align_corners=True) + self.line_coef = torch.nn.Parameter(line_coef) self.resolution = resolution From 81db1808a72b798d0e314d5bdb6abb25bd9c71c8 Mon Sep 17 00:00:00 2001 From: Jami Pekkanen Date: Mon, 9 Oct 2023 07:26:10 +0300 Subject: [PATCH 034/101] Add documentation about unknown depth pixel convention (#2490) Updated the documentation to explicate that zero value in depth images is treated as unknown depth. This behavior is defined in https://github.com/nerfstudio-project/nerfstudio/blob/a484d255b4f71c55915afcfc52d90ec88963779f/nerfstudio/model_components/losses.py#L242 Co-authored-by: Matthew Tancik --- docs/quickstart/data_conventions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart/data_conventions.md b/docs/quickstart/data_conventions.md index 34324e30e0..2e9e76effa 100644 --- a/docs/quickstart/data_conventions.md +++ b/docs/quickstart/data_conventions.md @@ -82,7 +82,7 @@ For a transform matrix, the first 3 columns are the +X, +Y, and +Z defining the ### Depth images -To train with depth supervision, you can also provide a `depth_file_path` for each frame in your `transforms.json` and use one of the methods that support additional depth losses (e.g., depth-nerfacto). The depths are assumed to be 16-bit or 32-bit and to be in millimeters to remain consistent with [Polyform](https://github.com/PolyCam/polyform). You can adjust this scaling factor using the `depth_unit_scale_factor` parameter in `NerfstudioDataParserConfig`. Note that by default, we resize the depth images to match the shape of the RGB images. +To train with depth supervision, you can also provide a `depth_file_path` for each frame in your `transforms.json` and use one of the methods that support additional depth losses (e.g., depth-nerfacto). The depths are assumed to be 16-bit or 32-bit and to be in millimeters to remain consistent with [Polyform](https://github.com/PolyCam/polyform). Zero-value in the depth image is treated as unknown depth. You can adjust this scaling factor using the `depth_unit_scale_factor` parameter in `NerfstudioDataParserConfig`. Note that by default, we resize the depth images to match the shape of the RGB images. 
```json { From 1b85fb528b087f6ef9939addc4693f6a6e507219 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 13:09:09 -0700 Subject: [PATCH 035/101] patch for mkdirs on camera path json filepath (#2502) fix camera path json mkdir in viewer beta --- nerfstudio/viewer_beta/render_panel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 7342b0f388..690027f8d0 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -625,6 +625,7 @@ def _(event: viser.GuiEvent) -> None: # now write the json file json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json" + json_outfile.parent.mkdir(parents=True, exist_ok=True) with open(json_outfile.absolute(), "w") as outfile: json.dump(json_data, outfile) # now show the command From e694f341a7ad49e2f6842b69cbaa95c370dd7a8a Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 14:52:25 -0700 Subject: [PATCH 036/101] Move camera optimization out of datamanager and parallelize dataloading (#2092) --- nerfstudio/cameras/camera_optimizers.py | 99 ++++-- nerfstudio/configs/method_configs.py | 105 +++--- .../data/datamanagers/base_datamanager.py | 46 +-- .../data/datamanagers/parallel_datamanager.py | 321 ++++++++++++++++++ nerfstudio/data/pixel_samplers.py | 4 +- nerfstudio/engine/optimizers.py | 19 ++ nerfstudio/engine/trainer.py | 7 - nerfstudio/fields/nerfacto_field.py | 1 + nerfstudio/model_components/ray_generators.py | 7 +- nerfstudio/models/nerfacto.py | 18 +- nerfstudio/models/tensorf.py | 15 + nerfstudio/pipelines/base_pipeline.py | 15 +- nerfstudio/utils/tensor_dataclass.py | 8 + 13 files changed, 531 insertions(+), 134 deletions(-) create mode 100644 nerfstudio/data/datamanagers/parallel_datamanager.py diff --git a/nerfstudio/cameras/camera_optimizers.py b/nerfstudio/cameras/camera_optimizers.py index df8296ea1a..7b76488842 100644 --- a/nerfstudio/cameras/camera_optimizers.py +++ b/nerfstudio/cameras/camera_optimizers.py @@ -23,19 +23,16 @@ from typing import Literal, Optional, Type, Union import torch -import tyro from jaxtyping import Float, Int from torch import Tensor, nn from typing_extensions import assert_never from nerfstudio.cameras.lie_groups import exp_map_SE3, exp_map_SO3xR3 +from nerfstudio.cameras.rays import RayBundle from nerfstudio.configs.base_config import InstantiateConfig -from nerfstudio.engine.optimizers import AdamOptimizerConfig, OptimizerConfig -from nerfstudio.engine.schedulers import ( - ExponentialDecaySchedulerConfig, - SchedulerConfig, -) from nerfstudio.utils import poses as pose_utils +from nerfstudio.engine.optimizers import OptimizerConfig +from nerfstudio.engine.schedulers import SchedulerConfig @dataclass @@ -47,21 +44,38 @@ class CameraOptimizerConfig(InstantiateConfig): mode: Literal["off", "SO3xR3", "SE3"] = "off" """Pose optimization strategy to use. If enabled, we recommend SO3xR3.""" - position_noise_std: float = 0.0 - """Noise to add to initial positions. Useful for debugging.""" + trans_l2_penalty: float = 1e-2 + """L2 penalty on translation parameters.""" - orientation_noise_std: float = 0.0 - """Noise to add to initial orientations. 
Useful for debugging.""" + rot_l2_penalty: float = 1e-3 + """L2 penalty on rotation parameters.""" - optimizer: OptimizerConfig = field(default_factory=lambda: AdamOptimizerConfig(lr=6e-4, eps=1e-15)) - """ADAM parameters for camera optimization.""" + optimizer: Optional[OptimizerConfig] = field(default=None) + """Deprecated, now specified inside the optimizers dict""" - scheduler: SchedulerConfig = field(default_factory=lambda: ExponentialDecaySchedulerConfig(max_steps=10000)) - """Learning rate scheduler for camera optimizer..""" + scheduler: Optional[SchedulerConfig] = field(default=None) + """Deprecated, now specified inside the optimizers dict""" - param_group: tyro.conf.Suppress[str] = "camera_opt" - """Name of the parameter group used for pose optimization. Can be any string that doesn't conflict with other - groups.""" + def __post_init__(self): + if self.optimizer is not None: + import warnings + from nerfstudio.utils.rich_utils import CONSOLE + + CONSOLE.print( + "\noptimizer is no longer specified in the CameraOptimizerConfig, it is now defined with the rest of the param groups inside the config file under the name 'camera_opt'\n", + style="bold yellow", + ) + warnings.warn("above message coming from", FutureWarning, stacklevel=3) + + if self.scheduler is not None: + import warnings + from nerfstudio.utils.rich_utils import CONSOLE + + CONSOLE.print( + "\nscheduler is no longer specified in the CameraOptimizerConfig, it is now defined with the rest of the param groups inside the config file under the name 'camera_opt'\n", + style="bold yellow", + ) + warnings.warn("above message coming from", FutureWarning, stacklevel=3) class CameraOptimizer(nn.Module): @@ -91,16 +105,6 @@ def __init__( else: assert_never(self.config.mode) - # Initialize pose noise; useful for debugging. - if config.position_noise_std != 0.0 or config.orientation_noise_std != 0.0: - assert config.position_noise_std >= 0.0 and config.orientation_noise_std >= 0.0 - std_vector = torch.tensor( - [config.position_noise_std] * 3 + [config.orientation_noise_std] * 3, device=device - ) - self.pose_noise = exp_map_SE3(torch.normal(torch.zeros((num_cameras, 6), device=device), std_vector)) - else: - self.pose_noise = None - def forward( self, indices: Int[Tensor, "camera_indices"], @@ -125,13 +129,46 @@ def forward( assert_never(self.config.mode) # Detach non-trainable indices by setting to identity transform if self.non_trainable_camera_indices is not None: - outputs[0][self.non_trainable_camera_indices] = torch.eye(4, device=self.device)[:3, :4] + if self.non_trainable_camera_indices.device != self.pose_adjustment.device: + self.non_trainable_camera_indices = self.non_trainable_camera_indices.to(self.pose_adjustment.device) + outputs[0][self.non_trainable_camera_indices] = torch.eye(4, device=self.pose_adjustment.device)[:3, :4] - # Apply initial pose noise. - if self.pose_noise is not None: - outputs.append(self.pose_noise[indices, :, :]) # Return: identity if no transforms are needed, otherwise multiply transforms together. if len(outputs) == 0: # Note that using repeat() instead of tile() here would result in unnecessary copies. 
return torch.eye(4, device=self.device)[None, :3, :4].tile(indices.shape[0], 1, 1) return functools.reduce(pose_utils.multiply, outputs) + + def apply_to_raybundle(self, raybundle: RayBundle) -> None: + """Apply the pose correction to the raybundle""" + if self.config.mode != "off": + correction_matrices = self(raybundle.camera_indices.squeeze()) # type: ignore + raybundle.origins = raybundle.origins + correction_matrices[:, :3, 3] + raybundle.directions = torch.bmm(correction_matrices[:, :3, :3], raybundle.directions[..., None]).squeeze() + + def get_loss_dict(self, loss_dict: dict) -> None: + """Add regularization""" + if self.config.mode != "off": + loss_dict["camera_opt_regularizer"] = ( + self.pose_adjustment[:, :3].norm(dim=-1).mean() * self.config.trans_l2_penalty + + self.pose_adjustment[:, 3:].norm(dim=-1).mean() * self.config.rot_l2_penalty + ) + + def get_correction_matrices(self): + """Get optimized pose correction matrices""" + return self(torch.arange(0, self.num_cameras).long()) + + def get_metrics_dict(self, metrics_dict: dict) -> None: + """Get camera optimizer metrics""" + if self.config.mode != "off": + metrics_dict["camera_opt_translation"] = self.pose_adjustment[:, :3].norm() + metrics_dict["camera_opt_rotation"] = self.pose_adjustment[:, 3:].norm() + + def get_param_groups(self, param_groups: dict) -> None: + """Get camera optimizer parameters""" + camera_opt_params = list(self.parameters()) + if self.config.mode != "off": + assert len(camera_opt_params) > 0 + param_groups["camera_opt"] = camera_opt_params + else: + assert len(camera_opt_params) == 0 diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index a92c042bf9..74cabbafcb 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -27,15 +27,23 @@ from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig from nerfstudio.configs.external_methods import get_external_methods - -from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig - +from nerfstudio.data.datamanagers.base_datamanager import ( + VanillaDataManager, + VanillaDataManagerConfig, +) +from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig +from nerfstudio.data.datamanagers.random_cameras_datamanager import ( + RandomCamerasDataManagerConfig, +) from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig -from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig +from nerfstudio.data.dataparsers.instant_ngp_dataparser import ( + InstantNGPDataParserConfig, +) from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig -from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig +from nerfstudio.data.dataparsers.phototourism_dataparser import ( + PhototourismDataParserConfig, +) from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig from nerfstudio.data.datasets.depth_dataset import DepthDataset @@ -88,17 +96,15 @@ max_num_iterations=30000, mixed_precision=True, pipeline=VanillaPipelineConfig( - 
datamanager=VanillaDataManagerConfig( + datamanager=ParallelDataManagerConfig( dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=4096, eval_num_rays_per_batch=4096, - camera_optimizer=CameraOptimizerConfig( - mode="SO3xR3", - optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2), - scheduler=ExponentialDecaySchedulerConfig(lr_final=6e-6, max_steps=200000), - ), ), - model=NerfactoModelConfig(eval_num_rays_per_chunk=1 << 15), + model=NerfactoModelConfig( + eval_num_rays_per_chunk=1 << 15, + camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"), + ), ), optimizers={ "proposal_networks": { @@ -109,10 +115,15 @@ "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", ) + method_configs["nerfacto-big"] = TrainerConfig( method_name="nerfacto", steps_per_eval_batch=500, @@ -120,14 +131,10 @@ max_num_iterations=100000, mixed_precision=True, pipeline=VanillaPipelineConfig( - datamanager=VanillaDataManagerConfig( + datamanager=ParallelDataManagerConfig( dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=8192, eval_num_rays_per_batch=4096, - camera_optimizer=CameraOptimizerConfig( - mode="SO3xR3", - optimizer=RAdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-3), - ), ), model=NerfactoModelConfig( eval_num_rays_per_chunk=1 << 15, @@ -139,6 +146,7 @@ max_res=4096, proposal_weights_anneal_max_num_iters=5000, log2_hashmap_size=21, + camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"), ), ), optimizers={ @@ -150,10 +158,15 @@ "optimizer": RAdamOptimizerConfig(lr=1e-2, eps=1e-15), "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=50000), }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", ) + method_configs["nerfacto-huge"] = TrainerConfig( method_name="nerfacto", steps_per_eval_batch=500, @@ -161,15 +174,10 @@ max_num_iterations=100000, mixed_precision=True, pipeline=VanillaPipelineConfig( - datamanager=VanillaDataManagerConfig( + datamanager=ParallelDataManagerConfig( dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=16384, eval_num_rays_per_batch=4096, - camera_optimizer=CameraOptimizerConfig( - mode="SO3xR3", - optimizer=RAdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-3), - scheduler=ExponentialDecaySchedulerConfig(lr_final=6e-5, max_steps=50000), - ), ), model=NerfactoModelConfig( eval_num_rays_per_chunk=1 << 15, @@ -185,6 +193,7 @@ max_res=8192, proposal_weights_anneal_max_num_iters=5000, log2_hashmap_size=21, + camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"), ), ), optimizers={ @@ -196,6 +205,10 @@ "optimizer": RAdamOptimizerConfig(lr=1e-2, eps=1e-15), "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=50000), }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", @@ -214,11 +227,11 @@ dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=4096, eval_num_rays_per_batch=4096, - camera_optimizer=CameraOptimizerConfig( - 
mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2) - ), ), - model=DepthNerfactoModelConfig(eval_num_rays_per_chunk=1 << 15), + model=DepthNerfactoModelConfig( + eval_num_rays_per_chunk=1 << 15, + camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"), + ), ), optimizers={ "proposal_networks": { @@ -229,6 +242,10 @@ "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), "scheduler": None, }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", @@ -258,7 +275,6 @@ vis="viewer", ) - method_configs["instant-ngp-bounded"] = TrainerConfig( method_name="instant-ngp-bounded", steps_per_eval_batch=500, @@ -286,12 +302,12 @@ viewer=ViewerConfig(num_rays_per_chunk=1 << 12), vis="viewer", ) - - +# +# method_configs["mipnerf"] = TrainerConfig( method_name="mipnerf", pipeline=VanillaPipelineConfig( - datamanager=VanillaDataManagerConfig(dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=1024), + datamanager=ParallelDataManagerConfig(dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=1024), model=VanillaModelConfig( _target=MipNerfModel, loss_coefficients={"rgb_loss_coarse": 0.1, "rgb_loss_fine": 1.0}, @@ -364,13 +380,14 @@ max_num_iterations=30000, mixed_precision=False, pipeline=VanillaPipelineConfig( - datamanager=VanillaDataManagerConfig( + datamanager=ParallelDataManagerConfig( dataparser=BlenderDataParserConfig(), train_num_rays_per_batch=4096, eval_num_rays_per_batch=4096, ), model=TensoRFModelConfig( regularization="tv", + camera_optimizer=CameraOptimizerConfig(mode="off"), ), ), optimizers={ @@ -382,6 +399,10 @@ "optimizer": AdamOptimizerConfig(lr=0.02), "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.002, max_steps=30000), }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-4, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-5, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", @@ -420,16 +441,16 @@ dataparser=PhototourismDataParserConfig(), # NOTE: one of the only differences with nerfacto train_num_rays_per_batch=4096, eval_num_rays_per_batch=4096, - camera_optimizer=CameraOptimizerConfig( - mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2) - ), # Large dataset, so using prior values from VariableResDataManager. 
train_num_images_to_sample_from=40, train_num_times_to_repeat_images=100, eval_num_images_to_sample_from=40, eval_num_times_to_repeat_images=100, ), - model=NerfactoModelConfig(eval_num_rays_per_chunk=1 << 15), + model=NerfactoModelConfig( + eval_num_rays_per_chunk=1 << 15, + camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"), + ), ), optimizers={ "proposal_networks": { @@ -440,6 +461,10 @@ "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), "scheduler": None, }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), vis="viewer", @@ -504,9 +529,6 @@ dataparser=SDFStudioDataParserConfig(), train_num_rays_per_batch=1024, eval_num_rays_per_batch=1024, - camera_optimizer=CameraOptimizerConfig( - mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2) - ), ), model=NeuSModelConfig(eval_num_rays_per_chunk=1024), ), @@ -538,9 +560,6 @@ dataparser=SDFStudioDataParserConfig(), train_num_rays_per_batch=2048, eval_num_rays_per_batch=2048, - camera_optimizer=CameraOptimizerConfig( - mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2) - ), ), model=NeuSFactoModelConfig( # proposal network allows for significantly smaller sdf/color network diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index cb63202dff..0b937f64c6 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -113,8 +113,6 @@ class DataManagerConfig(InstantiateConfig): """Target class to instantiate.""" data: Optional[Path] = None """Source of data, may not be used by all models.""" - camera_optimizer: Optional[CameraOptimizerConfig] = None - """Specifies the camera pose optimizer used during training. Helpful if poses are noisy.""" masks_on_gpu: bool = False """Process masks on GPU for speed at the expense of memory, if True.""" images_on_gpu: bool = False @@ -335,9 +333,8 @@ class VanillaDataManagerConfig(DataManagerConfig): eval_num_times_to_repeat_images: int = -1 """When not evaluating on all images, number of iterations before picking new images. If -1, never pick new images.""" - camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig() - """Specifies the camera pose optimizer used during training. Helpful if poses are noisy, such as for data from - Record3D.""" + eval_image_indices: Optional[Tuple[int, ...]] = (0,) + """Specifies the image indices to use during eval; if None, uses all.""" collate_fn: Callable[[Any], Any] = cast(Any, staticmethod(nerfstudio_collate)) """Specifies the collate function to use for the train and eval dataloaders.""" camera_res_scale_factor: float = 1.0 @@ -346,9 +343,21 @@ class VanillaDataManagerConfig(DataManagerConfig): """ patch_size: int = 1 """Size of patch to sample from. 
If >1, patch-based sampling will be used.""" + camera_optimizer: Optional[CameraOptimizerConfig] = field(default=None) + """Deprecated, has been moved to the model config.""" pixel_sampler: PixelSamplerConfig = PixelSamplerConfig() """Specifies the pixel sampler used to sample pixels from images.""" + def __post_init__(self): + """Warn user of camera optimizer change.""" + if self.camera_optimizer is not None: + import warnings + + CONSOLE.print( + "\nCameraOptimizerConfig has been moved from the DataManager to the Model.\n", style="bold yellow" + ) + warnings.warn("above message coming from", FutureWarning, stacklevel=3) + TDataset = TypeVar("TDataset", bound=InputDataset, default=InputDataset) @@ -486,13 +495,7 @@ def setup_train(self): ) self.iter_train_image_dataloader = iter(self.train_image_dataloader) self.train_pixel_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) - self.train_camera_optimizer = self.config.camera_optimizer.setup( - num_cameras=self.train_dataset.cameras.size, device=self.device - ) - self.train_ray_generator = RayGenerator( - self.train_dataset.cameras.to(self.device), - self.train_camera_optimizer, - ) + self.train_ray_generator = RayGenerator(self.train_dataset.cameras.to(self.device)) def setup_eval(self): """Sets up the data loader for evaluation""" @@ -510,13 +513,7 @@ def setup_eval(self): ) self.iter_eval_image_dataloader = iter(self.eval_image_dataloader) self.eval_pixel_sampler = self._get_pixel_sampler(self.eval_dataset, self.config.eval_num_rays_per_batch) - self.eval_camera_optimizer = self.config.camera_optimizer.setup( - num_cameras=self.eval_dataset.cameras.size, device=self.device - ) - self.eval_ray_generator = RayGenerator( - self.eval_dataset.cameras.to(self.device), - self.eval_camera_optimizer, - ) + self.eval_ray_generator = RayGenerator(self.eval_dataset.cameras.to(self.device)) # for loading full images self.fixed_indices_eval_dataloader = FixedIndicesEvalDataloader( input_dataset=self.eval_dataset, @@ -572,13 +569,4 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: Returns: A list of dictionaries containing the data manager's param groups. """ - param_groups = {} - - camera_opt_params = list(self.train_camera_optimizer.parameters()) - if self.config.camera_optimizer.mode != "off": - assert len(camera_opt_params) > 0 - param_groups[self.config.camera_optimizer.param_group] = camera_opt_params - else: - assert len(camera_opt_params) == 0 - - return param_groups + return {} diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py new file mode 100644 index 0000000000..8d541d79ef --- /dev/null +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -0,0 +1,321 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Parallel data manager that generates training data in multiple python processes. 
+""" +from __future__ import annotations + +import concurrent.futures +import queue +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import ( + Dict, + Generic, + List, + Literal, + Optional, + Tuple, + Type, + Union, +) + +import torch +import torch.multiprocessing as mp +from rich.progress import track +from torch.nn import Parameter + +from nerfstudio.cameras.cameras import CameraType +from nerfstudio.cameras.rays import RayBundle +from nerfstudio.data.datamanagers.base_datamanager import ( + DataManager, + VanillaDataManagerConfig, + TDataset, + variable_res_collate, +) +from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs +from nerfstudio.data.pixel_samplers import ( + PixelSampler, + PixelSamplerConfig, + PatchPixelSamplerConfig, +) +from nerfstudio.data.utils.dataloaders import ( + CacheDataloader, + FixedIndicesEvalDataloader, + RandIndicesEvalDataloader, +) +from nerfstudio.model_components.ray_generators import RayGenerator +from nerfstudio.utils.rich_utils import CONSOLE + + +@dataclass +class ParallelDataManagerConfig(VanillaDataManagerConfig): + """Config for a `ParallelDataManager` which reads data in multiple processes""" + + _target: Type = field(default_factory=lambda: ParallelDataManager) + """Target class to instantiate.""" + num_processes: int = 1 + """Number of processes to use for train data loading. More than 1 doesn't result in that much better performance""" + queue_size: int = 2 + """Size of shared data queue containing generated ray bundles and batches. + If queue_size <= 0, the queue size is infinite.""" + max_thread_workers: Optional[int] = None + """Maximum number of threads to use in thread pool executor. If None, use ThreadPool default.""" + + +class DataProcessor(mp.Process): + """Parallel dataset batch processor. + + This class is responsible for generating ray bundles from an input dataset + in parallel python processes. 
+ + Args: + out_queue: the output queue for storing the processed data + config: configuration object for the parallel data manager + dataparser_outputs: outputs from the dataparser + dataset: input dataset + pixel_sampler: The pixel sampler for sampling rays + """ + + def __init__( + self, + out_queue: mp.Queue, + config: ParallelDataManagerConfig, + dataparser_outputs: DataparserOutputs, + dataset: TDataset, + pixel_sampler: PixelSampler, + ): + super().__init__() + self.daemon = True + self.out_queue = out_queue + self.config = config + self.dataparser_outputs = dataparser_outputs + self.dataset = dataset + self.exclude_batch_keys_from_device = self.dataset.exclude_batch_keys_from_device + self.pixel_sampler = pixel_sampler + self.ray_generator = RayGenerator(self.dataset.cameras) + self.cache_images() + + def run(self): + """Append out queue in parallel with ray bundles and batches.""" + while True: + batch = self.pixel_sampler.sample(self.img_data) + ray_indices = batch["indices"] + ray_bundle: RayBundle = self.ray_generator(ray_indices) + # check that GPUs are available + if torch.cuda.is_available(): + ray_bundle = ray_bundle.pin_memory() + while True: + try: + self.out_queue.put_nowait((ray_bundle, batch)) + break + except queue.Full: + time.sleep(0.0001) + except Exception: + CONSOLE.print_exception() + CONSOLE.print("[bold red]Error: Error occured in parallel datamanager queue.") + + def cache_images(self): + """Caches all input images into a NxHxWx3 tensor.""" + indices = range(len(self.dataset)) + batch_list = [] + results = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.max_thread_workers) as executor: + for idx in indices: + res = executor.submit(self.dataset.__getitem__, idx) + results.append(res) + for res in track(results, description="Loading data batch", transient=False): + batch_list.append(res.result()) + self.img_data = self.config.collate_fn(batch_list) + + +class ParallelDataManager(DataManager, Generic[TDataset]): + """Data manager implementation for parallel dataloading. 
+ + Args: + config: the DataManagerConfig used to instantiate class + """ + + def __init__( + self, + config: ParallelDataManagerConfig, + device: Union[torch.device, str] = "cpu", + test_mode: Literal["test", "val", "inference"] = "val", + world_size: int = 1, + local_rank: int = 0, + **kwargs, + ): + self.dataset_type: Type[TDataset] = kwargs.get("_dataset_type", getattr(TDataset, "__default__")) + self.config = config + self.device = device + self.world_size = world_size + self.local_rank = local_rank + self.test_mode = test_mode + self.test_split = "test" if test_mode in ["test", "inference"] else "val" + self.dataparser_config = self.config.dataparser + if self.config.data is not None: + self.config.dataparser.data = Path(self.config.data) + else: + self.config.data = self.config.dataparser.data + self.dataparser = self.dataparser_config.setup() + if test_mode == "inference": + self.dataparser.downscale_factor = 1 # Avoid opening images + self.includes_time = self.dataparser.includes_time + self.train_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split="train") + self.eval_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split=self.test_split) + cameras = self.train_dataparser_outputs.cameras + if len(cameras) > 1: + for i in range(1, len(cameras)): + if cameras[0].width != cameras[i].width or cameras[0].height != cameras[i].height: + CONSOLE.print("Variable resolution, using variable_res_collate") + self.config.collate_fn = variable_res_collate + break + self.train_dataset = self.create_train_dataset() + self.eval_dataset = self.create_eval_dataset() + self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device + # Spawn is critical for not freezing the program (PyTorch compatability issue) + # check if spawn is already set + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method("spawn") + super().__init__() + + def create_train_dataset(self) -> TDataset: + """Sets up the data loaders for training.""" + return self.dataset_type( + dataparser_outputs=self.train_dataparser_outputs, + scale_factor=self.config.camera_res_scale_factor, + ) + + def create_eval_dataset(self) -> TDataset: + """Sets up the data loaders for evaluation.""" + return self.dataset_type( + dataparser_outputs=self.dataparser.get_dataparser_outputs(split=self.test_split), + scale_factor=self.config.camera_res_scale_factor, + ) + + def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> PixelSampler: + """Infer pixel sampler to use.""" + if self.config.patch_size > 1 and type(self.config.pixel_sampler) is PixelSamplerConfig: + return PatchPixelSamplerConfig().setup( + patch_size=self.config.patch_size, num_rays_per_batch=num_rays_per_batch + ) + is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all() + if is_equirectangular.any(): + CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") + return self.config.pixel_sampler.setup( + is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch + ) + + def setup_train(self): + """Sets up parallel python data processes for training.""" + assert self.train_dataset is not None + self.train_pix_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore + self.data_queue = mp.Manager().Queue(maxsize=self.config.queue_size) + self.data_procs = [ + DataProcessor( + out_queue=self.data_queue, # type: 
ignore + config=self.config, + dataparser_outputs=self.train_dataparser_outputs, + dataset=self.train_dataset, + pixel_sampler=self.train_pix_sampler, + ) + for i in range(self.config.num_processes) + ] + for proc in self.data_procs: + proc.start() + print("Started threads") + + # Prime the executor with the first batch + self.train_executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.config.max_thread_workers) + self.train_batch_fut = self.train_executor.submit(self.data_queue.get) + + def setup_eval(self): + """Sets up the data loader for evaluation.""" + assert self.eval_dataset is not None + CONSOLE.print("Setting up evaluation dataset...") + self.eval_image_dataloader = CacheDataloader( + self.eval_dataset, + num_images_to_sample_from=self.config.eval_num_images_to_sample_from, + num_times_to_repeat_images=self.config.eval_num_times_to_repeat_images, + device=self.device, + num_workers=self.world_size * 4, + pin_memory=True, + collate_fn=self.config.collate_fn, + exclude_batch_keys_from_device=self.exclude_batch_keys_from_device, + ) + self.iter_eval_image_dataloader = iter(self.eval_image_dataloader) + self.eval_pixel_sampler = self._get_pixel_sampler(self.eval_dataset, self.config.eval_num_rays_per_batch) # type: ignore + self.eval_ray_generator = RayGenerator(self.eval_dataset.cameras.to(self.device)) + # for loading full images + self.fixed_indices_eval_dataloader = FixedIndicesEvalDataloader( + input_dataset=self.eval_dataset, + device=self.device, + num_workers=self.world_size * 4, + ) + self.eval_dataloader = RandIndicesEvalDataloader( + input_dataset=self.eval_dataset, + device=self.device, + num_workers=self.world_size * 4, + ) + + def next_train(self, step: int) -> Tuple[RayBundle, Dict]: + """Returns the next batch of data from the parallel training processes.""" + self.train_count += 1 + + # Fetch the next batch in an executor to parallelize the queue get() operation + # with the train step + bundle, batch = self.train_batch_fut.result() + self.train_batch_fut = self.train_executor.submit(self.data_queue.get) + ray_bundle = bundle.to(self.device) + return ray_bundle, batch + + def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: + """Returns the next batch of data from the eval dataloader.""" + self.eval_count += 1 + image_batch = next(self.iter_eval_image_dataloader) + assert self.eval_pixel_sampler is not None + assert isinstance(image_batch, dict) + batch = self.eval_pixel_sampler.sample(image_batch) + ray_indices = batch["indices"] + ray_bundle = self.eval_ray_generator(ray_indices) + return ray_bundle, batch + + def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: + """Retrieve the next eval image.""" + for camera_ray_bundle, batch in self.eval_dataloader: + assert camera_ray_bundle.camera_indices is not None + image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0]) + return image_idx, camera_ray_bundle, batch + raise ValueError("No more eval images") + + def get_train_rays_per_batch(self) -> int: + """Returns the number of rays per batch for training.""" + return self.config.train_num_rays_per_batch + + def get_eval_rays_per_batch(self) -> int: + """Returns the number of rays per batch for evaluation.""" + return self.config.eval_num_rays_per_batch + + def get_datapath(self) -> Path: + """Returns the path to the data. This is used to determine where to save camera paths.""" + return self.config.dataparser.data + + def get_param_groups(self) -> Dict[str, List[Parameter]]: + """Get the param groups for the data manager. 
+ Returns: + A list of dictionaries containing the data manager's param groups. + """ + return {} diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index d7e76806d8..9234a5d420 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -98,7 +98,7 @@ def sample_method( chosen_indices = random.sample(range(len(nonzero_indices)), k=batch_size) indices = nonzero_indices[chosen_indices] else: - indices = torch.floor( + indices = ( torch.rand((batch_size, 3), device=device) * torch.tensor([num_images, image_height, image_width], device=device) ).long() @@ -172,13 +172,11 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep collated_batch = { key: value[c, y, x] for key, value in batch.items() if key != "image_idx" and value is not None } - assert collated_batch["image"].shape[0] == num_rays_per_batch # Needed to correct the random indices to their actual camera idx locations. indices[:, 0] = batch["image_idx"][c] collated_batch["indices"] = indices # with the abs camera indices - if keep_full_image: collated_batch["full_image"] = batch["image"] diff --git a/nerfstudio/engine/optimizers.py b/nerfstudio/engine/optimizers.py index b80b77bcb5..9b8d48dba3 100644 --- a/nerfstudio/engine/optimizers.py +++ b/nerfstudio/engine/optimizers.py @@ -84,6 +84,25 @@ def __init__(self, config: Dict[str, Any], param_groups: Dict[str, List[Paramete self.schedulers = {} self.parameters = {} for param_group_name, params in param_groups.items(): + # For deprecation, catch the camera_opt param group and fix it nicely + if param_group_name == "camera_opt" and "camera_opt" not in config: + from nerfstudio.engine.schedulers import ExponentialDecaySchedulerConfig + from nerfstudio.utils.rich_utils import CONSOLE + + CONSOLE.print( + "\nThe 'camera_opt' param group should be assigned an optimizer in the config. Assigning default optimizers for now. This will be removed in a future release.\n", + style="bold yellow", + ) + + config["camera_opt"] = { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=30000), + } + # Print some nice warning messages if the user forgot to specify an optimizer + if param_group_name not in config: + raise RuntimeError( + f"""Optimizer config for '{param_group_name}' not found in config file. Make sure you specify an optimizer for each parameter group. 
Provided configs were: {config.keys()}""" + ) lr_init = config[param_group_name]["optimizer"].lr self.optimizers[param_group_name] = config[param_group_name]["optimizer"].setup(params=params) self.parameters[param_group_name] = params diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index b4ccba9eea..fe4aa85cea 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -219,13 +219,6 @@ def setup_optimizers(self) -> Optimizers: """ optimizer_config = self.config.optimizers.copy() param_groups = self.pipeline.get_param_groups() - camera_optimizer_config = self.config.pipeline.datamanager.camera_optimizer - if camera_optimizer_config is not None and camera_optimizer_config.mode != "off": - assert camera_optimizer_config.param_group not in optimizer_config - optimizer_config[camera_optimizer_config.param_group] = { - "optimizer": camera_optimizer_config.optimizer, - "scheduler": camera_optimizer_config.scheduler, - } return Optimizers(optimizer_config, param_groups) def train(self) -> None: diff --git a/nerfstudio/fields/nerfacto_field.py b/nerfstudio/fields/nerfacto_field.py index 34c6346fcf..f215a00458 100644 --- a/nerfstudio/fields/nerfacto_field.py +++ b/nerfstudio/fields/nerfacto_field.py @@ -116,6 +116,7 @@ def __init__( self.use_pred_normals = use_pred_normals self.pass_semantic_gradients = pass_semantic_gradients self.base_res = base_res + self.step = 0 self.direction_encoding = SHEncoding( levels=4, diff --git a/nerfstudio/model_components/ray_generators.py b/nerfstudio/model_components/ray_generators.py index 49754548dc..bad32258d0 100644 --- a/nerfstudio/model_components/ray_generators.py +++ b/nerfstudio/model_components/ray_generators.py @@ -18,7 +18,6 @@ from jaxtyping import Int from torch import Tensor, nn -from nerfstudio.cameras.camera_optimizers import CameraOptimizer from nerfstudio.cameras.cameras import Cameras from nerfstudio.cameras.rays import RayBundle @@ -34,10 +33,9 @@ class RayGenerator(nn.Module): image_coords: Tensor - def __init__(self, cameras: Cameras, pose_optimizer: CameraOptimizer) -> None: + def __init__(self, cameras: Cameras) -> None: super().__init__() self.cameras = cameras - self.pose_optimizer = pose_optimizer self.register_buffer("image_coords", cameras.get_image_coords(), persistent=False) def forward(self, ray_indices: Int[Tensor, "num_rays 3"]) -> RayBundle: @@ -51,11 +49,8 @@ def forward(self, ray_indices: Int[Tensor, "num_rays 3"]) -> RayBundle: x = ray_indices[:, 2] # col indices coords = self.image_coords[y, x] - camera_opt_to_camera = self.pose_optimizer(c) - ray_bundle = self.cameras.generate_rays( camera_indices=c.unsqueeze(-1), coords=coords, - camera_opt_to_camera=camera_opt_to_camera, ) return ray_bundle diff --git a/nerfstudio/models/nerfacto.py b/nerfstudio/models/nerfacto.py index 1122d5c203..df8eed9b65 100644 --- a/nerfstudio/models/nerfacto.py +++ b/nerfstudio/models/nerfacto.py @@ -28,6 +28,7 @@ from torchmetrics.image import PeakSignalNoiseRatio from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity +from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig from nerfstudio.cameras.rays import RayBundle, RaySamples from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation from nerfstudio.field_components.field_heads import FieldHeadNames @@ -126,6 +127,8 @@ class NerfactoModelConfig(ModelConfig): """Which implementation to use for the model.""" appearance_embed_dim: int = 32 
"""Dimension of the appearance embedding.""" + camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3") + """Config of the camera optimizer to use""" class NerfactoModel(Model): @@ -165,6 +168,9 @@ def populate_modules(self): implementation=self.config.implementation, ) + self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup( + num_cameras=self.num_train_data, device="cpu" + ) self.density_fns = [] num_prop_nets = self.config.num_proposal_iterations # Build the proposal network(s) @@ -229,7 +235,7 @@ def update_schedule(step): # losses self.rgb_loss = MSELoss() - + self.step = 0 # metrics self.psnr = PeakSignalNoiseRatio(data_range=1.0) self.ssim = structural_similarity_index_measure @@ -240,6 +246,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: param_groups = {} param_groups["proposal_networks"] = list(self.proposal_networks.parameters()) param_groups["fields"] = list(self.field.parameters()) + self.camera_optimizer.get_param_groups(param_groups=param_groups) return param_groups def get_training_callbacks( @@ -252,6 +259,7 @@ def get_training_callbacks( def set_anneal(step): # https://arxiv.org/pdf/2111.12077.pdf eq. 18 + self.step = step train_frac = np.clip(step / N, 0, 1) self.step = step @@ -278,6 +286,9 @@ def bias(x, b): return callbacks def get_outputs(self, ray_bundle: RayBundle): + # apply the camera optimizer pose tweaks + if self.training: + self.camera_optimizer.apply_to_raybundle(ray_bundle) ray_samples: RaySamples ray_samples, weights_list, ray_samples_list = self.proposal_sampler(ray_bundle, density_fns=self.density_fns) field_outputs = self.field.forward(ray_samples, compute_normals=self.config.predict_normals) @@ -324,7 +335,6 @@ def get_outputs(self, ray_bundle: RayBundle): for i in range(self.config.num_proposal_iterations): outputs[f"prop_depth_{i}"] = self.renderer_depth(weights=weights_list[i], ray_samples=ray_samples_list[i]) - return outputs def get_metrics_dict(self, outputs, batch): @@ -336,6 +346,8 @@ def get_metrics_dict(self, outputs, batch): if self.training: metrics_dict["distortion"] = distortion_loss(outputs["weights_list"], outputs["ray_samples_list"]) + + self.camera_optimizer.get_metrics_dict(metrics_dict) return metrics_dict def get_loss_dict(self, outputs, batch, metrics_dict=None): @@ -364,6 +376,8 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None): loss_dict["pred_normal_loss"] = self.config.pred_normal_loss_mult * torch.mean( outputs["rendered_pred_normal_loss"] ) + # Add loss from camera optimizer + self.camera_optimizer.get_loss_dict(loss_dict) return loss_dict def get_image_metrics_and_images( diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py index a0380d42e4..9cc9c7ccf4 100644 --- a/nerfstudio/models/tensorf.py +++ b/nerfstudio/models/tensorf.py @@ -53,6 +53,7 @@ from nerfstudio.model_components.scene_colliders import AABBBoxCollider from nerfstudio.models.base_model import Model, ModelConfig from nerfstudio.utils import colormaps, colors, misc +from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig @dataclass @@ -89,6 +90,8 @@ class TensoRFModelConfig(ModelConfig): tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm" regularization: Literal["none", "l1", "tv"] = "l1" """Regularization method used in tensorf paper""" + camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3") + """Config of the camera optimizer to use""" background_color: Literal["random", "last_sample", "black", "white"] = 
"white" """Whether to randomize the background color.""" @@ -256,6 +259,11 @@ def populate_modules(self): if self.config.tensorf_encoding == "cp" and self.config.regularization == "tv": raise RuntimeError("TV reg not supported for CP decomposition") + # (optional) camera optimizer + self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup( + num_cameras=self.num_train_data, device="cpu" + ) + def get_param_groups(self) -> Dict[str, List[Parameter]]: param_groups = {} @@ -267,11 +275,14 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: param_groups["encodings"] = list(self.field.color_encoding.parameters()) + list( self.field.density_encoding.parameters() ) + self.camera_optimizer.get_param_groups(param_groups=param_groups) return param_groups def get_outputs(self, ray_bundle: RayBundle): # uniform sampling + if self.training: + self.camera_optimizer.apply_to_raybundle(ray_bundle) ray_samples_uniform = self.sampler_uniform(ray_bundle) dens = self.field.get_density(ray_samples_uniform) weights = ray_samples_uniform.get_weights(dens) @@ -334,6 +345,8 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te else: raise ValueError(f"Regularization {self.config.regularization} not supported") + self.camera_optimizer.get_loss_dict(loss_dict) + loss_dict = misc.scale_dict(loss_dict, self.config.loss_coefficients) return loss_dict @@ -367,5 +380,7 @@ def get_image_metrics_and_images( "ssim": float(ssim.item()), "lpips": float(lpips.item()), } + self.camera_optimizer.get_metrics_dict(metrics_dict) + images_dict = {"img": combined_rgb, "accumulation": acc, "depth": depth} return metrics_dict, images_dict diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py index 5e202b06c2..10aca0e70a 100644 --- a/nerfstudio/pipelines/base_pipeline.py +++ b/nerfstudio/pipelines/base_pipeline.py @@ -45,6 +45,7 @@ DataManagerConfig, VanillaDataManager, ) +from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes from nerfstudio.models.base_model import Model, ModelConfig from nerfstudio.utils import profiler @@ -298,18 +299,6 @@ def get_train_loss_dict(self, step: int): ray_bundle, batch = self.datamanager.next_train(step) model_outputs = self._model(ray_bundle) # train distributed data parallel model if world_size > 1 metrics_dict = self.model.get_metrics_dict(model_outputs, batch) - - if self.config.datamanager.camera_optimizer is not None: - camera_opt_param_group = self.config.datamanager.camera_optimizer.param_group - if camera_opt_param_group in self.datamanager.get_param_groups(): - # Report the camera optimization metrics - metrics_dict["camera_opt_translation"] = ( - self.datamanager.get_param_groups()[camera_opt_param_group][0].data[:, :3].norm() - ) - metrics_dict["camera_opt_rotation"] = ( - self.datamanager.get_param_groups()[camera_opt_param_group][0].data[:, 3:].norm() - ) - loss_dict = self.model.get_loss_dict(model_outputs, batch, metrics_dict) return model_outputs, loss_dict, metrics_dict @@ -372,7 +361,7 @@ def get_average_eval_image_metrics( """ self.eval() metrics_dict_list = [] - assert isinstance(self.datamanager, VanillaDataManager) + assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager)) num_images = len(self.datamanager.fixed_indices_eval_dataloader) with Progress( TextColumn("[progress.description]{task.description}"), diff --git a/nerfstudio/utils/tensor_dataclass.py 
b/nerfstudio/utils/tensor_dataclass.py index 0ebe7b5ced..a2b8d1dadb 100644 --- a/nerfstudio/utils/tensor_dataclass.py +++ b/nerfstudio/utils/tensor_dataclass.py @@ -261,6 +261,14 @@ def to(self: TensorDataclassT, device) -> TensorDataclassT: """ return self._apply_fn_to_fields(lambda x: x.to(device)) + def pin_memory(self: TensorDataclassT) -> TensorDataclassT: + """Pins the tensor dataclass memory + + Returns: + TensorDataclass: A new TensorDataclass with the same data but pinned. + """ + return self._apply_fn_to_fields(lambda x: x.pin_memory()) + def _apply_fn_to_fields( self: TensorDataclassT, fn: Callable, From dba20fcb26d9256c4267b578f39b3614a6ffe669 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:18:16 -0700 Subject: [PATCH 037/101] minor patch for exporting pcs and meshes with parallel datamanager (#2503) * minor patch for exporting pcs and meshes with parallel datamanager * format --- nerfstudio/data/datamanagers/parallel_datamanager.py | 11 +++++++++-- nerfstudio/scripts/exporter.py | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py index 8d541d79ef..7a2e5ac559 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -222,7 +222,7 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe def setup_train(self): """Sets up parallel python data processes for training.""" assert self.train_dataset is not None - self.train_pix_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore + self.train_pixel_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore self.data_queue = mp.Manager().Queue(maxsize=self.config.queue_size) self.data_procs = [ DataProcessor( @@ -230,7 +230,7 @@ def setup_train(self): config=self.config, dataparser_outputs=self.train_dataparser_outputs, dataset=self.train_dataset, - pixel_sampler=self.train_pix_sampler, + pixel_sampler=self.train_pixel_sampler, ) for i in range(self.config.num_processes) ] @@ -319,3 +319,10 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: A list of dictionaries containing the data manager's param groups. """ return {} + + def __del__(self): + """Clean up the parallel data processes.""" + if hasattr(self, "data_procs"): + for proc in self.data_procs: + proc.terminate() + proc.join() diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py index 2e4fc68cac..b104597abd 100644 --- a/nerfstudio/scripts/exporter.py +++ b/nerfstudio/scripts/exporter.py @@ -34,6 +34,7 @@ from nerfstudio.cameras.rays import RayBundle from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager +from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager from nerfstudio.data.scene_box import OrientedBox from nerfstudio.exporter import texture_utils, tsdf_utils from nerfstudio.exporter.exporter_utils import ( @@ -137,7 +138,7 @@ def main(self) -> None: validate_pipeline(self.normal_method, self.normal_output_name, pipeline) # Increase the batchsize to speed up the evaluation. 
- assert isinstance(pipeline.datamanager, VanillaDataManager) + assert isinstance(pipeline.datamanager, (VanillaDataManager, ParallelDataManager)) assert pipeline.datamanager.train_pixel_sampler is not None pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch @@ -307,7 +308,7 @@ def main(self) -> None: validate_pipeline(self.normal_method, self.normal_output_name, pipeline) # Increase the batchsize to speed up the evaluation. - assert isinstance(pipeline.datamanager, VanillaDataManager) + assert isinstance(pipeline.datamanager, (VanillaDataManager, ParallelDataManager)) assert pipeline.datamanager.train_pixel_sampler is not None pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch From 8e2a0651d0dc9bb9d6de5d4a934d0a195ad5032d Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:49:52 -0700 Subject: [PATCH 038/101] Fix jitter viewer (#2505) --- nerfstudio/viewer_beta/control_panel.py | 6 +----- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index 07f556dd46..7161816941 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -269,11 +269,7 @@ def update_step(self, step): Args: step: the train step to set the model to """ - with self.viser_server.atomic(), self.stat_folder: - # TODO change to a .value call instead of remove() and add, this makes it jittery - with self.viser_server.atomic(): - self.markdown.remove() - self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}") + self.markdown.content = f"Step: {step}" def update_output_options(self, new_options: List[str]): """ diff --git a/pyproject.toml b/pyproject.toml index c041858ba3..fc2b0be03a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.3", + "viser==0.1.6", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", From 42cabedc600571c6b0095bd967b801c21a2f48d8 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Thu, 12 Oct 2023 23:08:45 -0400 Subject: [PATCH 039/101] Add ODM data processor (#2517) * Add ODM data processor * Fix ruff warnings * Re-format with black * Add annotations * Add explicit dtype * Add explicit dtype * Revert "Add explicit dtype" This reverts commit 6f3cde1671e920f4f38b390c808dcae69ab86a10. --- README.md | 1 + docs/quickstart/custom_dataset.md | 29 ++++++ nerfstudio/process_data/odm_utils.py | 148 +++++++++++++++++++++++++++ nerfstudio/scripts/process_data.py | 89 ++++++++++++++++ 4 files changed, 267 insertions(+) create mode 100644 nerfstudio/process_data/odm_utils.py diff --git a/README.md b/README.md index a853192ad4..3c7e827b17 100644 --- a/README.md +++ b/README.md @@ -250,6 +250,7 @@ Using an existing dataset is great, but likely you want to use your own data! 
We | πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | | πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | +| πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | | πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | ## 5. Advanced Options diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index 723a97c91e..14a89e4a2d 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -21,6 +21,7 @@ We Currently support the following custom data types: | πŸ“± [Record3D](record3d) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | | πŸ–₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | +| πŸ–₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | (images_and_video)= @@ -319,6 +320,34 @@ ns-process-data realitycapture --data {data directory} --csv {csv file} --output ns-train nerfacto --data {output directory} ``` +(odm)= + +## ODM + +All images/videos must be captured with the same camera. + +1. Process a dataset using [ODM](https://github.com/OpenDroneMap/ODM#quickstart) + +```bash +$ ls /path/to/dataset +images +odm_report +odm_orthophoto +... +``` + +2. Convert to nerfstudio format. + +```bash +ns-process-data odm --data /path/to/dataset --output-dir {output directory} +``` + +4. Train! + +```bash +ns-train nerfacto --data {output directory} +``` + (360_data)= ## 360 Data (Equirectangular) diff --git a/nerfstudio/process_data/odm_utils.py b/nerfstudio/process_data/odm_utils.py new file mode 100644 index 0000000000..aa9b695410 --- /dev/null +++ b/nerfstudio/process_data/odm_utils.py @@ -0,0 +1,148 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Helper utils for processing ODM data into the nerfstudio format.""" + +import json +from pathlib import Path +from typing import Dict, List +import os +import sys +import math + +import numpy as np + +from nerfstudio.process_data.process_data_utils import CAMERA_MODELS + + +def rodrigues_vec_to_rotation_mat(rodrigues_vec: np.ndarray) -> np.ndarray: + theta = np.linalg.norm(rodrigues_vec) + if theta < sys.float_info.epsilon: + rotation_mat = np.eye(3, dtype=float) + else: + r = rodrigues_vec / theta + ident = np.eye(3, dtype=float) + r_rT = np.array( + [ + [r[0] * r[0], r[0] * r[1], r[0] * r[2]], + [r[1] * r[0], r[1] * r[1], r[1] * r[2]], + [r[2] * r[0], r[2] * r[1], r[2] * r[2]], + ] + ) + r_cross = np.array([[0, -r[2], r[1]], [r[2], 0, -r[0]], [-r[1], r[0], 0]], dtype=float) + rotation_mat = math.cos(theta) * ident + (1 - math.cos(theta)) * r_rT + math.sin(theta) * r_cross + return rotation_mat + + +def cameras2nerfds( + image_filename_map: Dict[str, Path], + cameras_file: Path, + shots_file: Path, + output_dir: Path, + verbose: bool = False, +) -> List[str]: + """Convert ODM cameras into a nerfstudio dataset. + + Args: + image_filename_map: Mapping of original image filenames to their saved locations. + shots_file: Path to ODM's shots.geojson + output_dir: Path to the output directory. + verbose: Whether to print verbose output. + + Returns: + Summary of the conversion. + """ + + with open(cameras_file, "r", encoding="utf-8") as f: + cameras = json.loads(f.read()) + with open(shots_file, "r", encoding="utf-8") as f: + shots = json.loads(f.read()) + + camera_ids = list(cameras.keys()) + if len(camera_ids) > 1: + raise ValueError("Only one camera is supported") + camera_id = camera_ids[0] + camera = cameras[camera_id] + data = {} + if camera["projection_type"] in ["brown", "perspective"]: + data["camera_model"] = CAMERA_MODELS["perspective"].value + elif camera["projection_type"] in ["fisheye", "fisheye_opencv"]: + data["camera_model"] = CAMERA_MODELS["fisheye"].value + elif camera["projection_type"] in ["spherical", "equirectangular"]: + data["camera_model"] = CAMERA_MODELS["equirectangular"].value + else: + raise ValueError("Unsupported ODM camera model: " + data["camera_model"]) + + sensor_dict = {} + s = {"w": int(camera["width"]), "h": int(camera["height"])} + + s["fl_x"] = camera.get("focal_x", camera.get("focal")) * max(s["w"], s["h"]) + s["fl_y"] = camera.get("focal_y", camera.get("focal")) * max(s["w"], s["h"]) + + s["cx"] = camera["c_x"] + (s["w"] - 1.0) / 2.0 + s["cy"] = camera["c_y"] + (s["h"] - 1.0) / 2.0 + + for p in ["k1", "k2", "p1", "p2", "k3"]: + if p in camera: + s[p] = camera[p] + + sensor_dict[camera_id] = s + + shots = shots["features"] + shots_dict = {} + for shot in shots: + props = shot["properties"] + filename = props["filename"] + rotation = rodrigues_vec_to_rotation_mat(np.array(props["rotation"]) * -1) + translation = np.array(props["translation"]) + + m = np.eye(4) + m[:3, :3] = rotation + m[:3, 3] = translation + + name, ext = os.path.splitext(filename) + shots_dict[name] = m + + frames = [] + num_skipped = 0 + + for fname in shots_dict: + transform = shots_dict[fname] + if fname not in image_filename_map: + num_skipped += 1 + continue + + frame = {} + frame["file_path"] = image_filename_map[fname].as_posix() + frame.update(sensor_dict[camera_id]) + + transform = transform[[2, 0, 1, 3], :] + transform[:, 1:3] *= -1 + frame["transform_matrix"] = transform.tolist() + frames.append(frame) + + data["frames"] = frames + + with open(output_dir / 
"transforms.json", "w", encoding="utf-8") as f: + json.dump(data, f, indent=4) + + summary = [] + if num_skipped == 1: + summary.append(f"{num_skipped} image skipped because it was missing its camera pose.") + if num_skipped > 1: + summary.append(f"{num_skipped} images were skipped because they were missing camera poses.") + + summary.append(f"Final dataset is {len(data['frames'])} frames.") + + return summary diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 8b4151f757..83cdcca473 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -32,6 +32,7 @@ process_data_utils, realitycapture_utils, record3d_utils, + odm_utils, ) from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset @@ -387,6 +388,93 @@ def main(self) -> None: CONSOLE.rule() +@dataclass +class ProcessODM(BaseConverterToNerfstudioDataset): + """Process ODM data into a nerfstudio dataset. + + This script does the following: + + 1. Scales images to a specified size. + 2. Converts ODM poses into the nerfstudio format. + """ + + num_downscales: int = 3 + """Number of times to downscale the images. Downscales by 2 each time. For example a value of 3 + will downscale the images by 2x, 4x, and 8x.""" + max_dataset_size: int = 600 + """Max number of images to train on. If the dataset has more, images will be sampled approximately evenly. If -1, + use all images.""" + + def main(self) -> None: + """Process images into a nerfstudio dataset.""" + + orig_images_dir = self.data / "images" + cameras_file = self.data / "cameras.json" + shots_file = self.data / "odm_report" / "shots.geojson" + + if not shots_file.exists: + raise ValueError(f"shots file {shots_file} doesn't exist") + if not shots_file.exists: + raise ValueError(f"cameras file {cameras_file} doesn't exist") + + if not orig_images_dir.exists: + raise ValueError(f"Images dir {orig_images_dir} doesn't exist") + + if self.eval_data is not None: + raise ValueError("Cannot use eval_data since cameras were already aligned with ODM.") + + self.output_dir.mkdir(parents=True, exist_ok=True) + image_dir = self.output_dir / "images" + image_dir.mkdir(parents=True, exist_ok=True) + + summary_log = [] + + # Copy images to output directory + image_filenames, num_orig_images = process_data_utils.get_image_filenames( + orig_images_dir, self.max_dataset_size + ) + copied_image_paths = process_data_utils.copy_images_list( + image_filenames, + image_dir=image_dir, + verbose=self.verbose, + num_downscales=self.num_downscales, + ) + num_frames = len(copied_image_paths) + + copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths] + original_names = [image_path.stem for image_path in image_filenames] + image_filename_map = dict(zip(original_names, copied_image_paths)) + + if self.max_dataset_size > 0 and num_frames != num_orig_images: + summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total") + summary_log.append( + "To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to " + f"larger than the current value ({self.max_dataset_size}), or -1 to use all images." 
+ ) + else: + summary_log.append(f"Started with {num_frames} images") + + # Save json + if num_frames == 0: + CONSOLE.print("[bold red]No images found, exiting") + sys.exit(1) + summary_log.extend( + odm_utils.cameras2nerfds( + image_filename_map=image_filename_map, + cameras_file=cameras_file, + shots_file=shots_file, + output_dir=self.output_dir, + verbose=self.verbose, + ) + ) + + CONSOLE.rule("[bold green]:tada: :tada: :tada: All DONE :tada: :tada: :tada:") + + for summary in summary_log: + CONSOLE.print(summary, justify="center") + CONSOLE.rule() + + Commands = Union[ Annotated[ImagesToNerfstudioDataset, tyro.conf.subcommand(name="images")], Annotated[VideoToNerfstudioDataset, tyro.conf.subcommand(name="video")], @@ -394,6 +482,7 @@ def main(self) -> None: Annotated[ProcessMetashape, tyro.conf.subcommand(name="metashape")], Annotated[ProcessRealityCapture, tyro.conf.subcommand(name="realitycapture")], Annotated[ProcessRecord3D, tyro.conf.subcommand(name="record3d")], + Annotated[ProcessODM, tyro.conf.subcommand(name="odm")], ] From d3934737a121296af8d236c2fa59e2c931a73444 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Sat, 14 Oct 2023 13:00:17 +0800 Subject: [PATCH 040/101] Upgrade pyright (#2526) * Upgrade pyright * Resolve pyright errors * Suppress trimesh error * Run ruff --- nerfstudio/exporter/marching_cubes.py | 2 +- nerfstudio/process_data/hloc_utils.py | 29 ++++++++++++----------- nerfstudio/scripts/render.py | 14 +++++++---- nerfstudio/utils/plotly_utils.py | 1 + nerfstudio/viewer/server/control_panel.py | 2 +- nerfstudio/viewer/server/viewer_state.py | 8 +++---- nerfstudio/viewer/viser/message_api.py | 4 ++-- nerfstudio/viewer_beta/control_panel.py | 9 +++---- nerfstudio/viewer_beta/viewer.py | 2 +- pyproject.toml | 4 ++-- 10 files changed, 42 insertions(+), 33 deletions(-) diff --git a/nerfstudio/exporter/marching_cubes.py b/nerfstudio/exporter/marching_cubes.py index a833075010..ee13422b1f 100644 --- a/nerfstudio/exporter/marching_cubes.py +++ b/nerfstudio/exporter/marching_cubes.py @@ -249,7 +249,7 @@ def evaluate(points: torch.Tensor) -> torch.Tensor: ) verts = verts + np.array([x_min, y_min, z_min]) - meshcrop = trimesh.Trimesh(verts, faces, normals) + meshcrop = trimesh.Trimesh(verts, faces, normals) # type: ignore meshes.append(meshcrop) combined_mesh: trimesh.Trimesh = trimesh.util.concatenate(meshes) # type: ignore diff --git a/nerfstudio/process_data/hloc_utils.py b/nerfstudio/process_data/hloc_utils.py index 85049b5bf8..a833f51edd 100644 --- a/nerfstudio/process_data/hloc_utils.py +++ b/nerfstudio/process_data/hloc_utils.py @@ -29,7 +29,7 @@ try: # TODO(1480) un-hide pycolmap import import pycolmap - from hloc import ( + from hloc import ( # type: ignore extract_features, match_features, pairs_from_exhaustive, @@ -38,6 +38,7 @@ ) except ImportError: _HAS_HLOC = False + else: _HAS_HLOC = True @@ -95,24 +96,24 @@ def run_hloc( features = outputs / "features.h5" matches = outputs / "matches.h5" - retrieval_conf = extract_features.confs["netvlad"] - feature_conf = extract_features.confs[feature_type] - matcher_conf = match_features.confs[matcher_type] + retrieval_conf = extract_features.confs["netvlad"] # type: ignore + feature_conf = extract_features.confs[feature_type] # type: ignore + matcher_conf = match_features.confs[matcher_type] # type: ignore references = [p.relative_to(image_dir).as_posix() for p in image_dir.iterdir()] - extract_features.main(feature_conf, image_dir, image_list=references, feature_path=features) + extract_features.main(feature_conf, 
image_dir, image_list=references, feature_path=features) # type: ignore if matching_method == "exhaustive": - pairs_from_exhaustive.main(sfm_pairs, image_list=references) + pairs_from_exhaustive.main(sfm_pairs, image_list=references) # type: ignore else: - retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs) + retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs) # type: ignore if num_matched >= len(references): num_matched = len(references) - pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=num_matched) - match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches) + pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=num_matched) # type: ignore + match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches) # type: ignore - image_options = pycolmap.ImageReaderOptions(camera_model=camera_model.value) + image_options = pycolmap.ImageReaderOptions(camera_model=camera_model.value) # type: ignore if refine_pixsfm: - sfm = PixSfM( + sfm = PixSfM( # type: ignore conf={ "dense_features": {"use_cache": True}, "KA": {"dense_features": {"use_cache": True}, "max_kps_per_problem": 1000}, @@ -126,20 +127,20 @@ def run_hloc( features, matches, image_list=references, - camera_mode=pycolmap.CameraMode.SINGLE, + camera_mode=pycolmap.CameraMode.SINGLE, # type: ignore image_options=image_options, verbose=verbose, ) print("Refined", refined.summary()) else: - reconstruction.main( + reconstruction.main( # type: ignore sfm_dir, image_dir, sfm_pairs, features, matches, - camera_mode=pycolmap.CameraMode.SINGLE, + camera_mode=pycolmap.CameraMode.SINGLE, # type: ignore image_options=image_options, verbose=verbose, ) diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 307bb5adba..5e7a52e015 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -18,12 +18,12 @@ """ from __future__ import annotations +import gzip import json import os -import struct import shutil +import struct import sys -import gzip from contextlib import ExitStack, contextmanager from dataclasses import dataclass, field from pathlib import Path @@ -54,11 +54,14 @@ get_spiral_path, ) from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle +from nerfstudio.data.datamanagers.base_datamanager import ( + VanillaDataManager, + VanillaDataManagerConfig, +) from nerfstudio.data.datasets.base_dataset import Dataset -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig +from nerfstudio.data.scene_box import OrientedBox from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader from nerfstudio.engine.trainer import TrainerConfig -from nerfstudio.data.scene_box import OrientedBox from nerfstudio.model_components import renderers from nerfstudio.pipelines.base_pipeline import Pipeline from nerfstudio.utils import colormaps, install_checks @@ -318,6 +321,9 @@ def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]: center = camera_json["crop"]["crop_center"] scale = camera_json["crop"]["crop_scale"] rot = (0.0, 0.0, 0.0) if "crop_rot" not in camera_json["crop"] else tuple(camera_json["crop"]["crop_rot"]) + assert len(center) == 3 + assert len(scale) == 3 + assert len(rot) == 3 return CropData( background_color=torch.Tensor([bg_color["r"] / 255.0, bg_color["g"] / 255.0, bg_color["b"] / 255.0]), obb=OrientedBox.from_params(center, rot, scale), diff --git a/nerfstudio/utils/plotly_utils.py 
b/nerfstudio/utils/plotly_utils.py index 00cab47605..1ab7da1fb1 100644 --- a/nerfstudio/utils/plotly_utils.py +++ b/nerfstudio/utils/plotly_utils.py @@ -239,6 +239,7 @@ def get_cube( if isinstance(side_length, float): pts *= side_length / 2.0 else: + assert isinstance(side_length, torch.Tensor) pts[0] *= side_length[0].item() / 2.0 pts[1] *= side_length[1].item() / 2.0 pts[2] *= side_length[2].item() / 2.0 diff --git a/nerfstudio/viewer/server/control_panel.py b/nerfstudio/viewer/server/control_panel.py index 7039b61c3d..0f789a8ae3 100644 --- a/nerfstudio/viewer/server/control_panel.py +++ b/nerfstudio/viewer/server/control_panel.py @@ -196,7 +196,7 @@ def update_output_options(self, new_options: List[str]): self._split_output_render.set_options(new_options) self._split_output_render.value = new_options[-1] - def add_element(self, e: ViewerElement, additional_tags: Tuple[str] = tuple()) -> None: + def add_element(self, e: ViewerElement, additional_tags: Tuple[str, ...] = tuple()) -> None: """Adds an element to the control panel Args: diff --git a/nerfstudio/viewer/server/viewer_state.py b/nerfstudio/viewer/server/viewer_state.py index ad80822ce8..3bbce5e2d5 100644 --- a/nerfstudio/viewer/server/viewer_state.py +++ b/nerfstudio/viewer/server/viewer_state.py @@ -198,8 +198,8 @@ def _crop_params_update(self, _) -> None: self.viser_server.send_crop_params( crop_enabled=self.control_panel.crop_viewport, crop_bg_color=self.control_panel.background_color, - crop_scale=tuple(crop_scale.tolist()), - crop_center=tuple(crop_center.tolist()), + crop_scale=tuple(crop_scale.tolist()), # type: ignore + crop_center=tuple(crop_center.tolist()), # type: ignore ) if self.camera_message is not None: self.render_statemachine.action(RenderAction("rerender", self.camera_message)) @@ -258,8 +258,8 @@ def _handle_crop_params_message(self, message: NerfstudioMessage) -> None: scale = np.array(message.crop_scale) crop_min = center - scale / 2.0 crop_max = center + scale / 2.0 - self.control_panel.crop_min = tuple(crop_min.tolist()) - self.control_panel.crop_max = tuple(crop_max.tolist()) + self.control_panel.crop_min = tuple(crop_min.tolist()) # type: ignore + self.control_panel.crop_max = tuple(crop_max.tolist()) # type: ignore def _handle_click_message(self, message: NerfstudioMessage) -> None: """Handle click message from viewer.""" diff --git a/nerfstudio/viewer/viser/message_api.py b/nerfstudio/viewer/viser/message_api.py index 3c8ce752c9..bb6449cf8e 100644 --- a/nerfstudio/viewer/viser/message_api.py +++ b/nerfstudio/viewer/viser/message_api.py @@ -501,8 +501,8 @@ def update_scene_box(self, scene_box: SceneBox) -> None: """ self._queue( messages.SceneBoxMessage( - min=tuple(scene_box.aabb[0].tolist()), - max=tuple(scene_box.aabb[1].tolist()), + min=tuple(scene_box.aabb[0].tolist()), # type: ignore + max=tuple(scene_box.aabb[1].tolist()), # type: ignore ) ) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index 7161816941..a6fdef970d 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -19,6 +19,8 @@ import numpy as np import torch import viser.transforms as vtf +from viser import ViserServer + from nerfstudio.data.scene_box import OrientedBox from nerfstudio.utils.colormaps import ColormapOptions, Colormaps from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup, @@ -32,7 +34,6 @@ ViewerSlider, ViewerVec3, ) -from viser import ViserServer class ControlPanel: @@ -147,7 +148,7 @@ def __init__( 
self._crop_handle = self.viser_server.add_transform_controls("Crop", depth_test=False, line_width=4.0) def update_center(han): - self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value) + self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value) # type: ignore self._crop_center = ViewerVec3( "Crop Center", @@ -175,7 +176,7 @@ def update_rot(han): @self._crop_handle.on_update def _update_crop_handle(han): pos = self._crop_handle.position - self._crop_center.value = tuple(p / self.viser_scale_ratio for p in pos) + self._crop_center.value = tuple(p / self.viser_scale_ratio for p in pos) # type: ignore rpy = vtf.SO3(self._crop_handle.wxyz).as_rpy_radians() self._crop_rot.value = (float(rpy.roll), float(rpy.pitch), float(rpy.yaw)) @@ -280,7 +281,7 @@ def update_output_options(self, new_options: List[str]): self._split_output_render.set_options(new_options) self._split_output_render.value = new_options[-1] - def add_element(self, e: ViewerElement, additional_tags: Tuple[str] = tuple()) -> None: + def add_element(self, e: ViewerElement, additional_tags: Tuple[str, ...] = tuple()) -> None: """Adds an element to the control panel Args: diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index d4fbf973b3..d122cf0946 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -326,7 +326,7 @@ def init_scene( ) @camera_handle.on_click - def _(event: viser.ClickEvent[viser.CameraFrustumHandle]) -> None: + def _(event: viser.SceneNodePointerEvent[viser.CameraFrustumHandle]) -> None: assert self.client is not None with self.client.atomic(): self.client.camera.position = event.target.position diff --git a/pyproject.toml b/pyproject.toml index fc2b0be03a..835a569daf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.6", + "viser==0.1.7", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", @@ -90,7 +90,7 @@ dev = [ "diffusers==0.16.1", "opencv-stubs==0.0.7", "transformers==4.29.2", - "pyright==1.1.308", + "pyright==1.1.331", ] # Documentation related packages From e9a3bae859d7e9e715f255b9854144f0dc9a5c85 Mon Sep 17 00:00:00 2001 From: Cyrus Vachha Date: Fri, 13 Oct 2023 22:23:20 -0700 Subject: [PATCH 041/101] Update URLs for readme and image references (#2527) * Update URLs for readme and image references Update the image URLs and documentation links due to change in docs url structure. Files edited include readme, documentation, and legacy viewer files. * Updating url in custom dataset --- README.md | 30 +++++++++---------- docs/nerfology/methods/instant_ngp.md | 2 +- docs/quickstart/custom_dataset.md | 4 +-- docs/reference/contributing.md | 2 +- nerfstudio/configs/external_methods.py | 12 ++++---- .../viewer/app/src/modules/Banner/Banner.jsx | 2 +- .../src/modules/LandingModal/LandingModal.jsx | 2 +- .../ViewportControlsModal.jsx | 2 +- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 3c7e827b17..20f041410b 100644 --- a/README.md +++ b/README.md @@ -23,10 +23,10 @@

[README header image block: logo <img> markup (alt "nerfstudio") above the tagline "A collaboration friendly studio for NeRFs"; this hunk updates the logo image URLs for the new docs.nerf.studio structure (markup not preserved in this extract).]

@@ -70,7 +70,7 @@ Nerfstudio initially launched as an opensource project by Berkeley students in [ We are committed to providing learning resources to help you understand the basics of (if you're just getting started), and keep up-to-date with (if you're a seasoned veteran) all things NeRF. As researchers, we know just how hard it is to get onboarded with this next-gen technology. So we're here to help with tutorials, documentation, and more! -Have feature requests? Want to add your brand-spankin'-new NeRF model? Have a new dataset? **We welcome [contributions](https://docs.nerf.studio/en/latest/reference/contributing.html)!** Please do not hesitate to reach out to the nerfstudio team with any questions via [Discord](https://discord.gg/uMbNqcraFc). +Have feature requests? Want to add your brand-spankin'-new NeRF model? Have a new dataset? **We welcome [contributions](https://docs.nerf.studio/reference/contributing.html)!** Please do not hesitate to reach out to the nerfstudio team with any questions via [Discord](https://discord.gg/uMbNqcraFc). Have feedback? We'd love for you to fill out our [Nerfstudio Feedback Form](https://forms.gle/sqN5phJN7LfQVwnP9) if you want to let us know who you are, why you are interested in Nerfstudio, or provide any feedback! @@ -118,7 +118,7 @@ You must have an NVIDIA video card with CUDA installed on the system. This libra ### Create environment -Nerfstudio requires `python >= 3.8`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/en/latest/miniconda.html) before proceeding. +Nerfstudio requires `python >= 3.8`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/miniconda.html) before proceeding. ```bash conda create --name nerfstudio -y python=3.8 @@ -281,23 +281,23 @@ We support four different methods to track training progress, using the viewer[t And that's it for getting started with the basics of nerfstudio. -If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/en/latest/) directly. +If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/) directly. | Section | Description | | -------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | -| [Documentation](https://docs.nerf.studio/en/latest/) | Full API documentation and tutorials | +| [Documentation](https://docs.nerf.studio/) | Full API documentation and tutorials | | [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer | | πŸŽ’ **Educational** | -| [Model Descriptions](https://docs.nerf.studio/en/latest/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. | -| [Component Descriptions](https://docs.nerf.studio/en/latest/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. | +| [Model Descriptions](https://docs.nerf.studio/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. 
| +| [Component Descriptions](https://docs.nerf.studio/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. | | πŸƒ **Tutorials** | -| [Getting Started](https://docs.nerf.studio/en/latest/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. | -| [Using the Viewer](https://docs.nerf.studio/en/latest/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. | +| [Getting Started](https://docs.nerf.studio/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. | +| [Using the Viewer](https://docs.nerf.studio/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. | | [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. | | πŸ’» **For Developers** | -| [Creating pipelines](https://docs.nerf.studio/en/latest/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. | -| [Creating datasets](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. | -| [Contributing](https://docs.nerf.studio/en/latest/reference/contributing.html) | Walk-through for how you can start contributing now. | +| [Creating pipelines](https://docs.nerf.studio/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. | +| [Creating datasets](https://docs.nerf.studio/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. | +| [Contributing](https://docs.nerf.studio/reference/contributing.html) | Walk-through for how you can start contributing now. | | πŸ’– **Community** | | [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! | | [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements | diff --git a/docs/nerfology/methods/instant_ngp.md b/docs/nerfology/methods/instant_ngp.md index c7589e295c..5f5f7e07f1 100644 --- a/docs/nerfology/methods/instant_ngp.md +++ b/docs/nerfology/methods/instant_ngp.md @@ -34,7 +34,7 @@ Instant-NGP breaks NeRF training into 3 pillars and proposes improvements to eac The core idea behind the improved sampling technique is that sampling over empty space should be skipped and sampling behind high density areas should also be skipped. This is achieved by maintaining a set of multiscale occupancy grids which coarsely mark empty and non-empty space. Occupancy is stored as a single bit, and a sample on a ray is skipped if its occupancy is too low. These occupancy grids are stored independently of the trainable encoding and are updated throughout training based on the updated density predictions. The authors find they can increase sampling speed by 10-100x compared to naive approaches. -Nerfstudio uses [NerfAcc](https://www.nerfacc.com/en/latest/index.html) as the sampling algorithm implementation. The details on NerfAcc's sampling and occupancy grid is discussed [here](https://www.nerfacc.com/en/stable/methodology/sampling.html#occupancy-grid-estimator). +Nerfstudio uses [NerfAcc](https://www.nerfacc.com/index.html) as the sampling algorithm implementation. 
The details on NerfAcc's sampling and occupancy grid is discussed [here](https://www.nerfacc.com/en/stable/methodology/sampling.html#occupancy-grid-estimator). Another major bottleneck for NeRF's training speed has been querying the neural network. The authors of this work implement the network such that it runs entirely on a single CUDA kernel. The network is also shrunk down to be just 4 layers with 64 neurons in each layer. They show that their fully-fused neural network is 5-10x faster than a Tensorflow implementation. diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index 14a89e4a2d..46aaa907d6 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -406,7 +406,7 @@ This outputs two 180 deg equirectangular renders horizontally stacked, one for e ### Setup instructions -To render for VR video it is essential to adjust the NeRF to have an approximately true-to-life real world scale (adjustable in the camera path) to ensure that the scene depth and IPD (distance between the eyes) is appropriate for the render to be viewable in VR. You can adjust the scene scale with the [Nerfstudio Blender Add-on](https://docs.nerf.studio/en/latest/extensions/blender_addon.html) by appropriately scaling a point cloud representation of the NeRF. +To render for VR video it is essential to adjust the NeRF to have an approximately true-to-life real world scale (adjustable in the camera path) to ensure that the scene depth and IPD (distance between the eyes) is appropriate for the render to be viewable in VR. You can adjust the scene scale with the [Nerfstudio Blender Add-on](https://docs.nerf.studio/extensions/blender_addon.html) by appropriately scaling a point cloud representation of the NeRF. Results may be unviewable if the scale is not set appropriately. The IPD is set at 64mm by default but only is accurate when the NeRF scene is true to scale. For good quality renders, it is recommended to render at high resolutions (For ODS: 4096x2048 per eye, or 2048x1024 per eye. For VR180: 4096x4096 per eye or 2048x2048 per eye). Render resolutions for a single eye are specified in the camera path. For VR180, resolutions must be in a 1:1 aspect ratio. For ODS, resolutions must be in a 2:1 aspect ratio. The final stacked render output will automatically be constructed (with aspect ratios for VR180 as 2:1 and ODS as 1:1). @@ -417,7 +417,7 @@ If you are rendering an image sequence, it is recommended to render as png inste ::: To render with the VR videos camera: -1. Use the [Nerfstudio Blender Add-on](https://docs.nerf.studio/en/latest/extensions/blender_addon.html) to set the scale of the NeRF scene and create the camera path +1. Use the [Nerfstudio Blender Add-on](https://docs.nerf.studio/extensions/blender_addon.html) to set the scale of the NeRF scene and create the camera path - Export a point cloud representation of the NeRF - Import the point cloud representation in Blender and enable the Nerfstudio Blender Add-on - Create a reference object such as a cube which may be 1x1x1 meter. You could also create a cylinder and scale it to an appropriate height of a viewer. 
diff --git a/docs/reference/contributing.md b/docs/reference/contributing.md index 529d154eb4..bf0af33db3 100644 --- a/docs/reference/contributing.md +++ b/docs/reference/contributing.md @@ -94,7 +94,7 @@ python nerfstudio/scripts/docs/build_docs.py ### Auto build -As you change or add models/components, the auto-generated [Reference API](https://docs.nerf.studio/en/latest/reference/api/index.html) may change. +As you change or add models/components, the auto-generated [Reference API](https://docs.nerf.studio/reference/api/index.html) may change. If you want the code to build on save you can use [sphinx autobuild](https://github.com/executablebooks/sphinx-autobuild). :::{admonition} Tip diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py index 6a0c78b099..ce4e7bbc2b 100644 --- a/nerfstudio/configs/external_methods.py +++ b/nerfstudio/configs/external_methods.py @@ -45,7 +45,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]Instruct-NeRF2NeRF[/bold yellow] -For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/in2n.html +For more information visit: https://docs.nerf.studio/nerfology/methods/in2n.html To enable Instruct-NeRF2NeRF, you must install it first by running: [grey]pip install git+https://github.com/ayaanzhaque/instruct-nerf2nerf[/grey]""", @@ -62,7 +62,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]K-Planes[/bold yellow] -For more information visit https://docs.nerf.studio/en/latest/nerfology/methods/kplanes.html +For more information visit https://docs.nerf.studio/nerfology/methods/kplanes.html To enable K-Planes, you must install it first by running: [grey]pip install kplanes-nerfstudio[/grey]""", @@ -78,7 +78,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]LERF[/bold yellow] -For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/lerf.html +For more information visit: https://docs.nerf.studio/nerfology/methods/lerf.html To enable LERF, you must install it first by running: [grey]pip install git+https://github.com/kerrj/lerf[/grey]""", @@ -95,7 +95,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]Tetra-NeRF[/bold yellow] -For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/tetranerf.html +For more information visit: https://docs.nerf.studio/nerfology/methods/tetranerf.html To enable Tetra-NeRF, you must install it first. 
Please follow the instructions here: https://github.com/jkulhanek/tetra-nerf/blob/master/README.md#installation""", @@ -110,7 +110,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]NeRFPlayer[/bold yellow] -For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/nerfplayer.html +For more information visit: https://docs.nerf.studio/nerfology/methods/nerfplayer.html To enable NeRFPlayer, you must install it first by running: [grey]pip install git+https://github.com/lsongx/nerfplayer-nerfstudio[/grey]""", @@ -125,7 +125,7 @@ class ExternalMethod: external_methods.append( ExternalMethod( """[bold yellow]Volinga[/bold yellow] -For more information visit: https://docs.nerf.studio/en/latest/extensions/unreal_engine.html +For more information visit: https://docs.nerf.studio/extensions/unreal_engine.html To enable Volinga, you must install it first by running: [grey]pip install git+https://github.com/Volinga/volinga-model[/grey]""", diff --git a/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx b/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx index baedf714d6..e2156606a6 100644 --- a/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx +++ b/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx @@ -62,7 +62,7 @@ export default function Banner() {
[Banner.jsx hunk: an <img> tag with alt text "The favicon." whose docs.nerf.studio image URL is updated (JSX markup not preserved in this extract).]
diff --git a/nerfstudio/viewer/app/src/modules/LandingModal/LandingModal.jsx b/nerfstudio/viewer/app/src/modules/LandingModal/LandingModal.jsx index faf2db4c24..42caec03a6 100644 --- a/nerfstudio/viewer/app/src/modules/LandingModal/LandingModal.jsx +++ b/nerfstudio/viewer/app/src/modules/LandingModal/LandingModal.jsx @@ -100,7 +100,7 @@ export default function LandingModel(props: LandingModalProps) {
[LandingModal.jsx hunk: an <img> tag with alt text "The favicon." whose docs.nerf.studio image URL is updated (JSX markup not preserved in this extract).]
diff --git a/nerfstudio/viewer/app/src/modules/ViewportControlsModal/ViewportControlsModal.jsx b/nerfstudio/viewer/app/src/modules/ViewportControlsModal/ViewportControlsModal.jsx index 192bc262b6..953c4d4356 100644 --- a/nerfstudio/viewer/app/src/modules/ViewportControlsModal/ViewportControlsModal.jsx +++ b/nerfstudio/viewer/app/src/modules/ViewportControlsModal/ViewportControlsModal.jsx @@ -30,7 +30,7 @@ export default function ControlsModal() {
The favicon. Date: Sat, 14 Oct 2023 21:46:53 -0700 Subject: [PATCH 042/101] Add camera_type dropdown in new viewer (#2525) * Add camera_type dropdown in new viewer Added a gui dropdown menu for selecting the camera type. Currently, the render view doesn't update to preview the equirectangular or fisheye render. * Updating the image url of nerfstudio logo images for the viewer Changing the image urls to restore the nerfstudio logo in the viewer --------- Co-authored-by: Brent Yi --- nerfstudio/viewer_beta/render_panel.py | 9 ++++++++- nerfstudio/viewer_beta/viewer.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 690027f8d0..4543009942 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -297,6 +297,13 @@ def _(_) -> None: """Update the aspect ratio for all cameras when the resolution changes.""" camera_path.update_aspect(resolution.value[0] / resolution.value[1]) + camera_type = server.add_gui_dropdown( + "Camera Type", + ("Perspective", "Fisheye", "Equirectangular"), + initial_value="Perspective", + hint="Camera model to render with.", + ) + add_button = server.add_gui_button( "Add keyframe", icon=viser.Icon.PLUS, @@ -584,7 +591,7 @@ def _(event: viser.GuiEvent) -> None: } ) json_data["keyframes"] = keyframes - json_data["camera_type"] = "perspective" + json_data["camera_type"] = camera_type.value.lower() json_data["render_height"] = resolution.value[1] json_data["render_width"] = resolution.value[0] json_data["fps"] = framerate_slider.value diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index d122cf0946..cc352d553f 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -124,8 +124,8 @@ def __init__( ), ) image = viser.theme.TitlebarImage( - image_url_light="https://docs.nerf.studio/en/latest/_static/imgs/logo.png", - image_url_dark="https://docs.nerf.studio/en/latest/_static/imgs/logo-dark.png", + image_url_light="https://docs.nerf.studio/_static/imgs/logo.png", + image_url_dark="https://docs.nerf.studio/_static/imgs/logo-dark.png", image_alt="NerfStudio Logo", href="https://docs.nerf.studio/", ) From ce06935f3df92adf027c7421cbc1883c50e7252a Mon Sep 17 00:00:00 2001 From: Ji Shi Date: Mon, 16 Oct 2023 01:05:22 -0500 Subject: [PATCH 043/101] Fix datamanagers' get_rays_per_batch functions in case of using DynamicBatchPipeline (#2522) --- nerfstudio/data/datamanagers/base_datamanager.py | 4 ++++ nerfstudio/data/datamanagers/parallel_datamanager.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 0b937f64c6..b4cbeaa12d 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -556,9 +556,13 @@ def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: raise ValueError("No more eval images") def get_train_rays_per_batch(self) -> int: + if self.train_pixel_sampler is not None: + return self.train_pixel_sampler.num_rays_per_batch return self.config.train_num_rays_per_batch def get_eval_rays_per_batch(self) -> int: + if self.eval_pixel_sampler is not None: + return self.eval_pixel_sampler.num_rays_per_batch return self.config.eval_num_rays_per_batch def get_datapath(self) -> Path: diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py 
b/nerfstudio/data/datamanagers/parallel_datamanager.py index 7a2e5ac559..5c4d5c4a27 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -303,10 +303,14 @@ def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: def get_train_rays_per_batch(self) -> int: """Returns the number of rays per batch for training.""" + if self.train_pixel_sampler is not None: + return self.train_pixel_sampler.num_rays_per_batch return self.config.train_num_rays_per_batch def get_eval_rays_per_batch(self) -> int: """Returns the number of rays per batch for evaluation.""" + if self.eval_pixel_sampler is not None: + return self.eval_pixel_sampler.num_rays_per_batch return self.config.eval_num_rays_per_batch def get_datapath(self) -> Path: From 042c7d1465d3760cd5e256907f8ede8035a7364b Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Mon, 16 Oct 2023 14:26:42 +0800 Subject: [PATCH 044/101] Bump `tyro` to 0.5.7 (#2393) * Bump `tyro` * tyro 0.5.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 835a569daf..0d8708ddab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "av>=9.2.0", "comet_ml>=3.33.8", "cryptography>=38", - "tyro>=0.5.3", + "tyro>=0.5.10", "gdown>=4.6.0", "ninja>=1.10", "h5py>=2.9.0", From 6cbe0709492d4da821922d327096d0ab069f3ea9 Mon Sep 17 00:00:00 2001 From: SeaOtocinclus <74376766+SeaOtocinclus@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:11:43 -0700 Subject: [PATCH 045/101] Update data_conventions.md (#2534) Fix typo in `distortion` word --- docs/quickstart/data_conventions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/quickstart/data_conventions.md b/docs/quickstart/data_conventions.md index 2e9e76effa..a929ff53ff 100644 --- a/docs/quickstart/data_conventions.md +++ b/docs/quickstart/data_conventions.md @@ -31,10 +31,10 @@ If all of the images share the same camera intrinsics, the values can be placed "cy": 1000.0, // principal point y "w": 3008, // image width "h": 2000, // image height - "k1": 0.0312, // first radial distorial parameter, used by [OPENCV, OPENCV_FISHEYE] - "k2": 0.0051, // second radial distorial parameter, used by [OPENCV, OPENCV_FISHEYE] - "k3": 0.0006, // third radial distorial parameter, used by [OPENCV_FISHEYE] - "k4": 0.0001, // fourth radial distorial parameter, used by [OPENCV_FISHEYE] + "k1": 0.0312, // first radial distortion parameter, used by [OPENCV, OPENCV_FISHEYE] + "k2": 0.0051, // second radial distortion parameter, used by [OPENCV, OPENCV_FISHEYE] + "k3": 0.0006, // third radial distortion parameter, used by [OPENCV_FISHEYE] + "k4": 0.0001, // fourth radial distortion parameter, used by [OPENCV_FISHEYE] "p1": -6.47e-5, // first tangential distortion parameter, used by [OPENCV] "p2": -1.37e-7, // second tangential distortion parameter, used by [OPENCV] "frames": // ... 
per-frame intrinsics and extrinsics parameters From 3dd162aae8ae7c166928e1f02bf97e7febe3a18e Mon Sep 17 00:00:00 2001 From: Ahmed Shariff Date: Mon, 16 Oct 2023 20:05:53 -0700 Subject: [PATCH 046/101] Fix pillow version to avoid error with ANTIALIAS (Fix #2378) (#2446) * Fix pillow version to avoid error with ANTIALIAS (Fix #2378) * Revert pillow version, use fix for pillow ANTIALIAS from upstream --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d73c487343..f333312e37 100644 --- a/Dockerfile +++ b/Dockerfile @@ -148,7 +148,9 @@ RUN git clone --branch v1.0 --recursive https://github.com/cvg/pyceres.git && \ cd .. # Install pixel perfect sfm. -RUN git clone --branch v1.0 --recursive https://github.com/cvg/pixel-perfect-sfm.git && \ +RUN git clone --recursive https://github.com/cvg/pixel-perfect-sfm.git && \ + git reset --hard 40f7c1339328b2a0c7cf71f76623fb848e0c0357 && \ + git clean -df && \ cd pixel-perfect-sfm && \ python3.10 -m pip install -e . && \ cd .. From 968176edb801ff08c8818437d0b80945e16bc13d Mon Sep 17 00:00:00 2001 From: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com> Date: Thu, 19 Oct 2023 10:25:44 -0700 Subject: [PATCH 047/101] Support multiclient in viewer_beta with separate render threads for each client (#2519) * Initial multiclient, jittery screen bc fighting between clients * Fix multiclient jitter with separate render threads for each client * Handle client disconnect * Fix viewer.init_scene and viewer_elements.get_camera for multiclient --------- Co-authored-by: Justin Kerr Co-authored-by: Brent Yi --- .../viewer_beta/render_state_machine.py | 12 ++-- nerfstudio/viewer_beta/viewer.py | 66 ++++++++++++------- nerfstudio/viewer_beta/viewer_elements.py | 15 +++-- 3 files changed, 59 insertions(+), 34 deletions(-) diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index 6355a5ba64..ce4cbb1322 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -21,6 +21,7 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple, get_args import torch +from viser import ClientHandle from nerfstudio.model_components.renderers import background_color_override_context from nerfstudio.utils import colormaps, writer from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter @@ -52,7 +53,7 @@ class RenderStateMachine(threading.Thread): viewer: the viewer state """ - def __init__(self, viewer: Viewer, viser_scale_ratio: float): + def __init__(self, viewer: Viewer, viser_scale_ratio: float, client: ClientHandle): threading.Thread.__init__(self) self.transitions: Dict[RenderStates, Dict[RenderActions, RenderStates]] = { s: {} for s in get_args(RenderStates) @@ -77,6 +78,8 @@ def __init__(self, viewer: Viewer, viser_scale_ratio: float): self.daemon = True self.output_keys = {} self.viser_scale_ratio = viser_scale_ratio + self.client = client + self.running = True def action(self, action: RenderAction): """Takes an action and updates the state machine @@ -107,7 +110,7 @@ def action(self, action: RenderAction): self.render_trigger.set() def _render_img(self, camera_state: CameraState): - """Takes the current camera, generates rays, and renders the iamge + """Takes the current camera, generates rays, and renders the image Args: camera_state: the current camera state @@ -166,7 +169,7 @@ def _render_img(self, camera_state: CameraState): def run(self): """Main loop for the render 
thread""" - while True: + while self.running: if not self.render_trigger.wait(0.2): # if we haven't received a trigger in a while, send a static action if self.viewer.camera_state is not None: @@ -237,7 +240,8 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any]): depth = ( outputs["gl_z_buf_depth"].cpu().numpy() * self.viser_scale_ratio if "gl_z_buf_depth" in outputs else None ) - self.viewer.viser_server.set_background_image( + + self.client.set_background_image( selected_output.cpu().numpy(), format=self.viewer.config.image_format, jpeg_quality=self.viewer.config.jpeg_quality, diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index cc352d553f..41b142aeaa 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -103,8 +103,8 @@ def __init__( self.step = 0 self.train_btn_state: Literal["training", "paused", "completed"] = "training" self._prev_train_state: Literal["training", "paused", "completed"] = "training" + self.last_move_time = 0 - self.client: Optional[viser.ClientHandle] = None self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) buttons = ( viser.theme.TitlebarButton( @@ -137,7 +137,8 @@ def __init__( brand_color=(255, 211, 105), ) - self.render_statemachine = RenderStateMachine(self, VISER_NERFSTUDIO_SCALE_RATIO) + self.render_statemachines: Dict[int, RenderStateMachine] = {} + self.viser_server.on_client_disconnect(self.handle_disconnect) self.viser_server.on_client_connect(self.handle_new_client) tabs = self.viser_server.add_gui_tab_group() @@ -202,24 +203,30 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem ] for c in self.viewer_controls: c._setup(self) - self.render_statemachine.start() + + def get_camera_state(self, client: viser.ClientHandle) -> CameraState: + R = vtf.SO3(wxyz=client.camera.wxyz) + R = R @ vtf.SO3.from_x_radians(np.pi) + R = torch.tensor(R.as_matrix()) + pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO + c2w = torch.concatenate([R, pos[:, None]], dim=1) + camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w) + return camera_state + + def handle_disconnect(self, client: viser.ClientHandle) -> None: + self.render_statemachines[client.client_id].running = False + self.render_statemachines.pop(client.client_id) def handle_new_client(self, client: viser.ClientHandle) -> None: - self.client = client - self.last_move_time = 0 + self.render_statemachines[client.client_id] = RenderStateMachine(self, VISER_NERFSTUDIO_SCALE_RATIO, client) + self.render_statemachines[client.client_id].start() @client.camera.on_update def _(cam: viser.CameraHandle) -> None: - assert self.client is not None - with client.atomic(): - self.last_move_time = time.time() - R = vtf.SO3(wxyz=self.client.camera.wxyz) - R = R @ vtf.SO3.from_x_radians(np.pi) - R = torch.tensor(R.as_matrix()) - pos = torch.tensor(self.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO - c2w = torch.concatenate([R, pos[:, None]], dim=1) - self.camera_state = CameraState(fov=self.client.camera.fov, aspect=self.client.camera.aspect, c2w=c2w) - self.render_statemachine.action(RenderAction("move", self.camera_state)) + self.last_move_time = time.time() + with self.viser_server.atomic(): + camera_state = self.get_camera_state(client) + self.render_statemachines[client.client_id].action(RenderAction("move", camera_state)) def set_camera_visibility(self, visible: bool) 
-> None: """Toggle the visibility of the training cameras.""" @@ -250,8 +257,11 @@ def update_camera_poses(self): def _interrupt_render(self, _) -> None: """Interrupt current render.""" - if self.camera_state is not None: - self.render_statemachine.action(RenderAction("rerender", self.camera_state)) + clients = self.viser_server.get_clients() + for id in clients: + camera_state = self.get_camera_state(clients[id]) + if camera_state is not None: + self.render_statemachines[id].action(RenderAction("rerender", camera_state)) def _toggle_training_state(self, _) -> None: """Toggle the trainer's training state.""" @@ -263,8 +273,11 @@ def _toggle_training_state(self, _) -> None: def _crop_params_update(self, _) -> None: """Update crop parameters""" - if self.camera_state is not None: - self.render_statemachine.action(RenderAction("move", self.camera_state)) + clients = self.viser_server.get_clients() + for id in clients: + camera_state = self.get_camera_state(clients[id]) + if camera_state is not None: + self.render_statemachines[id].action(RenderAction("move", camera_state)) def _output_type_change(self, _): self.output_type_changed = True @@ -327,10 +340,9 @@ def init_scene( @camera_handle.on_click def _(event: viser.SceneNodePointerEvent[viser.CameraFrustumHandle]) -> None: - assert self.client is not None - with self.client.atomic(): - self.client.camera.position = event.target.position - self.client.camera.wxyz = event.target.wxyz + with event.client.atomic(): + event.client.camera.position = event.target.position + event.client.camera.wxyz = event.target.wxyz self.camera_handles[idx] = camera_handle self.original_c2w[idx] = c2w @@ -347,7 +359,7 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N """ self.step = step - if self.camera_state is None: + if len(self.render_statemachines) == 0: return # this stops training while moving to make the response smoother while time.time() - self.last_move_time < 0.1: @@ -371,7 +383,11 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N render_freq = 30 if step > self.last_step + render_freq: self.last_step = step - self.render_statemachine.action(RenderAction("step", self.camera_state)) + clients = self.viser_server.get_clients() + for id in clients: + camera_state = self.get_camera_state(clients[id]) + if camera_state is not None: + self.render_statemachines[id].action(RenderAction("step", camera_state)) self.update_camera_poses() self.control_panel.update_step(step) diff --git a/nerfstudio/viewer_beta/viewer_elements.py b/nerfstudio/viewer_beta/viewer_elements.py index 916566d5d9..82a8b3d447 100644 --- a/nerfstudio/viewer_beta/viewer_elements.py +++ b/nerfstudio/viewer_beta/viewer_elements.py @@ -117,7 +117,7 @@ def set_crop(self, min_point: Tuple[float, float, float], max_point: Tuple[float """ raise NotImplementedError() - def get_camera(self, img_height: int, img_width: int) -> Optional[Cameras]: + def get_camera(self, img_height: int, img_width: int, client_id: Optional[int] = None) -> Optional[Cameras]: """ Returns the Cameras object representing the current camera for the viewer, or None if the viewer is not connected yet @@ -126,16 +126,21 @@ def get_camera(self, img_height: int, img_width: int) -> Optional[Cameras]: img_height: The height of the image to get camera intrinsics for img_width: The width of the image to get camera intrinsics for """ - assert self.viewer.client is not None + clients = self.viser_server.get_clients() + if len(clients) == 0: + return None + if not 
client_id: + client_id = list(clients.keys())[0] from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO - R = vtf.SO3(wxyz=self.viewer.client.camera.wxyz) + client = clients[client_id] + R = vtf.SO3(wxyz=client.camera.wxyz) R = R @ vtf.SO3.from_x_radians(np.pi) R = torch.tensor(R.as_matrix()) - pos = torch.tensor(self.viewer.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO + pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO c2w = torch.concatenate([R, pos[:, None]], dim=1) - camera_state = CameraState(fov=self.viewer.client.camera.fov, aspect=self.viewer.client.camera.aspect, c2w=c2w) + camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w) return get_camera(camera_state, img_height, img_width) def register_click_cb(self, cb: Callable): From f31f3bba12841955102f3f3846ee9f855f4a6878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yueh-Cheng=20Liu=20=E5=8A=89=E5=B2=B3=E6=89=BF?= Date: Thu, 19 Oct 2023 20:19:11 +0200 Subject: [PATCH 048/101] Add scannetpp dataparser (#2498) * Add scannetpp dataparser * Fix formatting * Remove unused import * Add links and descriptions * Update input file structure and remove unused code * Reformat with black --------- Co-authored-by: Matthew Tancik --- nerfstudio/configs/dataparser_configs.py | 2 + .../data/dataparsers/scannetpp_dataparser.py | 207 ++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 nerfstudio/data/dataparsers/scannetpp_dataparser.py diff --git a/nerfstudio/configs/dataparser_configs.py b/nerfstudio/configs/dataparser_configs.py index f60092a46d..e3fa66ec5c 100644 --- a/nerfstudio/configs/dataparser_configs.py +++ b/nerfstudio/configs/dataparser_configs.py @@ -35,6 +35,7 @@ from nerfstudio.data.dataparsers.scannet_dataparser import ScanNetDataParserConfig from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig +from nerfstudio.data.dataparsers.scannetpp_dataparser import ScanNetppDataParserConfig from nerfstudio.plugins.registry_dataparser import discover_dataparsers dataparsers = { @@ -51,6 +52,7 @@ "sdfstudio-data": SDFStudioDataParserConfig(), "nerfosr-data": NeRFOSRDataParserConfig(), "sitcoms3d-data": Sitcoms3DDataParserConfig(), + "scannetpp-data": ScanNetppDataParserConfig(), "colmap": ColmapDataParserConfig(), } diff --git a/nerfstudio/data/dataparsers/scannetpp_dataparser.py b/nerfstudio/data/dataparsers/scannetpp_dataparser.py new file mode 100644 index 0000000000..bbd260730f --- /dev/null +++ b/nerfstudio/data/dataparsers/scannetpp_dataparser.py @@ -0,0 +1,207 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Data parser for ScanNet++ datasets. 
""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal, Type + +import numpy as np +import torch +from nerfstudio.cameras import camera_utils +from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType +from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs +from nerfstudio.data.scene_box import SceneBox +from nerfstudio.utils.io import load_from_json +from nerfstudio.utils.rich_utils import CONSOLE + + +@dataclass +class ScanNetppDataParserConfig(DataParserConfig): + """ScanNet++ dataset config. + ScanNet++ dataset (https://kaldir.vc.in.tum.de/scannetpp/) is a real-world 3D indoor dataset for semantics understanding and novel view synthesis. + This dataparser follow the file structure of the dataset. + Expected structure of the directory: + + .. code-block:: text + + root/ + β”œβ”€β”€ SCENE_ID0 + β”œβ”€β”€ dslr + β”œβ”€β”€ resized_images + β”œβ”€β”€ resized_anon_masks + β”œβ”€β”€ nerfstudio/transforms.json + β”œβ”€β”€ SCENE_ID1/ + ... + """ + + _target: Type = field(default_factory=lambda: ScanNetpp) + """target class to instantiate""" + data: Path = Path("scannetpp/410c470782") + """Directory to the root of the data.""" + scale_factor: float = 1.0 + """How much to scale the camera origins by.""" + scene_scale: float = 1.5 + """How much to scale the region of interest by. Default is 1.5 since the cameras are inside the rooms.""" + orientation_method: Literal["pca", "up", "vertical", "none"] = "up" + """The method to use for orientation.""" + center_method: Literal["poses", "focus", "none"] = "poses" + """The method to use to center the poses.""" + auto_scale_poses: bool = True + """Whether to automatically scale the poses to fit in +/- 1 bounding box.""" + images_dir: Path = Path("dslr/resized_images") + """Relative path to the images directory (default: resized_images)""" + masks_dir: Path = Path("dslr/resized_anon_masks") + """Relative path to the masks directory (default: resized_anon_masks)""" + transforms_path: Path = Path("dslr/nerfstudio/transforms.json") + """Relative path to the transforms.json file""" + + +@dataclass +class ScanNetpp(DataParser): + """ScanNet++ DatasetParser""" + + config: ScanNetppDataParserConfig + + def _generate_dataparser_outputs(self, split="train"): + assert self.config.data.exists(), f"Data directory {self.config.data} does not exist." + meta = load_from_json(self.config.data / self.config.transforms_path) + data_dir = self.config.data / self.config.images_dir + mask_dir = self.config.data / self.config.masks_dir + + image_filenames = [] + mask_filenames = [] + poses = [] + i_train = [] + i_eval = [] + # sort the frames by fname + frames = meta["frames"] + meta["test_frames"] + test_frames = [f["file_path"] for f in meta["test_frames"]] + frames.sort(key=lambda x: x["file_path"]) + + for idx, frame in enumerate(frames): + filepath = Path(frame["file_path"]) + fname = data_dir / filepath + + image_filenames.append(fname) + poses.append(np.array(frame["transform_matrix"])) + if meta.get("has_mask", True) and "mask_path" in frame: + mask_filepath = Path(frame["mask_path"]) + mask_fname = mask_dir / mask_filepath + mask_filenames.append(mask_fname) + + if frame["file_path"] in test_frames: + i_eval.append(idx) + else: + i_train.append(idx) + + assert len(mask_filenames) == 0 or ( + len(mask_filenames) == len(image_filenames) + ), """ + Different number of image and mask filenames. 
+ You should check that mask_path is specified for every frame (or zero frames) in transforms.json. + """ + + if split == "train": + indices = i_train + elif split in ["val", "test"]: + indices = i_eval + else: + raise ValueError(f"Unknown dataparser split {split}") + + if "orientation_override" in meta: + orientation_method = meta["orientation_override"] + CONSOLE.log(f"[yellow] Dataset is overriding orientation method to {orientation_method}") + else: + orientation_method = self.config.orientation_method + + poses = torch.from_numpy(np.array(poses).astype(np.float32)) + poses, transform_matrix = camera_utils.auto_orient_and_center_poses( + poses, + method=orientation_method, + center_method=self.config.center_method, + ) + + # Scale poses + scale_factor = 1.0 + if self.config.auto_scale_poses: + scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3]))) + scale_factor *= self.config.scale_factor + + poses[:, :3, 3] *= scale_factor + + # Choose image_filenames and poses based on split, but after auto orient and scaling the poses. + image_filenames = [image_filenames[i] for i in indices] + mask_filenames = [mask_filenames[i] for i in indices] if len(mask_filenames) > 0 else [] + + idx_tensor = torch.tensor(indices, dtype=torch.long) + poses = poses[idx_tensor] + + # in x,y,z order + # assumes that the scene is centered at the origin + if not self.config.auto_scale_poses: + # Set aabb_scale to scene_scale * the max of the absolute values of the poses + aabb_scale = self.config.scene_scale * float(torch.max(torch.abs(poses[:, :3, 3]))) + else: + aabb_scale = self.config.scene_scale + scene_box = SceneBox( + aabb=torch.tensor( + [[-aabb_scale, -aabb_scale, -aabb_scale], [aabb_scale, aabb_scale, aabb_scale]], dtype=torch.float32 + ) + ) + + if "camera_model" in meta: + camera_type = CAMERA_MODEL_TO_TYPE[meta["camera_model"]] + else: + camera_type = CameraType.PERSPECTIVE + + fx = float(meta["fl_x"]) + fy = float(meta["fl_y"]) + cx = float(meta["cx"]) + cy = float(meta["cy"]) + height = int(meta["h"]) + width = int(meta["w"]) + distortion_params = camera_utils.get_distortion_params( + k1=float(meta["k1"]) if "k1" in meta else 0.0, + k2=float(meta["k2"]) if "k2" in meta else 0.0, + k3=float(meta["k3"]) if "k3" in meta else 0.0, + k4=float(meta["k4"]) if "k4" in meta else 0.0, + p1=float(meta["p1"]) if "p1" in meta else 0.0, + p2=float(meta["p2"]) if "p2" in meta else 0.0, + ) + + cameras = Cameras( + fx=fx, + fy=fy, + cx=cx, + cy=cy, + distortion_params=distortion_params, + height=height, + width=width, + camera_to_worlds=poses[:, :3, :4], + camera_type=camera_type, + ) + + dataparser_outputs = DataparserOutputs( + image_filenames=image_filenames, + cameras=cameras, + scene_box=scene_box, + mask_filenames=mask_filenames if len(mask_filenames) > 0 else None, + dataparser_scale=scale_factor, + dataparser_transform=transform_matrix, + metadata={}, + ) + return dataparser_outputs From e2060793705f8a6f2b0533dd844991050c6c1760 Mon Sep 17 00:00:00 2001 From: Shiva Gantha Date: Fri, 20 Oct 2023 00:58:27 -0400 Subject: [PATCH 049/101] minor typing error (#2545) --- docs/developer_guides/pipelines/datamanagers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer_guides/pipelines/datamanagers.md b/docs/developer_guides/pipelines/datamanagers.md index 78aec242da..84722ba78b 100644 --- a/docs/developer_guides/pipelines/datamanagers.md +++ b/docs/developer_guides/pipelines/datamanagers.md @@ -66,7 +66,7 @@ class VanillaDataManagerConfig(InstantiateConfig): """specifies the 
camera pose optimizer used during training""" ``` -Let's take a quick look at how the `run_train` method is implemented. Here we sample images, then pixels, and then return the RayBundle and RayGT information. +Let's take a quick look at how the `next_train` method is implemented. Here we sample images, then pixels, and then return the RayBundle and RayGT information. ```python def next_train(self, step: int) -> Tuple[RayBundle, Dict]: From 9e7144620bdfb92e40154288e09f288e52c4c814 Mon Sep 17 00:00:00 2001 From: YangXiuyu Date: Fri, 20 Oct 2023 23:07:12 +0800 Subject: [PATCH 050/101] Missing a space in base_datamanager.py (#2546) Update base_datamanager.py --- nerfstudio/data/datamanagers/base_datamanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index b4cbeaa12d..9be95514c1 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -342,7 +342,7 @@ class VanillaDataManagerConfig(DataManagerConfig): along with relevant information about camera intrinsics """ patch_size: int = 1 - """Size of patch to sample from. If >1, patch-based sampling will be used.""" + """Size of patch to sample from. If > 1, patch-based sampling will be used.""" camera_optimizer: Optional[CameraOptimizerConfig] = field(default=None) """Deprecated, has been moved to the model config.""" pixel_sampler: PixelSamplerConfig = PixelSamplerConfig() From 23cc960bfc1b2c24925067d6fd6f96daaba4e151 Mon Sep 17 00:00:00 2001 From: Tobit Flatscher <53856473+2b-t@users.noreply.github.com> Date: Fri, 20 Oct 2023 19:42:42 +0100 Subject: [PATCH 051/101] Fix pixel-perfect-sfm installation in Dockerfile (#2549) fix: Fix pixel-perfect-sfm installation in Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f333312e37..300d00df3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -149,9 +149,9 @@ RUN git clone --branch v1.0 --recursive https://github.com/cvg/pyceres.git && \ # Install pixel perfect sfm. RUN git clone --recursive https://github.com/cvg/pixel-perfect-sfm.git && \ + cd pixel-perfect-sfm && \ git reset --hard 40f7c1339328b2a0c7cf71f76623fb848e0c0357 && \ git clean -df && \ - cd pixel-perfect-sfm && \ python3.10 -m pip install -e . && \ cd .. From bf32fc1e69605cdf7715dd0c1c958c4abfcd59f7 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Fri, 20 Oct 2023 14:45:54 -0700 Subject: [PATCH 052/101] static imgs never rendered in multiclient (#2551) * fix jittering in markdown in viewer beta * Revert "fix jittering in markdown in viewer beta" This reverts commit 70ade42e46f3dcf0e89e4efc445650f6d6525673. 
* static rendering never triggered --- nerfstudio/viewer_beta/render_state_machine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index ce4cbb1322..5f9ec1f977 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -172,8 +172,7 @@ def run(self): while self.running: if not self.render_trigger.wait(0.2): # if we haven't received a trigger in a while, send a static action - if self.viewer.camera_state is not None: - self.action(RenderAction(action="static", camera_state=self.viewer.camera_state)) + self.action(RenderAction(action="static", camera_state=self.viewer.get_camera_state(self.client))) action = self.next_action self.render_trigger.clear() if action is None: From f362eae60832c1f41d316b19d636cb3bfd34f7a1 Mon Sep 17 00:00:00 2001 From: Daniel Crispell Date: Fri, 20 Oct 2023 22:41:19 -0400 Subject: [PATCH 053/101] Move only per-chunk data to the GPU to support rendering large images (#2481) * Move only per-chunk data to the GPU to support large images * make type checking happy * Use isinstance check torch.is_tensor() doesn't have a TypeGuard annotation --------- Co-authored-by: Brent Yi --- nerfstudio/models/base_model.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nerfstudio/models/base_model.py b/nerfstudio/models/base_model.py index ebda710b31..d7a2217443 100644 --- a/nerfstudio/models/base_model.py +++ b/nerfstudio/models/base_model.py @@ -168,6 +168,7 @@ def get_outputs_for_camera_ray_bundle(self, camera_ray_bundle: RayBundle) -> Dic Args: camera_ray_bundle: ray bundle to calculate outputs over """ + input_device = camera_ray_bundle.directions.device num_rays_per_chunk = self.config.eval_num_rays_per_chunk image_height, image_width = camera_ray_bundle.origins.shape[:2] num_rays = len(camera_ray_bundle) @@ -176,12 +177,15 @@ def get_outputs_for_camera_ray_bundle(self, camera_ray_bundle: RayBundle) -> Dic start_idx = i end_idx = i + num_rays_per_chunk ray_bundle = camera_ray_bundle.get_row_major_sliced_ray_bundle(start_idx, end_idx) + # move the chunk inputs to the model device + ray_bundle = ray_bundle.to(self.device) outputs = self.forward(ray_bundle=ray_bundle) for output_name, output in outputs.items(): # type: ignore - if not torch.is_tensor(output): + if not isinstance(output, torch.Tensor): # TODO: handle lists of tensors as well continue - outputs_lists[output_name].append(output) + # move the chunk outputs from the model device back to the device of the inputs. + outputs_lists[output_name].append(output.to(input_device)) outputs = {} for output_name, outputs_list in outputs_lists.items(): outputs[output_name] = torch.cat(outputs_list).view(image_height, image_width, -1) # type: ignore From e8cee3fdadda5068309cb14bdf04aced082cb091 Mon Sep 17 00:00:00 2001 From: machenmusik Date: Sat, 21 Oct 2023 00:08:51 -0400 Subject: [PATCH 054/101] make refine intrinsics during ns-process-data optional (#2550) ns-process-data will still refine intrinsics by default for compatibility. Specify --no-refine-intrinsics to turn off. 
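For reference, a minimal usage sketch of the new option; only `refine_intrinsics` and the `--no-refine-intrinsics` flag come from this patch, while the paths, the `CameraModel` value, and its import location are illustrative assumptions rather than part of the change.

```python
# Hedged sketch (not part of the patch): run COLMAP without the extra
# bundle-adjustment pass that refines the principal point / intrinsics.
# Equivalent CLI usage: ns-process-data images ... --no-refine-intrinsics
from pathlib import Path

from nerfstudio.process_data.colmap_utils import run_colmap
from nerfstudio.process_data.process_data_utils import CameraModel  # assumed import location

run_colmap(
    image_dir=Path("data/scene/images"),   # placeholder paths
    colmap_dir=Path("data/scene/colmap"),
    camera_model=CameraModel.OPENCV,       # assumed camera model for this example
    gpu=True,
    refine_intrinsics=False,               # keyword introduced below; defaults to True
)
```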
--- .../colmap_converter_to_nerfstudio_dataset.py | 4 ++++ nerfstudio/process_data/colmap_utils.py | 22 +++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py index 7ab711b46e..b4dccbdd82 100644 --- a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py +++ b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py @@ -41,6 +41,9 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset): refine_pixsfm: bool = False """If True, runs refinement using Pixel Perfect SFM. Only works with hloc sfm_tool""" + refine_intrinsics: bool = True + """If True, do bundle adjustment to refine intrinsics. + Only works with colmap sfm_tool""" feature_type: Literal[ "any", "sift", @@ -204,6 +207,7 @@ def _run_colmap(self, mask_path: Optional[Path] = None): gpu=self.gpu, verbose=self.verbose, matching_method=self.matching_method, + refine_intrinsics=self.refine_intrinsics, colmap_cmd=self.colmap_cmd, ) elif sfm_tool == "hloc": diff --git a/nerfstudio/process_data/colmap_utils.py b/nerfstudio/process_data/colmap_utils.py index 2322075b52..86f3a15241 100644 --- a/nerfstudio/process_data/colmap_utils.py +++ b/nerfstudio/process_data/colmap_utils.py @@ -93,6 +93,7 @@ def run_colmap( gpu: bool = True, verbose: bool = False, matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree", + refine_intrinsics: bool = True, colmap_cmd: str = "colmap", ) -> None: """Runs COLMAP on the images. @@ -105,6 +106,7 @@ def run_colmap( gpu: If True, use GPU. verbose: If True, logs the output of the command. matching_method: Matching method to use. + refine_intrinsics: If True, refine intrinsics. colmap_cmd: Path to the COLMAP executable. """ @@ -165,15 +167,17 @@ def run_colmap( ): run_command(mapper_cmd, verbose=verbose) CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.") - with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose): - bundle_adjuster_cmd = [ - f"{colmap_cmd} bundle_adjuster", - f"--input_path {sparse_dir}/0", - f"--output_path {sparse_dir}/0", - "--BundleAdjustment.refine_principal_point 1", - ] - run_command(" ".join(bundle_adjuster_cmd), verbose=verbose) - CONSOLE.log("[bold green]:tada: Done refining intrinsics.") + + if refine_intrinsics: + with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose): + bundle_adjuster_cmd = [ + f"{colmap_cmd} bundle_adjuster", + f"--input_path {sparse_dir}/0", + f"--output_path {sparse_dir}/0", + "--BundleAdjustment.refine_principal_point 1", + ] + run_command(" ".join(bundle_adjuster_cmd), verbose=verbose) + CONSOLE.log("[bold green]:tada: Done refining intrinsics.") def parse_colmap_camera_params(camera) -> Dict[str, Any]: From dff216d4b8c941f1db9dc22afec4f29d74a1ecc1 Mon Sep 17 00:00:00 2001 From: Cyrus Vachha Date: Mon, 23 Oct 2023 01:05:19 -0700 Subject: [PATCH 055/101] Few viewer changes (#2553) * Viewer changes for render panel for fps and resolution Small changes for the new viewer including changing the FPS slider to a text entry and fixing the incorrect resolution text box description. * Check if render_time is nonzero in render state machine Added a check if render_time is non zero in the render_state_machine file in the new viewer due to a divide by zero error occasionally causing the viewer to stop running. I noticed this usually happened when using nerfacto-huge. 
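As a side note, the playback bookkeeping that the FPS change feeds into is simply frames = fps x duration; a tiny self-contained illustration follows (simplified from the `framerate_number` / `duration_number` handling in the diff below; the helper function name is an assumption).

```python
# Minimal illustration: with a free-form number entry the frame rate is no longer
# clamped to the old slider range, so fractional values also yield a sensible
# frame count. This mirrors int(framerate_number.value * duration_number.value).
def num_frames(framerate_fps: float, duration_sec: float) -> int:
    return int(framerate_fps * duration_sec)

assert num_frames(30.0, 4.0) == 120
assert num_frames(0.5, 10.0) == 5  # sub-1 FPS entries become possible with a number field
```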
--- nerfstudio/viewer_beta/render_panel.py | 20 +++++++++---------- .../viewer_beta/render_state_machine.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 4543009942..53966fe74e 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -289,7 +289,7 @@ def _(_) -> None: min=(50, 50), max=(10_000, 10_000), step=1, - hint="Tension parameter for adjusting smoothness of spline interpolation.", + hint="Render output resolution in pixels.", ) @resolution.on_update @@ -428,12 +428,12 @@ def _(_) -> None: playback_folder = server.add_gui_folder("Playback") with playback_folder: duration_number = server.add_gui_number("Duration (sec)", min=0.0, max=1e8, step=0.0001, initial_value=4.0) - framerate_slider = server.add_gui_slider("FPS", min=1.0, max=240.0, step=1e-8, initial_value=30.0) + framerate_number = server.add_gui_number("Frame rate (FPS)", min=0.1, max=240.0, step=1e-8, initial_value=30.0) framerate_buttons = server.add_gui_button_group("", ("24", "30", "60")) @framerate_buttons.on_click def _(_) -> None: - framerate_slider.value = float(framerate_buttons.value) + framerate_number.value = float(framerate_buttons.value) play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY) pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False) @@ -451,7 +451,7 @@ def _(_: viser.GuiEvent) -> None: def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: """Helper for creating the current frame # slider. This is removed and re-added anytime the `max` value changes.""" - max_frame_index = int(framerate_slider.value * duration_number.value) - 1 + max_frame_index = int(framerate_number.value * duration_number.value) - 1 if max_frame_index <= 0: return None @@ -468,7 +468,7 @@ def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: @preview_frame_slider.on_update def _(_) -> None: - max_frame_index = int(framerate_slider.value * duration_number.value) - 1 + max_frame_index = int(framerate_number.value * duration_number.value) - 1 maybe_pose_and_fov = camera_path.interpolate_pose_and_fov( preview_frame_slider.value / max_frame_index if max_frame_index > 0 else 0 ) @@ -507,7 +507,7 @@ def _(_) -> None: preview_frame_slider = add_preview_frame_slider() @duration_number.on_update - @framerate_slider.on_update + @framerate_number.on_update def _(_) -> None: nonlocal preview_frame_slider old = preview_frame_slider @@ -527,11 +527,11 @@ def _(_) -> None: def play() -> None: while not play_button.visible: - max_frame = int(framerate_slider.value * duration_number.value) + max_frame = int(framerate_number.value * duration_number.value) if max_frame > 0: assert preview_frame_slider is not None preview_frame_slider.value = (preview_frame_slider.value + 1) % max_frame - time.sleep(1.0 / framerate_slider.value) + time.sleep(1.0 / framerate_number.value) threading.Thread(target=play).start() @@ -556,7 +556,7 @@ def _(_) -> None: @render_button.on_click def _(event: viser.GuiEvent) -> None: assert event.client is not None - num_frames = int(framerate_slider.value * duration_number.value) + num_frames = int(framerate_number.value * duration_number.value) json_data = {} # json data has the properties: # keyframes: list of keyframes with @@ -594,7 +594,7 @@ def _(event: viser.GuiEvent) -> None: json_data["camera_type"] = camera_type.value.lower() json_data["render_height"] = resolution.value[1] 
json_data["render_width"] = resolution.value[0] - json_data["fps"] = framerate_slider.value + json_data["fps"] = framerate_number.value json_data["seconds"] = duration_number.value json_data["is_cycle"] = loop.value json_data["smoothness_value"] = smoothness.value diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index 5f9ec1f977..46ceeec39c 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -161,7 +161,7 @@ def _render_img(self, camera_state: CameraState): pts = (R @ (pts.view(-1, 3).T)).T.view(*camera_ray_bundle.directions.shape) outputs["gl_z_buf_depth"] = -pts[..., 2:3] # negative z axis is the coordinate convention render_time = vis_t.duration - if writer.is_initialized(): + if writer.is_initialized() and render_time != 0: writer.put_time( name=EventName.VIS_RAYS_PER_SEC, duration=num_rays / render_time, step=step, avg_over_steps=True ) From 8a44e46179c465bcaaf28066cbb55777073e3224 Mon Sep 17 00:00:00 2001 From: machenmusik Date: Wed, 25 Oct 2023 19:08:16 -0400 Subject: [PATCH 056/101] ns-process-data images fix: multinerf uses equals sign (#2558) multinerf uses equals sign --- nerfstudio/process_data/colmap_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/process_data/colmap_utils.py b/nerfstudio/process_data/colmap_utils.py index 86f3a15241..83188c005b 100644 --- a/nerfstudio/process_data/colmap_utils.py +++ b/nerfstudio/process_data/colmap_utils.py @@ -156,7 +156,7 @@ def run_colmap( f"--output_path {sparse_dir}", ] if colmap_version >= 3.7: - mapper_cmd.append("--Mapper.ba_global_function_tolerance 1e-6") + mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6") mapper_cmd = " ".join(mapper_cmd) From e4a0050d0d61022530204e1644998afd0f77fb2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=8F=E3=83=86=E3=82=A3=E3=80=80?= =?UTF-8?q?=E4=BB=81?= Date: Fri, 27 Oct 2023 07:15:58 +0900 Subject: [PATCH 057/101] Add gradient scaling option to more methods (#2555) add gradient scaling to instant-ngp, mipnerf, tensorf, vanilla-nerf --- nerfstudio/models/instant_ngp.py | 6 +++++- nerfstudio/models/mipnerf.py | 6 +++++- nerfstudio/models/tensorf.py | 6 +++++- nerfstudio/models/vanilla_nerf.py | 8 +++++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/nerfstudio/models/instant_ngp.py b/nerfstudio/models/instant_ngp.py index c67d45db57..48cf1a95ab 100644 --- a/nerfstudio/models/instant_ngp.py +++ b/nerfstudio/models/instant_ngp.py @@ -37,7 +37,7 @@ from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.field_components.spatial_distortions import SceneContraction from nerfstudio.fields.nerfacto_field import NerfactoField -from nerfstudio.model_components.losses import MSELoss +from nerfstudio.model_components.losses import MSELoss, scale_gradients_by_distance_squared from nerfstudio.model_components.ray_samplers import VolumetricSampler from nerfstudio.model_components.renderers import ( AccumulationRenderer, @@ -78,6 +78,8 @@ class InstantNGPModelConfig(ModelConfig): """How far along ray to start sampling.""" far_plane: float = 1e3 """How far along ray to stop sampling.""" + use_gradient_scaling: bool = False + """Use gradient scaler where the gradients are lower for points closer to the camera.""" use_appearance_embedding: bool = False """Whether to use an appearance embedding.""" background_color: Literal["random", "black", "white"] = "random" @@ -187,6 +189,8 @@ def 
get_outputs(self, ray_bundle: RayBundle): ) field_outputs = self.field(ray_samples) + if self.config.use_gradient_scaling: + field_outputs = scale_gradients_by_distance_squared(field_outputs, ray_samples) # accumulation packed_info = nerfacc.pack_info(ray_indices, num_rays) diff --git a/nerfstudio/models/mipnerf.py b/nerfstudio/models/mipnerf.py index 67ca632514..cf48b08a75 100644 --- a/nerfstudio/models/mipnerf.py +++ b/nerfstudio/models/mipnerf.py @@ -29,7 +29,7 @@ from nerfstudio.field_components.encodings import NeRFEncoding from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.fields.vanilla_nerf_field import NeRFField -from nerfstudio.model_components.losses import MSELoss +from nerfstudio.model_components.losses import MSELoss, scale_gradients_by_distance_squared from nerfstudio.model_components.ray_samplers import PDFSampler, UniformSampler from nerfstudio.model_components.renderers import ( AccumulationRenderer, @@ -109,6 +109,8 @@ def get_outputs(self, ray_bundle: RayBundle): # First pass: field_outputs_coarse = self.field.forward(ray_samples_uniform) + if self.config.use_gradient_scaling: + field_outputs_coarse = scale_gradients_by_distance_squared(field_outputs_coarse, ray_samples_uniform) weights_coarse = ray_samples_uniform.get_weights(field_outputs_coarse[FieldHeadNames.DENSITY]) rgb_coarse = self.renderer_rgb( rgb=field_outputs_coarse[FieldHeadNames.RGB], @@ -122,6 +124,8 @@ def get_outputs(self, ray_bundle: RayBundle): # Second pass: field_outputs_fine = self.field.forward(ray_samples_pdf) + if self.config.use_gradient_scaling: + field_outputs_fine = scale_gradients_by_distance_squared(field_outputs_fine, ray_samples_pdf) weights_fine = ray_samples_pdf.get_weights(field_outputs_fine[FieldHeadNames.DENSITY]) rgb_fine = self.renderer_rgb( rgb=field_outputs_fine[FieldHeadNames.RGB], diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py index 9cc9c7ccf4..53d986337e 100644 --- a/nerfstudio/models/tensorf.py +++ b/nerfstudio/models/tensorf.py @@ -43,7 +43,7 @@ ) from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.fields.tensorf_field import TensoRFField -from nerfstudio.model_components.losses import MSELoss, tv_loss +from nerfstudio.model_components.losses import MSELoss, tv_loss, scale_gradients_by_distance_squared from nerfstudio.model_components.ray_samplers import PDFSampler, UniformSampler from nerfstudio.model_components.renderers import ( AccumulationRenderer, @@ -92,6 +92,8 @@ class TensoRFModelConfig(ModelConfig): """Regularization method used in tensorf paper""" camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3") """Config of the camera optimizer to use""" + use_gradient_scaling: bool = False + """Use gradient scaler where the gradients are lower for points closer to the camera.""" background_color: Literal["random", "last_sample", "black", "white"] = "white" """Whether to randomize the background color.""" @@ -296,6 +298,8 @@ def get_outputs(self, ray_bundle: RayBundle): field_outputs_fine = self.field.forward( ray_samples_pdf, mask=acc_mask, bg_color=colors.WHITE.to(weights.device) ) + if self.config.use_gradient_scaling: + field_outputs_fine = scale_gradients_by_distance_squared(field_outputs_fine, ray_samples_pdf) weights_fine = ray_samples_pdf.get_weights(field_outputs_fine[FieldHeadNames.DENSITY]) diff --git a/nerfstudio/models/vanilla_nerf.py b/nerfstudio/models/vanilla_nerf.py index e95c8dfa49..4f4dbd8f61 100644 --- a/nerfstudio/models/vanilla_nerf.py +++ 
b/nerfstudio/models/vanilla_nerf.py @@ -33,7 +33,7 @@ from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind from nerfstudio.fields.vanilla_nerf_field import NeRFField -from nerfstudio.model_components.losses import MSELoss +from nerfstudio.model_components.losses import MSELoss, scale_gradients_by_distance_squared from nerfstudio.model_components.ray_samplers import PDFSampler, UniformSampler from nerfstudio.model_components.renderers import ( AccumulationRenderer, @@ -58,6 +58,8 @@ class VanillaModelConfig(ModelConfig): """Specifies whether or not to include ray warping based on time.""" temporal_distortion_params: Dict[str, Any] = to_immutable_dict({"kind": TemporalDistortionKind.DNERF}) """Parameters to instantiate temporal distortion with""" + use_gradient_scaling: bool = False + """Use gradient scaler where the gradients are lower for points closer to the camera.""" background_color: Literal["random", "last_sample", "black", "white"] = "white" """Whether to randomize the background color.""" @@ -154,6 +156,8 @@ def get_outputs(self, ray_bundle: RayBundle): # coarse field: field_outputs_coarse = self.field_coarse.forward(ray_samples_uniform) + if self.config.use_gradient_scaling: + field_outputs_coarse = scale_gradients_by_distance_squared(field_outputs_coarse, ray_samples_uniform) weights_coarse = ray_samples_uniform.get_weights(field_outputs_coarse[FieldHeadNames.DENSITY]) rgb_coarse = self.renderer_rgb( rgb=field_outputs_coarse[FieldHeadNames.RGB], @@ -172,6 +176,8 @@ def get_outputs(self, ray_bundle: RayBundle): # fine field: field_outputs_fine = self.field_fine.forward(ray_samples_pdf) + if self.config.use_gradient_scaling: + field_outputs_fine = scale_gradients_by_distance_squared(field_outputs_fine, ray_samples_pdf) weights_fine = ray_samples_pdf.get_weights(field_outputs_fine[FieldHeadNames.DENSITY]) rgb_fine = self.renderer_rgb( rgb=field_outputs_fine[FieldHeadNames.RGB], From 87773bb08148b4132e6fd92b153c5e82a88f13ed Mon Sep 17 00:00:00 2001 From: Ethan Weber Date: Thu, 26 Oct 2023 16:56:58 -0700 Subject: [PATCH 058/101] Expose camera frustums size and occlusions to CLI (#2556) CLI expose for camera frustums --- nerfstudio/configs/base_config.py | 4 ++++ nerfstudio/viewer_beta/control_panel.py | 8 +++++++- nerfstudio/viewer_beta/viewer.py | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py index fc906fae9e..b29661e116 100644 --- a/nerfstudio/configs/base_config.py +++ b/nerfstudio/configs/base_config.py @@ -148,3 +148,7 @@ class ViewerConfig(PrintableConfig): """Quality tradeoff to use for jpeg compression.""" make_share_url: bool = False """Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored.""" + camera_frustum_scale: float = 0.1 + """Scale for the camera frustums in the viewer.""" + default_composite_depth: bool = True + """The default value for compositing depth. 
Turn off if you want to see the camera frustums without occlusions.""" diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index a6fdef970d..faa027622b 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -45,6 +45,7 @@ class ControlPanel: (eg train speed, max res, etc) crop_update_cb: a callback that will be called when the user changes the crop parameters update_output_cb: a callback that will be called when the user changes the output render + default_composite_depth: whether to default to compositing depth or not """ def __init__( @@ -58,11 +59,13 @@ def __init__( update_split_output_cb: Callable, toggle_training_state_cb: Callable, camera_vis: Callable, + default_composite_depth: bool = True, ): self.viser_scale_ratio = scale_ratio # elements holds a mapping from tag: [elements] self.viser_server = viser_server self._elements_by_tag: DefaultDict[str, List[ViewerElement]] = defaultdict(lambda: []) + self.default_composite_depth = default_composite_depth self._train_speed = ViewerButtonGroup( name="Train Speed", @@ -131,7 +134,10 @@ def __init__( hint="Target training utilization, 0.0 is slow, 1.0 is fast. Doesn't affect final render quality", ) self._layer_depth = ViewerCheckbox( - "Composite Depth", True, cb_hook=rerender_cb, hint="Allow NeRF to occlude 3D browser objects" + "Composite Depth", + self.default_composite_depth, + cb_hook=rerender_cb, + hint="Allow NeRF to occlude 3D browser objects", ) self._max_res = ViewerSlider( "Max Res", 512, 64, 2048, 100, cb_hook=rerender_cb, hint="Maximum resolution to render in viewport" diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 41b142aeaa..ae8ce3ae77 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -154,6 +154,7 @@ def __init__( self._output_split_type_change, self._toggle_training_state, self.set_camera_visibility, + default_composite_depth=self.config.default_composite_depth, ) config_path = self.log_filename.parents[0] / "config.yml" with tabs.add_tab("Render", viser.Icon.CAMERA): @@ -331,7 +332,7 @@ def init_scene( camera_handle = self.viser_server.add_camera_frustum( name=f"/cameras/camera_{idx:05d}", fov=float(2 * np.arctan(camera.cx / camera.fx[0])), - scale=0.1, + scale=self.config.camera_frustum_scale, aspect=float(camera.cx[0] / camera.cy[0]), image=image_uint8, wxyz=R.wxyz, From 793fca55e61e574abd068ef8d585e3a9425edf53 Mon Sep 17 00:00:00 2001 From: Maxim Bonnaerens Date: Fri, 27 Oct 2023 22:55:39 +0200 Subject: [PATCH 059/101] Make resetting the near_plane in NearFarCollider optional. (#2465) * Make resetting the near_plane in NearFarCollider optional. It is not always desired to reset the near plane to 0.0 during inference. Added an optional flag. * no need to change precision * Update scene_colliders.py --------- Co-authored-by: Brent Yi --- nerfstudio/model_components/scene_colliders.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nerfstudio/model_components/scene_colliders.py b/nerfstudio/model_components/scene_colliders.py index 69c073b3b6..025a462658 100644 --- a/nerfstudio/model_components/scene_colliders.py +++ b/nerfstudio/model_components/scene_colliders.py @@ -172,16 +172,20 @@ class NearFarCollider(SceneCollider): Args: near_plane: distance to near plane far_plane: distance to far plane + reset_near_plane: whether to reset the near plane to 0.0 during inference. 
The near plane can be + helpful for reducing floaters during training, but it can cause clipping artifacts during + inference when an evaluation or viewer camera moves closer to the object. """ - def __init__(self, near_plane: float, far_plane: float, **kwargs) -> None: + def __init__(self, near_plane: float, far_plane: float, reset_near_plane: bool = True, **kwargs) -> None: self.near_plane = near_plane self.far_plane = far_plane + self.reset_near_plane = reset_near_plane super().__init__(**kwargs) def set_nears_and_fars(self, ray_bundle: RayBundle) -> RayBundle: ones = torch.ones_like(ray_bundle.origins[..., 0:1]) - near_plane = self.near_plane if self.training else 0 + near_plane = self.near_plane if (self.training or not self.reset_near_plane) else 0 ray_bundle.nears = ones * near_plane ray_bundle.fars = ones * self.far_plane return ray_bundle From 55616e1299a853c64963550a10dbd5730640cb03 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 31 Oct 2023 12:54:00 -0700 Subject: [PATCH 060/101] Print beta URL in banner correctly (#2569) --------- Co-authored-by: Brent Yi --- nerfstudio/viewer_beta/viewer.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index ae8ce3ae77..3054cf5cba 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -95,8 +95,6 @@ def __init__( websocket_port = self.config.websocket_port self.log_filename.parent.mkdir(exist_ok=True) - self.viewer_url = viewer_utils.get_viewer_url(websocket_port) - # viewer specific variables self.output_type_changed = True self.output_split_type_changed = True @@ -106,6 +104,21 @@ def __init__( self.last_move_time = 0 self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) + # Set the name of the URL either to the share link if available, or the localhost + # TODO: we should revisit this once a public API for share URL status is exposed in viser. 
+ # https://github.com/nerfstudio-project/viser/issues/124 + if share: + assert self.viser_server._share_tunnel is not None + while self.viser_server._share_tunnel._shared_state["status"] == "connecting": + # wait for connection before grabbing URL + time.sleep(0.01) + url_maybe = self.viser_server._share_tunnel.get_url() + if url_maybe is not None: + self.viewer_url = url_maybe + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" buttons = ( viser.theme.TitlebarButton( text="Getting Started", From 85677d310fcb8700b284225de646c63bb02b83de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Thu, 2 Nov 2023 09:28:11 +0100 Subject: [PATCH 061/101] Fix torch.compile for torch 2.1 (#2577) --- nerfstudio/cameras/camera_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py index 8c527b9dbd..40419528ac 100644 --- a/nerfstudio/cameras/camera_utils.py +++ b/nerfstudio/cameras/camera_utils.py @@ -407,7 +407,7 @@ def _compute_residual_and_jacobian( return fx, fy, fx_x, fx_y, fy_x, fy_y -@torch_compile(dynamic=True, mode="reduce-overhead", backend="eager") +@torch_compile(dynamic=True, mode="reduce-overhead") def radial_and_tangential_undistort( coords: torch.Tensor, distortion_params: torch.Tensor, From 1f6fb8337c05afb357d61815416266a591966c61 Mon Sep 17 00:00:00 2001 From: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com> Date: Sun, 5 Nov 2023 10:21:16 -0800 Subject: [PATCH 062/101] Remove compile line in camera_utils (#2584) temporarily remove torch.compile in camera_utils --- nerfstudio/cameras/camera_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py index 40419528ac..ffb557589b 100644 --- a/nerfstudio/cameras/camera_utils.py +++ b/nerfstudio/cameras/camera_utils.py @@ -25,7 +25,6 @@ from numpy.typing import NDArray from torch import Tensor -from nerfstudio.utils.misc import torch_compile _EPS = np.finfo(float).eps * 4.0 @@ -407,7 +406,7 @@ def _compute_residual_and_jacobian( return fx, fy, fx_x, fx_y, fy_x, fy_y -@torch_compile(dynamic=True, mode="reduce-overhead") +# @torch_compile(dynamic=True, mode="reduce-overhead", backend="eager") def radial_and_tangential_undistort( coords: torch.Tensor, distortion_params: torch.Tensor, From f911fe7a0cecba9261574264539040d5cac966c5 Mon Sep 17 00:00:00 2001 From: Yudong Jin Date: Sun, 12 Nov 2023 00:05:06 +0800 Subject: [PATCH 063/101] Fix HTML render of README.md (#2599) --- README.md | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 20f041410b..ab18747680 100644 --- a/README.md +++ b/README.md @@ -3,21 +3,17 @@ - Documentation Status - + Documentation Status PyPI version - Test Status - + Test Status - Viewer build Status - + Viewer build Status - License - + License

@@ -41,14 +37,11 @@

- documentation - + documentation - viewer - + viewer - colab - + colab

From 5f5562b191997e1c09216e1980a788bfdb1bd956 Mon Sep 17 00:00:00 2001 From: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com> Date: Mon, 13 Nov 2023 10:23:05 -0800 Subject: [PATCH 064/101] Render nearest training view (#2384) * render nearest camera to render view * added lpips * added the rotation distance and the depth check for the cameras * render nearest training img * formatting * add flag for occlusions * remove commented import * remove comments * fix pyright test * remove None * add assert * fix pyright tests * format * made it a flag render-nearest-camera * added default cams and dataset for tests * maintain resolution of training images --- nerfstudio/scripts/render.py | 93 ++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 5e7a52e015..d9d815ed71 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -48,11 +48,14 @@ from torch import Tensor from typing_extensions import Annotated +import viser.transforms as tf + from nerfstudio.cameras.camera_paths import ( get_interpolated_camera_path, get_path_from_json, get_spiral_path, ) + from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle from nerfstudio.data.datamanagers.base_datamanager import ( VanillaDataManager, @@ -84,6 +87,8 @@ def _render_trajectory_video( depth_near_plane: Optional[float] = None, depth_far_plane: Optional[float] = None, colormap_options: colormaps.ColormapOptions = colormaps.ColormapOptions(), + render_nearest_camera=False, + check_occlusions: bool = False, ) -> None: """Helper function to create a video of the spiral trajectory. @@ -99,6 +104,8 @@ def _render_trajectory_video( depth_near_plane: Closest depth to consider when using the colormap for depth. If None, use min value. depth_far_plane: Furthest depth to consider when using the colormap for depth. If None, use max value. colormap_options: Options for colormap. + render_nearest_camera: Whether to render the nearest training camera to the rendered camera. 
+ check_occlusions: If true, checks line-of-sight occlusions when computing camera distance and rejects cameras not visible to each other """ CONSOLE.print("[bold green]Creating trajectory " + output_format) cameras.rescale_output_resolution(rendered_resolution_scaling_factor) @@ -132,6 +139,14 @@ def _render_trajectory_video( with ExitStack() as stack: writer = None + if render_nearest_camera: + assert pipeline.datamanager.train_dataset is not None + train_dataset = pipeline.datamanager.train_dataset + train_cameras = train_dataset.cameras.to(pipeline.device) + else: + train_dataset = None + train_cameras = None + with progress: for camera_idx in progress.track(range(cameras.size), description=""): obb_box = None @@ -139,6 +154,50 @@ def _render_trajectory_video( obb_box = crop_data.obb camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box) + max_dist, max_idx = -1, -1 + true_max_dist, true_max_idx = -1, -1 + + if render_nearest_camera: + assert pipeline.datamanager.train_dataset is not None + assert train_dataset is not None + assert train_cameras is not None + cam_pos = cameras[camera_idx].camera_to_worlds[:, 3].cpu() + cam_quat = tf.SO3.from_matrix(cameras[camera_idx].camera_to_worlds[:3, :3].numpy(force=True)).wxyz + + for i in range(len(train_cameras)): + train_cam_pos = train_cameras[i].camera_to_worlds[:, 3].cpu() + # Make sure the line of sight from rendered cam to training cam is not blocked by any object + bundle = RayBundle( + origins=cam_pos.view(1, 3), + directions=((cam_pos - train_cam_pos) / (cam_pos - train_cam_pos).norm()).view(1, 3), + pixel_area=torch.tensor(1).view(1, 1), + nears=torch.tensor(0.05).view(1, 1), + fars=torch.tensor(100).view(1, 1), + camera_indices=torch.tensor(0).view(1, 1), + metadata={}, + ).to(pipeline.device) + outputs = pipeline.model.get_outputs(bundle) + + q = tf.SO3.from_matrix(train_cameras[i].camera_to_worlds[:3, :3].numpy(force=True)).wxyz + # calculate distance between two quaternions + rot_dist = 1 - np.dot(q, cam_quat) ** 2 + pos_dist = torch.norm(train_cam_pos - cam_pos) + dist = 0.3 * rot_dist + 0.7 * pos_dist + + if true_max_dist == -1 or dist < true_max_dist: + true_max_dist = dist + true_max_idx = i + + if outputs["depth"][0] < torch.norm(cam_pos - train_cam_pos).item(): + continue + + if check_occlusions and (max_dist == -1 or dist < max_dist): + max_dist = dist + max_idx = i + + if max_idx == -1: + max_idx = true_max_idx + if crop_data is not None: with renderers.background_color_override_context( crop_data.background_color.to(pipeline.device) @@ -181,6 +240,28 @@ def _render_trajectory_video( .numpy() ) render_image.append(output_image) + + # Add closest training image to the right of the rendered image + if render_nearest_camera: + assert train_dataset is not None + assert train_cameras is not None + img = train_dataset.get_image(max_idx) + height = cameras.image_height[0] + # maintain the resolution of the img to calculate the width from the height + width = int(img.shape[1] * (height / img.shape[0])) + resized_image = torch.nn.functional.interpolate( + img.permute(2, 0, 1)[None], size=(int(height), int(width)) + )[0].permute(1, 2, 0) + resized_image = ( + colormaps.apply_colormap( + image=resized_image, + colormap_options=colormap_options, + ) + .cpu() + .numpy() + ) + render_image.append(resized_image) + render_image = np.concatenate(render_image, axis=1) if output_format == "images": if image_format == "png": @@ -354,6 +435,10 @@ class BaseRender: """Furthest depth to consider when using the 
colormap for depth. If None, use max value.""" colormap_options: colormaps.ColormapOptions = colormaps.ColormapOptions() """Colormap options.""" + render_nearest_camera: bool = False + """Whether to render the nearest training camera to the rendered camera.""" + check_occlusions: bool = False + """If true, checks line-of-sight occlusions when computing camera distance and rejects cameras not visible to each other""" @dataclass @@ -418,6 +503,8 @@ def main(self) -> None: depth_near_plane=self.depth_near_plane, depth_far_plane=self.depth_far_plane, colormap_options=self.colormap_options, + render_nearest_camera=self.render_nearest_camera, + check_occlusions=self.check_occlusions, ) if ( @@ -451,6 +538,8 @@ def main(self) -> None: depth_near_plane=self.depth_near_plane, depth_far_plane=self.depth_far_plane, colormap_options=self.colormap_options, + render_nearest_camera=self.render_nearest_camera, + check_occlusions=self.check_occlusions, ) self.output_path = Path(str(left_eye_path.parent)[:-5] + ".mp4") @@ -549,6 +638,8 @@ def main(self) -> None: depth_near_plane=self.depth_near_plane, depth_far_plane=self.depth_far_plane, colormap_options=self.colormap_options, + render_nearest_camera=self.render_nearest_camera, + check_occlusions=self.check_occlusions, ) @@ -592,6 +683,8 @@ def main(self) -> None: depth_near_plane=self.depth_near_plane, depth_far_plane=self.depth_far_plane, colormap_options=self.colormap_options, + render_nearest_camera=self.render_nearest_camera, + check_occlusions=self.check_occlusions, ) From 1f5f0c792b5d9bb2753fe3c0ad9e9c7e25337c66 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Tue, 14 Nov 2023 00:00:21 -0800 Subject: [PATCH 065/101] Make spherical harmonics test more forgiving (#2607) --- tests/utils/test_math.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_math.py b/tests/utils/test_math.py index c0e1edc433..952c6a1be5 100644 --- a/tests/utils/test_math.py +++ b/tests/utils/test_math.py @@ -13,4 +13,4 @@ def test_spherical_harmonics(components): dx = dx / torch.linalg.norm(dx, dim=-1, keepdim=True) sh = components_from_spherical_harmonics(components, dx) matrix = (sh.T @ sh) / N * 4 * torch.pi - torch.testing.assert_close(matrix, torch.eye(components**2), rtol=0, atol=1e-2) + torch.testing.assert_close(matrix, torch.eye(components**2), rtol=0, atol=1.5e-2) From b8736f704fcfdc7c1169c9a7f9956cdf6c817017 Mon Sep 17 00:00:00 2001 From: Chris Heinrich Date: Tue, 14 Nov 2023 00:14:17 -0800 Subject: [PATCH 066/101] Update LICENSE (#2597) Filling in the license to what I think it should be. We're adding this to our list of third party libraries --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 261eeb9e9f..ee561dfbe4 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2023 The Nerfstudio Team Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
From be37bcddea3df1206c054a30441dad811ff41b0d Mon Sep 17 00:00:00 2001 From: emma <57429431+emmaguo13@users.noreply.github.com> Date: Tue, 14 Nov 2023 00:21:03 -0800 Subject: [PATCH 067/101] Remove an outdated comment in RayGenerator class (#2606) clean up comment --- nerfstudio/model_components/ray_generators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nerfstudio/model_components/ray_generators.py b/nerfstudio/model_components/ray_generators.py index bad32258d0..fab9e39bba 100644 --- a/nerfstudio/model_components/ray_generators.py +++ b/nerfstudio/model_components/ray_generators.py @@ -28,7 +28,6 @@ class RayGenerator(nn.Module): Args: cameras: Camera objects containing camera info. - pose_optimizer: pose optimization module, for optimizing noisy camera intrinsics/extrinsics. """ image_coords: Tensor From 9528a3f717fe28a6c6500199b5b37d4f2c6e10ca Mon Sep 17 00:00:00 2001 From: Rohan Mathur Date: Wed, 15 Nov 2023 20:09:48 -0800 Subject: [PATCH 068/101] Added UI to load camera path from existing path for the dataset (#2530) * added load camera path * bugfixes * fixed fov value in load camera path --------- Co-authored-by: Justin Kerr --- nerfstudio/viewer_beta/render_panel.py | 72 ++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 53966fe74e..36e53ccbde 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -13,21 +13,24 @@ # limitations under the License. from __future__ import annotations -from pathlib import Path + import colorsys import dataclasses +import datetime +import json import threading import time +from pathlib import Path from typing import Dict, List, Optional, Tuple -import datetime -from nerfstudio.viewer_beta.control_panel import ControlPanel + import numpy as onp import splines import splines.quaternion import viser -import json import viser.transforms as tf +from nerfstudio.viewer_beta.control_panel import ControlPanel + @dataclasses.dataclass class Keyframe: @@ -541,6 +544,67 @@ def _(_) -> None: play_button.visible = True pause_button.visible = False + # add button for loading existing path + load_camera_path_button = server.add_gui_button( + "Load Path", icon=viser.Icon.FOLDER_OPEN, hint="Load an existing camera path." 
+ ) + + @load_camera_path_button.on_click + def _(event: viser.GuiEvent) -> None: + assert event.client is not None + camera_path_dir = datapath / "camera_paths" + camera_path_dir.mkdir(parents=True, exist_ok=True) + preexisting_camera_paths = list(camera_path_dir.glob("*.json")) + preexisting_camera_filenames = [p.name for p in preexisting_camera_paths] + + with event.client.add_gui_modal("Load Path") as modal: + if len(preexisting_camera_filenames) == 0: + event.client.add_gui_markdown("No existing paths found") + else: + event.client.add_gui_markdown("Select existing camera path:") + camera_path_dropdown = event.client.add_gui_dropdown( + label="Camera Path", + options=[str(p) for p in preexisting_camera_filenames], + initial_value=str(preexisting_camera_filenames[0]), + ) + load_button = event.client.add_gui_button("Load") + + @load_button.on_click + def _(_) -> None: + # load the json file + json_path = datapath / "camera_paths" / camera_path_dropdown.value + with open(json_path, "r") as f: + json_data = json.load(f) + + keyframes = json_data["keyframes"] + camera_path.reset() + for i in range(len(keyframes)): + frame = keyframes[i] + pose = tf.SE3.from_matrix(onp.array(frame["matrix"]).reshape(4, 4)) + # apply the x rotation by 180 deg + pose = tf.SE3.from_rotation_and_translation( + pose.rotation() @ tf.SO3.from_x_radians(onp.pi), pose.translation() + ) + camera_path.add_camera( + Keyframe( + position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO, + wxyz=pose.rotation().wxyz, + override_fov_enabled=True, + override_fov_value=frame["fov"] / 180.0 * onp.pi, + aspect=frame["aspect"], + ), + ) + # update the render name + render_name_text.value = json_path.stem + camera_path.update_spline() + modal.close() + + cancel_button = event.client.add_gui_button("Cancel") + + @cancel_button.on_click + def _(_) -> None: + modal.close() + # set the initial value to the current date-time string now = datetime.datetime.now() render_name_text = server.add_gui_text( From 43703fa05a306d765b5c6c57ccc763af3de66f0b Mon Sep 17 00:00:00 2001 From: Chung Min Kim Date: Sun, 26 Nov 2023 21:56:49 -0800 Subject: [PATCH 069/101] Bugfix; scrape pipeline for viewercontrol, not trainer (#2621) Scrape pipeline for viewercontrol, not trainer --- nerfstudio/viewer_beta/viewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 3054cf5cba..e577ca5c37 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -213,7 +213,7 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem # scrape the trainer/pipeline for any ViewerControl objects to initialize them self.viewer_controls: List[ViewerControl] = [ - e for (_, e) in parse_object(self.trainer, ViewerControl, "Custom Elements") + e for (_, e) in parse_object(pipeline, ViewerControl, "Custom Elements") ] for c in self.viewer_controls: c._setup(self) From c896ee4e0a26f98f2b06ff17ecb6c99013816c4e Mon Sep 17 00:00:00 2001 From: Chung Min Kim Date: Sun, 26 Nov 2023 22:11:05 -0800 Subject: [PATCH 070/101] Remove PCA colormap bug for float output maps (#2620) Previously, "PCA" would be offered as a colormap option for float outputs (and throws "PCA is not a known colormap" error). 
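The resulting selection rule is easiest to see in isolation; below is a self-contained sketch of the behaviour after this fix (a simplified stand-in for `_get_colormap_options` in the diff that follows, using plain strings instead of the `Colormaps` literal and a boolean instead of torch dtypes).

```python
# Illustrative reimplementation of the colormap filtering: 3-channel outputs use
# "default", 1-channel float outputs get every colormap except "default" and
# "pca" (the combination that previously raised the error), and outputs with
# more than 3 channels are reduced with "pca" only.
from typing import List

ALL_COLORMAPS = ["default", "turbo", "viridis", "magma", "inferno", "cividis", "gray", "pca"]

def colormap_options(dimensions: int, is_float: bool) -> List[str]:
    options: List[str] = []
    if dimensions == 3:
        options = ["default"]
    if dimensions == 1 and is_float:
        options = [c for c in ALL_COLORMAPS if c not in ("default", "pca")]
    if dimensions > 3:
        options = ["pca"]
    return options

assert "pca" not in colormap_options(1, is_float=True)  # the case this patch fixes
```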
--- nerfstudio/viewer_beta/control_panel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index faa027622b..d118836849 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -445,7 +445,7 @@ def _get_colormap_options(dimensions: int, dtype: type) -> List[Colormaps]: if dimensions == 3: colormap_options = ["default"] if dimensions == 1 and dtype in [torch.float64, torch.float32, torch.float16, torch.bfloat16]: - colormap_options = [c for c in list(get_args(Colormaps)) if c != "default"] + colormap_options = [c for c in list(get_args(Colormaps)) if c not in ("default", "pca")] if dimensions > 3: colormap_options = ["pca"] return colormap_options From 38726da2190780c2a2205449dfac5d9ae98136e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?= Date: Wed, 29 Nov 2023 20:48:41 +0100 Subject: [PATCH 071/101] Fix tangential camera distortion (p1,p2 in opencv camera) (#2627) --- nerfstudio/cameras/cameras.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py index 68f7b186fd..c1b988dbbd 100644 --- a/nerfstudio/cameras/cameras.py +++ b/nerfstudio/cameras/cameras.py @@ -615,9 +615,9 @@ def _generate_rays_from_coords( # Get our image coordinates and image coordinates offset by 1 (offsets used for dx, dy calculations) # Also make sure the shapes are correct - coord = torch.stack([(x - cx) / fx, -(y - cy) / fy], -1) # (num_rays, 2) - coord_x_offset = torch.stack([(x - cx + 1) / fx, -(y - cy) / fy], -1) # (num_rays, 2) - coord_y_offset = torch.stack([(x - cx) / fx, -(y - cy + 1) / fy], -1) # (num_rays, 2) + coord = torch.stack([(x - cx) / fx, (y - cy) / fy], -1) # (num_rays, 2) + coord_x_offset = torch.stack([(x - cx + 1) / fx, (y - cy) / fy], -1) # (num_rays, 2) + coord_y_offset = torch.stack([(x - cx) / fx, (y - cy + 1) / fy], -1) # (num_rays, 2) assert ( coord.shape == num_rays_shape + (2,) and coord_x_offset.shape == num_rays_shape + (2,) @@ -648,6 +648,9 @@ def _generate_rays_from_coords( distortion_params[mask, :], ).reshape(-1, 2) + # Switch from OpenCV to OpenGL + coord_stack[..., 1] *= -1 + # Make sure after we have undistorted our images, the shapes are still correct assert coord_stack.shape == (3,) + num_rays_shape + (2,) From 5865ecf3236e47818974354a793b5d94dbc0c272 Mon Sep 17 00:00:00 2001 From: Chung Min Kim Date: Wed, 29 Nov 2023 14:48:58 -0800 Subject: [PATCH 072/101] Integrate scene click capability from original viewer (#2509) * Integrate scene click capability from original viewer * filter non-click ScenePointerEvents from ClickEvent * lint * Put click origin+direction in nerfstudio world scale * Update docs * Update doc heading --------- Co-authored-by: Justin Kerr --- .../developer_guides/viewer/viewer_control.md | 14 ++++++-- nerfstudio/viewer_beta/viewer_elements.py | 33 ++++++++++++++++--- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/docs/developer_guides/viewer/viewer_control.md b/docs/developer_guides/viewer/viewer_control.md index e6a84d3d76..0bb33294c6 100644 --- a/docs/developer_guides/viewer/viewer_control.md +++ b/docs/developer_guides/viewer/viewer_control.md @@ -62,8 +62,8 @@ class MyModel(nn.Module): # Must inherit from nn.Module self.viewer_button = ViewerButton(name="Dummy Button",cb_hook=button_cb) ``` -## Double-click Callbacks -We forward *double* clicks inside the viewer to the ViewerControl 
object, which you can use to interact with the scene. To do this, register a callback using `register_click_cb()`. The click is defined to be a ray that starts at the camera origin and passes through the click point on the screen, in world coordinates. +## Scene Click Callbacks +We forward *single* clicks inside the viewer to the ViewerControl object, which you can use to interact with the scene. To do this, register a callback using `register_click_cb()`. The click is defined to be a ray that starts at the camera origin and passes through the click point on the screen, in world coordinates. ```python from nerfstudio.viewer.server.viewer_elements import ViewerControl,ViewerClick @@ -77,6 +77,16 @@ class MyModel(nn.Module): # must inherit from nn.Module self.viewer_control.register_click_cb(click_cb) ``` +You can also use `unregister_click_cb()` to remove callbacks that are no longer needed. A good example is a "Click on Scene" button, that when pressed, would register a callback that would wait for the next click, and then unregister itself. +```python + ... + def button_cb(button: ViewerButton): + def click_cb(click: ViewerClick): + print(f"Click at {click.origin} in direction {click.direction}") + self.viewer_control.unregister_click_cb(click_cb) + self.viewer_control.register_click_cb(click_cb) +``` + ### Thread safety Just like `ViewerElement` callbacks, click callbacks are asynchronous to training and can potentially interrupt a call to `get_outputs()`. diff --git a/nerfstudio/viewer_beta/viewer_elements.py b/nerfstudio/viewer_beta/viewer_elements.py index 82a8b3d447..e8de855e0d 100644 --- a/nerfstudio/viewer_beta/viewer_elements.py +++ b/nerfstudio/viewer_beta/viewer_elements.py @@ -31,6 +31,7 @@ GuiButtonHandle, GuiDropdownHandle, GuiInputHandle, + ScenePointerEvent, ViserServer, ) @@ -68,7 +69,7 @@ class for exposing non-gui controls of the viewer to the user def __init__(self): # this should be a user-facing constructor, since it will be used inside the model/pipeline class - self.click_cbs = [] + self._click_cbs = {} def _setup(self, viewer: Viewer): """ @@ -151,13 +152,35 @@ def register_click_cb(self, cb: Callable): cb: The callback to call when a click is detected. The callback should take a ViewerClick object as an argument """ - self.click_cbs.append(cb) + from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO + + def wrapped_cb(scene_pointer_msg: ScenePointerEvent): + # only call the callback if the event is a click + if scene_pointer_msg.event != "click": + return + origin = scene_pointer_msg.ray_origin + direction = scene_pointer_msg.ray_direction + + origin = tuple([x / VISER_NERFSTUDIO_SCALE_RATIO for x in origin]) + assert len(origin) == 3 + + click = ViewerClick(origin, direction) + cb(click) - def on_click(self, msg): + self._click_cbs[cb] = wrapped_cb + self.viser_server.on_scene_click(wrapped_cb) + + def unregister_click_cb(self, cb: Callable): """ - Internal use only, register a click in the viewer which propagates to all self.click_cbs + Remove a callback which will be called when a click is detected in the viewer. 
+ + Args: + cb: The callback to remove """ - raise NotImplementedError() + if cb not in self._click_cbs: + raise ValueError(f"Callback {cb} not registered, cannot remove") + self.viser_server.remove_scene_click_callback(self._click_cbs[cb]) + self._click_cbs.pop(cb) @property def server(self): From c8c55521e8f8e429e065d54ff45447181f8b6df3 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Wed, 29 Nov 2023 16:52:37 -0800 Subject: [PATCH 073/101] Keyframe timing for beta viewer (#2638) * Add keyframe timing to beta viewer * Nits * New keyframe timing logic * Bump viser version --- nerfstudio/viewer_beta/render_panel.py | 600 ++++++++++++++++++------- pyproject.toml | 2 +- 2 files changed, 431 insertions(+), 171 deletions(-) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 36e53ccbde..b5f4e1bfb2 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -21,9 +21,9 @@ import threading import time from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union -import numpy as onp +import numpy as np import splines import splines.quaternion import viser @@ -34,11 +34,13 @@ @dataclasses.dataclass class Keyframe: - position: onp.ndarray - wxyz: onp.ndarray + position: np.ndarray + wxyz: np.ndarray override_fov_enabled: bool - override_fov_value: float + override_fov_rad: float aspect: float + override_transition_enabled: bool + override_transition_sec: Optional[float] @staticmethod def from_camera(camera: viser.CameraHandle, aspect: float) -> Keyframe: @@ -46,17 +48,19 @@ def from_camera(camera: viser.CameraHandle, aspect: float) -> Keyframe: camera.position, camera.wxyz, override_fov_enabled=False, - override_fov_value=camera.fov, + override_fov_rad=camera.fov, aspect=aspect, + override_transition_enabled=False, + override_transition_sec=None, ) class CameraPath: - def __init__(self, server: viser.ViserServer): + def __init__(self, server: viser.ViserServer, duration_element: viser.GuiInputHandle[float]): self._server = server self._keyframes: Dict[int, Tuple[Keyframe, viser.CameraFrustumHandle]] = {} self._keyframe_counter: int = 0 - self._spline: Optional[viser.SceneNodeHandle] = None + self._spline_nodes: List[viser.SceneNodeHandle] = [] self._camera_edit_panel: Optional[viser.Gui3dContainerHandle] = None self._orientation_spline: Optional[splines.quaternion.KochanekBartels] = None @@ -64,10 +68,15 @@ def __init__(self, server: viser.ViserServer): self._fov_spline: Optional[splines.KochanekBartels] = None self._keyframes_visible: bool = True + self._duration_element = duration_element + # These parameters should be overridden externally. self.loop: bool = False - self.smoothness: float = 0.5 # Tension / alpha term. + self.framerate: float = 30.0 + self.tension: float = 0.5 # Tension / alpha term. 
self.default_fov: float = 0.0 + self.default_transition_sec: float = 0.0 + self.show_spline: bool = True def set_keyframes_visible(self, visible: bool) -> None: self._keyframes_visible = visible @@ -85,20 +94,28 @@ def add_camera(self, keyframe: Keyframe, keyframe_index: Optional[int] = None) - frustum_handle = server.add_camera_frustum( f"/render_cameras/{keyframe_index}", - fov=keyframe.override_fov_value if keyframe.override_fov_enabled else self.default_fov, + fov=keyframe.override_fov_rad if keyframe.override_fov_enabled else self.default_fov, aspect=keyframe.aspect, scale=0.1, - color=(127, 127, 127), + color=(200, 10, 30), wxyz=keyframe.wxyz, position=keyframe.position, visible=self._keyframes_visible, ) + self._server.add_icosphere( + f"/render_cameras/{keyframe_index}/sphere", + radius=0.03, + color=(200, 10, 30), + ) @frustum_handle.on_click def _(_) -> None: + if self._camera_edit_panel is not None: + self._camera_edit_panel.remove() + self._camera_edit_panel = None + with server.add_3d_gui_container( "/camera_edit_panel", - wxyz=keyframe.wxyz, position=keyframe.position, ) as camera_edit_panel: self._camera_edit_panel = camera_edit_panel @@ -108,80 +125,82 @@ def _(_) -> None: 5.0, 175.0, step=0.1, - initial_value=keyframe.override_fov_value * 180.0 / onp.pi, + initial_value=keyframe.override_fov_rad * 180.0 / np.pi, disabled=not keyframe.override_fov_enabled, ) delete_button = server.add_gui_button("Delete", color="red", icon=viser.Icon.TRASH) go_to_button = server.add_gui_button("Go to") close_button = server.add_gui_button("Close") - @override_fov.on_update - def _(_) -> None: - keyframe.override_fov_enabled = override_fov.value - override_fov_degrees.disabled = not override_fov.value - self.add_camera(keyframe, keyframe_index) + @override_fov.on_update + def _(_) -> None: + keyframe.override_fov_enabled = override_fov.value + override_fov_degrees.disabled = not override_fov.value + self.add_camera(keyframe, keyframe_index) - @override_fov_degrees.on_update - def _(_) -> None: - keyframe.override_fov_value = override_fov_degrees.value / 180.0 * onp.pi - self.add_camera(keyframe, keyframe_index) - - @delete_button.on_click - def _(event: viser.GuiEvent) -> None: - assert event.client is not None - with event.client.add_gui_modal("Confirm") as modal: - event.client.add_gui_markdown("Delete keyframe?") - confirm_button = event.client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH) - exit_button = event.client.add_gui_button("Cancel") - - @confirm_button.on_click - def _(_) -> None: - assert camera_edit_panel is not None - - keyframe_id = None - for i, keyframe_tuple in self._keyframes.items(): - if keyframe_tuple[1] is frustum_handle: - keyframe_id = i - break - assert keyframe_id is not None - - self._keyframes.pop(keyframe_id) - frustum_handle.remove() - camera_edit_panel.remove() - modal.close() - self.update_spline() - - @exit_button.on_click - def _(_) -> None: - modal.close() - - @go_to_button.on_click - def _(event: viser.GuiEvent) -> None: - assert event.client is not None - client = event.client - T_world_current = tf.SE3.from_rotation_and_translation( - tf.SO3(client.camera.wxyz), client.camera.position - ) - T_world_target = tf.SE3.from_rotation_and_translation( - tf.SO3(keyframe.wxyz), keyframe.position - ) @ tf.SE3.from_translation(onp.array([0.0, 0.0, -0.5])) + @override_fov_degrees.on_update + def _(_) -> None: + keyframe.override_fov_rad = override_fov_degrees.value / 180.0 * np.pi + self.add_camera(keyframe, keyframe_index) + + @delete_button.on_click 
+ def _(event: viser.GuiEvent) -> None: + assert event.client is not None + with event.client.add_gui_modal("Confirm") as modal: + event.client.add_gui_markdown("Delete keyframe?") + confirm_button = event.client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH) + exit_button = event.client.add_gui_button("Cancel") + + @confirm_button.on_click + def _(_) -> None: + assert camera_edit_panel is not None + + keyframe_id = None + for i, keyframe_tuple in self._keyframes.items(): + if keyframe_tuple[1] is frustum_handle: + keyframe_id = i + break + assert keyframe_id is not None + + self._keyframes.pop(keyframe_id) + frustum_handle.remove() + camera_edit_panel.remove() + self._camera_edit_panel = None + modal.close() + self.update_spline() + + @exit_button.on_click + def _(_) -> None: + modal.close() + + @go_to_button.on_click + def _(event: viser.GuiEvent) -> None: + assert event.client is not None + client = event.client + T_world_current = tf.SE3.from_rotation_and_translation( + tf.SO3(client.camera.wxyz), client.camera.position + ) + T_world_target = tf.SE3.from_rotation_and_translation( + tf.SO3(keyframe.wxyz), keyframe.position + ) @ tf.SE3.from_translation(np.array([0.0, 0.0, -0.5])) - T_current_target = T_world_current.inverse() @ T_world_target + T_current_target = T_world_current.inverse() @ T_world_target - for j in range(10): - T_world_set = T_world_current @ tf.SE3.exp(T_current_target.log() * j / 9.0) + for j in range(10): + T_world_set = T_world_current @ tf.SE3.exp(T_current_target.log() * j / 9.0) - # Important bit: we atomically set both the orientation and the position - # of the camera. - with client.atomic(): - client.camera.wxyz = T_world_set.rotation().wxyz - client.camera.position = T_world_set.translation() - time.sleep(1.0 / 30.0) + # Important bit: we atomically set both the orientation and the position + # of the camera. + with client.atomic(): + client.camera.wxyz = T_world_set.rotation().wxyz + client.camera.position = T_world_set.translation() + time.sleep(1.0 / 30.0) - @close_button.on_click - def _(_) -> None: - assert camera_edit_panel is not None - camera_edit_panel.remove() + @close_button.on_click + def _(_) -> None: + assert camera_edit_panel is not None + camera_edit_panel.remove() + self._camera_edit_panel = None self._keyframes[keyframe_index] = (keyframe, frustum_handle) @@ -196,79 +215,220 @@ def reset(self) -> None: self._keyframes.clear() self.update_spline() - def interpolate_pose_and_fov(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]: + def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]: if len(self._keyframes) < 2: return None - # TODO: this doesn't need to be constantly re-instantiated. 
+ + transition_times_cumsum = self.compute_transition_times_cumsum() + spline_indices = np.arange(transition_times_cumsum.shape[0]) + + def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: + return np.interp(time, transition_times_cumsum, spline_indices) + self._fov_spline = splines.KochanekBartels( [ - keyframe[0].override_fov_value if keyframe[0].override_fov_enabled else self.default_fov + keyframe[0].override_fov_rad if keyframe[0].override_fov_enabled else self.default_fov for keyframe in self._keyframes.values() ], - tcb=(self.smoothness, 0.0, 0.0), + tcb=(self.tension, 0.0, 0.0), endconditions="closed" if self.loop else "natural", ) assert self._orientation_spline is not None assert self._position_spline is not None assert self._fov_spline is not None - max_t = len(self._keyframes) if self.loop else len(self._keyframes) - 1 + max_t = self.compute_duration() t = max_t * normalized_t - quat = self._orientation_spline.evaluate(t) + + quat = self._orientation_spline.evaluate(spline_t_from_t_sec(t)) assert isinstance(quat, splines.quaternion.UnitQuaternion) return ( tf.SE3.from_rotation_and_translation( - tf.SO3(onp.array([quat.scalar, *quat.vector])), - self._position_spline.evaluate(t), + tf.SO3(np.array([quat.scalar, *quat.vector])), + self._position_spline.evaluate(spline_t_from_t_sec(t)), ), - float(self._fov_spline.evaluate(t)), + float(self._fov_spline.evaluate(spline_t_from_t_sec(t))), ) def update_spline(self) -> None: - keyframes = list(self._keyframes.values()) - if len(keyframes) <= 1: - if self._spline is not None: - self._spline.remove() - self._spline = None + num_frames = int(self.compute_duration() * self.framerate) + if num_frames <= 0 or not self.show_spline: + for node in self._spline_nodes: + node.remove() + self._spline_nodes.clear() return # Update internal splines. + keyframes = list(self._keyframes.values()) + transition_times_cumsum = self.compute_transition_times_cumsum() + spline_indices = np.arange(transition_times_cumsum.shape[0]) + + def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: + return np.interp(time, transition_times_cumsum, spline_indices) + self._orientation_spline = splines.quaternion.KochanekBartels( [ - splines.quaternion.UnitQuaternion.from_unit_xyzw(onp.roll(keyframe[0].wxyz, shift=-1)) + splines.quaternion.UnitQuaternion.from_unit_xyzw(np.roll(keyframe[0].wxyz, shift=-1)) for keyframe in keyframes ], - tcb=(self.smoothness, 0.0, 0.0), + tcb=(self.tension, 0.0, 0.0), endconditions="closed" if self.loop else "natural", ) self._position_spline = splines.KochanekBartels( [keyframe[0].position for keyframe in keyframes], - tcb=(self.smoothness, 0.0, 0.0), + tcb=(self.tension, 0.0, 0.0), endconditions="closed" if self.loop else "natural", ) # Update visualized spline. - num_keyframes = len(keyframes) + 1 if self.loop else len(keyframes) - points_array = onp.array( - [self._position_spline.evaluate(t) for t in onp.linspace(0, num_keyframes - 1, num_keyframes * 100)] + points_array = self._position_spline.evaluate( + spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames)) ) - colors_array = onp.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in onp.linspace(0.0, 1.0, len(points_array))]) - self._spline = self._server.add_point_cloud( - "/render_camera_spline", - points=points_array, - colors=colors_array, - point_size=0.035, + colors_array = np.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))]) + + # Clear prior spline nodes. 
+ for node in self._spline_nodes: + node.remove() + self._spline_nodes.clear() + + self._spline_nodes.append( + self._server.add_spline_catmull_rom( + "/render_camera_spline", + positions=points_array, + color=(220, 220, 220), + closed=self.loop, + line_width=1.0, + segments=points_array.shape[0] + 1, + ) + ) + self._spline_nodes.append( + self._server.add_point_cloud( + "/render_camera_spline/points", + points=points_array, + colors=colors_array, + point_size=0.04, + ) ) + def make_transition_handle(i: int) -> None: + assert self._position_spline is not None + transition_pos = self._position_spline.evaluate( + spline_t_from_t_sec((transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0) + ) + transition_sphere = self._server.add_icosphere( + f"/render_camera_spline/transition_{i}", + radius=0.04, + color=(255, 0, 0), + position=transition_pos, + ) + self._spline_nodes.append(transition_sphere) + + @transition_sphere.on_click + def _(_) -> None: + server = self._server + + if self._camera_edit_panel is not None: + self._camera_edit_panel.remove() + self._camera_edit_panel = None + + keyframe_index = (i + 1) % len(self._keyframes) + keyframe = keyframes[keyframe_index][0] + + with server.add_3d_gui_container( + "/camera_edit_panel", + position=transition_pos, + ) as camera_edit_panel: + self._camera_edit_panel = camera_edit_panel + override_transition_enabled = server.add_gui_checkbox( + "Override transition", initial_value=keyframe.override_transition_enabled + ) + override_transition_sec = server.add_gui_number( + "Override transition (sec)", + initial_value=keyframe.override_transition_sec + if keyframe.override_transition_sec is not None + else self.default_transition_sec, + min=0.001, + max=30.0, + step=0.001, + disabled=not override_transition_enabled.value, + ) + close_button = server.add_gui_button("Close") + + @override_transition_enabled.on_update + def _(_) -> None: + keyframe.override_transition_enabled = override_transition_enabled.value + override_transition_sec.disabled = not override_transition_enabled.value + self._duration_element.value = self.compute_duration() + + @override_transition_sec.on_update + def _(_) -> None: + keyframe.override_transition_sec = override_transition_sec.value + self._duration_element.value = self.compute_duration() + + @close_button.on_click + def _(_) -> None: + assert camera_edit_panel is not None + camera_edit_panel.remove() + self._camera_edit_panel = None + + (num_transitions_plus_1,) = transition_times_cumsum.shape + for i in range(num_transitions_plus_1 - 1): + make_transition_handle(i) + + # for i in range(transition_times.shape[0]) + + def compute_duration(self) -> float: + """Compute the total duration of the trajectory.""" + total = 0.0 + for i, (keyframe, frustum) in enumerate(self._keyframes.values()): + if i == 0 and not self.loop: + continue + del frustum + total += ( + keyframe.override_transition_sec + if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None + else self.default_transition_sec + ) + return total + + def compute_transition_times_cumsum(self) -> np.ndarray: + """Compute the total duration of the trajectory.""" + total = 0.0 + out = [0.0] + for i, (keyframe, frustum) in enumerate(self._keyframes.values()): + if i == 0: + continue + del frustum + total += ( + keyframe.override_transition_sec + if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None + else self.default_transition_sec + ) + out.append(total) + + if self.loop: + keyframe = 
next(iter(self._keyframes.values()))[0] + total += ( + keyframe.override_transition_sec + if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None + else self.default_transition_sec + ) + out.append(total) + + return np.array(out) + def populate_render_tab( - server: viser.ViserServer, config_path: Path, datapath: Path, control_panel: ControlPanel + server: viser.ViserServer, + config_path: Path, + datapath: Path, + control_panel: Optional[ControlPanel] = None, ) -> None: from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO fov_degrees = server.add_gui_slider( - "FOV", - initial_value=90.0, + "Default FOV", + initial_value=75.0, min=0.1, max=175.0, step=0.01, @@ -277,7 +437,7 @@ def populate_render_tab( @fov_degrees.on_update def _(_) -> None: - fov_radians = fov_degrees.value / 180.0 * onp.pi + fov_radians = fov_degrees.value / 180.0 * np.pi for client in server.get_clients().values(): client.camera.fov = fov_radians camera_path.default_fov = fov_radians @@ -294,19 +454,14 @@ def _(_) -> None: step=1, hint="Render output resolution in pixels.", ) - - @resolution.on_update - def _(_) -> None: - """Update the aspect ratio for all cameras when the resolution changes.""" - camera_path.update_aspect(resolution.value[0] / resolution.value[1]) + resolution.on_update(lambda _: camera_path.update_aspect(resolution.value[0] / resolution.value[1])) camera_type = server.add_gui_dropdown( "Camera Type", ("Perspective", "Fisheye", "Equirectangular"), initial_value="Perspective", - hint="Camera model to render with.", + hint="Camera model to render with. This is applied to all keyframes.", ) - add_button = server.add_gui_button( "Add keyframe", icon=viser.Icon.PLUS, @@ -320,8 +475,12 @@ def _(event: viser.GuiEvent) -> None: # Add this camera to the path. camera_path.add_camera( - Keyframe.from_camera(camera, aspect=resolution.value[0] / resolution.value[1]), + Keyframe.from_camera( + camera, + aspect=resolution.value[0] / resolution.value[1], + ), ) + duration_number.value = camera_path.compute_duration() camera_path.update_spline() reset_up_button = server.add_gui_button( @@ -333,7 +492,7 @@ def _(event: viser.GuiEvent) -> None: @reset_up_button.on_click def _(event: viser.GuiEvent) -> None: assert event.client is not None - event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ onp.array([0.0, -1.0, 0.0]) + event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0]) clear_keyframes_button = server.add_gui_button( "Clear keyframes", @@ -345,7 +504,7 @@ def _(event: viser.GuiEvent) -> None: def _(event: viser.GuiEvent) -> None: assert event.client_id is not None client = server.get_clients()[event.client_id] - with client.add_gui_modal("Confirm") as modal: + with client.atomic(), client.add_gui_modal("Confirm") as modal: client.add_gui_markdown("Clear all keyframes?") confirm_button = client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH) exit_button = client.add_gui_button("Cancel") @@ -355,6 +514,8 @@ def _(_) -> None: camera_path.reset() modal.close() + duration_number.value = camera_path.compute_duration() + # Clear move handles. 
if len(transform_controls) > 0: for t in transform_controls: @@ -366,14 +527,14 @@ def _(_) -> None: def _(_) -> None: modal.close() - loop = server.add_gui_checkbox("Loop", False) + loop = server.add_gui_checkbox("Loop", False, hint="Add a segment between the first and last keyframes.") @loop.on_update def _(_) -> None: camera_path.loop = loop.value - camera_path.update_spline() + duration_number.value = camera_path.compute_duration() - smoothness = server.add_gui_slider( + tension_slider = server.add_gui_slider( "Spline Tension", min=0.0, max=1.0, @@ -382,9 +543,9 @@ def _(_) -> None: hint="Tension parameter for adjusting smoothness of spline interpolation.", ) - @smoothness.on_update + @tension_slider.on_update def _(_) -> None: - camera_path.smoothness = smoothness.value + camera_path.tension = tension_slider.value camera_path.update_spline() move_checkbox = server.add_gui_checkbox( @@ -428,41 +589,80 @@ def _(_) -> None: transform_controls.append(controls) _make_transform_controls_callback(keyframe, controls) + show_keyframe_checkbox = server.add_gui_checkbox( + "Show keyframes", + initial_value=True, + hint="Show keyframes in the scene.", + ) + + @show_keyframe_checkbox.on_update + def _(_: viser.GuiEvent) -> None: + camera_path.set_keyframes_visible(show_keyframe_checkbox.value) + + show_spline_checkbox = server.add_gui_checkbox( + "Show spline", + initial_value=True, + hint="Show camera path spline in the scene.", + ) + + @show_spline_checkbox.on_update + def _(_) -> None: + camera_path.show_spline = show_spline_checkbox.value + camera_path.update_spline() + playback_folder = server.add_gui_folder("Playback") with playback_folder: - duration_number = server.add_gui_number("Duration (sec)", min=0.0, max=1e8, step=0.0001, initial_value=4.0) - framerate_number = server.add_gui_number("Frame rate (FPS)", min=0.1, max=240.0, step=1e-8, initial_value=30.0) + play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY) + pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False) + attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False) + transition_sec_number = server.add_gui_number( + "Transition (sec)", + min=0.001, + max=30.0, + step=0.001, + initial_value=0.5, + hint="Time in seconds between each keyframe, which can also be overridden on a per-transition basis.", + ) + framerate_number = server.add_gui_number("FPS", min=0.1, max=240.0, step=1e-2, initial_value=30.0) framerate_buttons = server.add_gui_button_group("", ("24", "30", "60")) + duration_number = server.add_gui_number( + "Duration (sec)", + min=0.0, + max=1e8, + step=0.001, + initial_value=0.0, + disabled=True, + ) @framerate_buttons.on_click def _(_) -> None: framerate_number.value = float(framerate_buttons.value) - play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY) - pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False) - attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False) - show_checkbox = server.add_gui_checkbox( - "Show keyframes", - initial_value=True, - hint="Show keyframes in the scene.", - ) + @transition_sec_number.on_update + def _(_) -> None: + camera_path.default_transition_sec = transition_sec_number.value + duration_number.value = camera_path.compute_duration() - @show_checkbox.on_update - def _(_: viser.GuiEvent) -> None: - camera_path.set_keyframes_visible(show_checkbox.value) + def get_max_frame_index() -> int: + return max(1, 
int(framerate_number.value * duration_number.value) - 1) + + preview_camera_handle: Optional[viser.SceneNodeHandle] = None + + def remove_preview_camera() -> None: + nonlocal preview_camera_handle + if preview_camera_handle is not None: + preview_camera_handle.remove() + preview_camera_handle = None def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: """Helper for creating the current frame # slider. This is removed and re-added anytime the `max` value changes.""" - max_frame_index = int(framerate_number.value * duration_number.value) - 1 - if max_frame_index <= 0: - return None with playback_folder: preview_frame_slider = server.add_gui_slider( "Preview frame", min=0, - max=max_frame_index, + max=get_max_frame_index(), step=1, initial_value=0, # Place right after the pause button. @@ -471,16 +671,19 @@ def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: @preview_frame_slider.on_update def _(_) -> None: - max_frame_index = int(framerate_number.value * duration_number.value) - 1 - maybe_pose_and_fov = camera_path.interpolate_pose_and_fov( - preview_frame_slider.value / max_frame_index if max_frame_index > 0 else 0 + nonlocal preview_camera_handle + + maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad( + preview_frame_slider.value / get_max_frame_index() ) - if maybe_pose_and_fov is None: + if maybe_pose_and_fov_rad is None: + remove_preview_camera() return - pose, fov = maybe_pose_and_fov - server.add_camera_frustum( + pose, fov_rad = maybe_pose_and_fov_rad + + preview_camera_handle = server.add_camera_frustum( "/preview_camera", - fov=fov, + fov=fov_rad, aspect=resolution.value[0] / resolution.value[1], scale=0.35, wxyz=pose.rotation().wxyz, @@ -497,21 +700,55 @@ def _(_) -> None: for client in server.get_clients().values(): client.camera.wxyz = pose.rotation().wxyz client.camera.position = pose.translation() - client.camera.fov = fov + client.camera.fov = fov_rad return preview_frame_slider @attach_viewport_checkbox.on_update def _(_) -> None: + if preview_frame_slider is None: + remove_preview_camera() + return + maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad( + preview_frame_slider.value / get_max_frame_index() + ) + if maybe_pose_and_fov_rad is None: + remove_preview_camera() + return + pose, fov = maybe_pose_and_fov_rad + server.add_camera_frustum( + "/preview_camera", + fov=fov, + aspect=resolution.value[0] / resolution.value[1], + scale=0.35, + wxyz=pose.rotation().wxyz, + position=pose.translation(), + color=(10, 200, 30), + # Hack: hide green frustum if the viewport is attached. + # This is a waste of bandwidth, but will ensure that any old + # frustums are removed/aren't rendered. + # + # Easy to fix with a global variable. + visible=not attach_viewport_checkbox.value, + ) if not attach_viewport_checkbox.value: for client in server.get_clients().values(): - client.camera.fov = fov_degrees.value + client.camera.fov = fov_degrees.value / 180 * np.pi + else: + if attach_viewport_checkbox.value: + for client in server.get_clients().values(): + client.camera.wxyz = pose.rotation().wxyz + client.camera.position = pose.translation() + client.camera.fov = fov preview_frame_slider = add_preview_frame_slider() + # Update the # of frames. @duration_number.on_update @framerate_number.on_update def _(_) -> None: + remove_preview_camera() # Will be re-added when slider is updated. 
+ nonlocal preview_frame_slider old = preview_frame_slider assert old is not None @@ -522,6 +759,9 @@ def _(_) -> None: else: preview_frame_slider = old + camera_path.framerate = framerate_number.value + camera_path.update_spline() + # Play the camera trajectory when the play button is pressed. @play_button.on_click def _(_) -> None: @@ -580,20 +820,25 @@ def _(_) -> None: camera_path.reset() for i in range(len(keyframes)): frame = keyframes[i] - pose = tf.SE3.from_matrix(onp.array(frame["matrix"]).reshape(4, 4)) + pose = tf.SE3.from_matrix(np.array(frame["matrix"]).reshape(4, 4)) # apply the x rotation by 180 deg pose = tf.SE3.from_rotation_and_translation( - pose.rotation() @ tf.SO3.from_x_radians(onp.pi), pose.translation() + pose.rotation() @ tf.SO3.from_x_radians(np.pi), pose.translation() ) camera_path.add_camera( Keyframe( position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO, wxyz=pose.rotation().wxyz, override_fov_enabled=True, - override_fov_value=frame["fov"] / 180.0 * onp.pi, + override_fov_rad=frame["fov"] / 180.0 * np.pi, aspect=frame["aspect"], + override_transition_enabled=frame.get("override_transition_enabled", None), + override_transition_sec=frame.get("override_transition_sec", None), ), ) + + transition_sec_number.value = json_data.get("default_transition_sec", 0.5) + # update the render name render_name_text.value = json_path.stem camera_path.update_spline() @@ -624,9 +869,9 @@ def _(event: viser.GuiEvent) -> None: json_data = {} # json data has the properties: # keyframes: list of keyframes with - # matrix : flattened 4x4 matrix - # fov: float in degrees - # aspect: float + # matrix : flattened 4x4 matrix + # fov: float in degrees + # aspect: float # camera_type: string of camera type # render_height: int # render_width: int @@ -642,18 +887,21 @@ def _(event: viser.GuiEvent) -> None: keyframes = [] for keyframe, dummy in camera_path._keyframes.values(): pose = tf.SE3.from_rotation_and_translation( - tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(onp.pi), + tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(np.pi), keyframe.position / VISER_NERFSTUDIO_SCALE_RATIO, ) keyframes.append( { "matrix": pose.as_matrix().flatten().tolist(), - "fov": onp.rad2deg(keyframe.override_fov_value) + "fov": np.rad2deg(keyframe.override_fov_rad) if keyframe.override_fov_enabled else fov_degrees.value, "aspect": keyframe.aspect, + "override_transition_enabled": keyframe.override_transition_enabled, + "override_transition_sec": keyframe.override_transition_sec, } ) + json_data["default_transition_sec"] = transition_sec_number.value json_data["keyframes"] = keyframes json_data["camera_type"] = camera_type.value.lower() json_data["render_height"] = resolution.value[1] @@ -661,38 +909,39 @@ def _(event: viser.GuiEvent) -> None: json_data["fps"] = framerate_number.value json_data["seconds"] = duration_number.value json_data["is_cycle"] = loop.value - json_data["smoothness_value"] = smoothness.value + json_data["smoothness_value"] = tension_slider.value # now populate the camera path: camera_path_list = [] for i in range(num_frames): - maybe_pose_and_fov = camera_path.interpolate_pose_and_fov(i / num_frames) + maybe_pose_and_fov = camera_path.interpolate_pose_and_fov_rad(i / num_frames) if maybe_pose_and_fov is None: return pose, fov = maybe_pose_and_fov # rotate the axis of the camera 180 about x axis pose = tf.SE3.from_rotation_and_translation( - pose.rotation() @ tf.SO3.from_x_radians(onp.pi), + pose.rotation() @ tf.SO3.from_x_radians(np.pi), pose.translation() / 
VISER_NERFSTUDIO_SCALE_RATIO, ) camera_path_list.append( { "camera_to_world": pose.as_matrix().flatten().tolist(), - "fov": onp.rad2deg(fov), + "fov": np.rad2deg(fov), "aspect": resolution.value[0] / resolution.value[1], } ) json_data["camera_path"] = camera_path_list # finally add crop data if crop is enabled - if control_panel.crop_viewport: - obb = control_panel.crop_obb - rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians() - color = control_panel.background_color - json_data["crop"] = { - "crop_center": obb.T.tolist(), - "crop_scale": obb.S.tolist(), - "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw], - "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]}, - } + if control_panel is not None: + if control_panel.crop_viewport: + obb = control_panel.crop_obb + rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians() + color = control_panel.background_color + json_data["crop"] = { + "crop_center": obb.T.tolist(), + "crop_scale": obb.S.tolist(), + "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw], + "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]}, + } # now write the json file json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json" @@ -727,7 +976,18 @@ def _(event: viser.GuiEvent) -> None: def _(_) -> None: modal.close() - camera_path = CameraPath(server) - camera_path.default_fov = fov_degrees.value / 180.0 * onp.pi + camera_path = CameraPath(server, duration_number) + camera_path.default_fov = fov_degrees.value / 180.0 * np.pi + camera_path.default_transition_sec = transition_sec_number.value transform_controls: List[viser.SceneNodeHandle] = [] + + +if __name__ == "__main__": + populate_render_tab( + server=viser.ViserServer(), + config_path=Path("."), + datapath=Path("."), + ) + while True: + time.sleep(10.0) diff --git a/pyproject.toml b/pyproject.toml index 0d8708ddab..780d2c6b5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.7", + "viser==0.1.12", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", From 8b85c44b276ac3ab482840c03b12b556ec98a18f Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 29 Nov 2023 19:42:42 -0800 Subject: [PATCH 074/101] Patch spiral rendering w/ parallel datamanager (#2637) * fix jittering in markdown in viewer beta * Revert "fix jittering in markdown in viewer beta" This reverts commit 70ade42e46f3dcf0e89e4efc445650f6d6525673. 
* print correctly formatted url in banner for viewer beta * allow parallel datamanager in spiral rendering * lint * Update render.py --------- Co-authored-by: Brent Yi --- nerfstudio/scripts/render.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index d9d815ed71..79cc6c7bd9 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -61,6 +61,10 @@ VanillaDataManager, VanillaDataManagerConfig, ) +from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager +from nerfstudio.data.datamanagers.random_cameras_datamanager import ( + RandomCamerasDataManager, +) from nerfstudio.data.datasets.base_dataset import Dataset from nerfstudio.data.scene_box import OrientedBox from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader @@ -666,7 +670,14 @@ def main(self) -> None: install_checks.check_ffmpeg_installed() - assert isinstance(pipeline.datamanager, VanillaDataManager) + assert isinstance( + pipeline.datamanager, + ( + VanillaDataManager, + ParallelDataManager, + RandomCamerasDataManager, + ), + ) steps = int(self.frame_rate * self.seconds) camera_start = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0).flatten() camera_path = get_spiral_path(camera_start, steps=steps, radius=self.radius) From 5a772b177d2c8e28ec346758e57cc64eab5ec564 Mon Sep 17 00:00:00 2001 From: Yosshi999 Date: Thu, 30 Nov 2023 14:57:43 +0900 Subject: [PATCH 075/101] Bugfix: shape confusion in resizing (#2565) Co-authored-by: Justin Kerr Co-authored-by: Brent Yi --- nerfstudio/data/dataparsers/dycheck_dataparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/dycheck_dataparser.py b/nerfstudio/data/dataparsers/dycheck_dataparser.py index 50a581f288..d9d4c8f4c6 100644 --- a/nerfstudio/data/dataparsers/dycheck_dataparser.py +++ b/nerfstudio/data/dataparsers/dycheck_dataparser.py @@ -322,7 +322,7 @@ def process_frames(self, frame_names: List[str], time_ids: np.ndarray) -> Tuple[ for frame in frame_names: cv2.imwrite( str(self.data / f"rgb/{d}x/{frame}.png"), - cv2.resize(cv2.imread(str(self.data / f"rgb/1x/{frame}.png")), (h, w)), + cv2.resize(cv2.imread(str(self.data / f"rgb/1x/{frame}.png")), (w, h)), ) CONSOLE.print("finished") From 3d8af872901318e8b2dd697857f210e8285f15b8 Mon Sep 17 00:00:00 2001 From: David Holtz <56723830+dmholtz@users.noreply.github.com> Date: Fri, 1 Dec 2023 00:00:31 +0100 Subject: [PATCH 076/101] Make nuScenes dataparser compatible with ParallelDataManager (#2635) The ParallelDataManager (see #2092) makes pytorch crash if the cameras instance's fx, fy, cx or cy tensors are loaded from a common shared tensor. This PR fixes the issue by cloning the respective tensors before passing them to the Cameras(...) constructor. 
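As an illustration (not taken from the patch below), a minimal sketch of the cloning approach described above, assuming `N = 100` cameras and an identity `intrinsics` tensor of shape `[N, 3, 3]` as a stand-in for the real nuScenes calibration:

```python
# Minimal sketch (assumption: a placeholder `intrinsics` tensor stands in for
# the nuScenes per-camera calibration matrices).
import torch

N = 100
intrinsics = torch.eye(3).repeat(N, 1, 1)  # shape [N, 3, 3]

# Plain slicing returns views that still share storage with `intrinsics`.
fx_view = intrinsics[:, 0, 0]
assert fx_view.data_ptr() == intrinsics.data_ptr()

# detach().clone() copies each parameter into its own storage, so the camera
# fields no longer hold views into one common shared tensor.
fx = intrinsics[:, 0, 0].detach().clone()
fy = intrinsics[:, 1, 1].detach().clone()
cx = intrinsics[:, 0, 2].detach().clone()
cy = intrinsics[:, 1, 2].detach().clone()
assert fx.data_ptr() != intrinsics.data_ptr()
```

The `.detach().clone()` call both drops any autograd history and copies the data out of the shared buffer, so the parallel data manager's worker processes are not handed views into a single common tensor.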
--- nerfstudio/data/dataparsers/nuscenes_dataparser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nerfstudio/data/dataparsers/nuscenes_dataparser.py b/nerfstudio/data/dataparsers/nuscenes_dataparser.py index 317717e860..0e899a68c9 100644 --- a/nerfstudio/data/dataparsers/nuscenes_dataparser.py +++ b/nerfstudio/data/dataparsers/nuscenes_dataparser.py @@ -200,10 +200,10 @@ def _generate_dataparser_outputs(self, split="train"): ) cameras = Cameras( - fx=intrinsics[:, 0, 0], - fy=intrinsics[:, 1, 1], - cx=intrinsics[:, 0, 2], - cy=intrinsics[:, 1, 2], + fx=intrinsics[:, 0, 0].detach().clone(), + fy=intrinsics[:, 1, 1].detach().clone(), + cx=intrinsics[:, 0, 2].detach().clone(), + cy=intrinsics[:, 1, 2].detach().clone(), height=900, width=1600, camera_to_worlds=poses[:, :3, :4], From 747dd557f67ddf66f9f0192b697e208780cffcb7 Mon Sep 17 00:00:00 2001 From: blacksino <44363764+blacksino@users.noreply.github.com> Date: Fri, 1 Dec 2023 07:07:26 +0800 Subject: [PATCH 077/101] fix include_input for NeRFEncoding (#2642) --- nerfstudio/field_components/encodings.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index ee2aac93c8..15a2ce3ab4 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -169,17 +169,18 @@ def pytorch_fwd( encoded_inputs = expected_sin( torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1), torch.cat(2 * [input_var], dim=-1) ) - - if self.include_input: - encoded_inputs = torch.cat([encoded_inputs, in_tensor], dim=-1) return encoded_inputs def forward( self, in_tensor: Float[Tensor, "*bs input_dim"], covs: Optional[Float[Tensor, "*bs input_dim input_dim"]] = None ) -> Float[Tensor, "*bs output_dim"]: if self.tcnn_encoding is not None: - return self.tcnn_encoding(in_tensor) - return self.pytorch_fwd(in_tensor, covs) + encoded_inputs = self.tcnn_encoding(in_tensor) + else: + encoded_inputs = self.pytorch_fwd(in_tensor, covs) + if self.include_input: + encoded_inputs = torch.cat([encoded_inputs, in_tensor], dim=-1) + return encoded_inputs class FFEncoding(Encoding): From 43a7da36a512cab9032c81b3b9496f908a7ab61c Mon Sep 17 00:00:00 2001 From: Jaggz H Date: Thu, 30 Nov 2023 15:51:36 -0800 Subject: [PATCH 078/101] Converted many @dataclass assignments for python 3.11 compatibility (#2630) Converted many @dataclass assignments to field()s for python 3.11 compatibility --- nerfstudio/configs/base_config.py | 4 ++-- nerfstudio/configs/experiment_config.py | 10 +++++----- nerfstudio/data/datamanagers/base_datamanager.py | 4 ++-- nerfstudio/data/dataparsers/base_dataparser.py | 2 +- nerfstudio/models/base_surface_model.py | 2 +- nerfstudio/models/nerfacto.py | 2 +- nerfstudio/models/tensorf.py | 2 +- nerfstudio/pipelines/base_pipeline.py | 4 ++-- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py index b29661e116..c04c11aaac 100644 --- a/nerfstudio/configs/base_config.py +++ b/nerfstudio/configs/base_config.py @@ -17,7 +17,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, List, Literal, Optional, Tuple, Type @@ -113,7 +113,7 @@ class LoggingConfig(PrintableConfig): max_buffer_size: int = 20 """maximum history size to keep for computing running averages of stats. e.g. 
if 20, averages will be computed over past 20 occurrences.""" - local_writer: LocalWriterConfig = LocalWriterConfig(enable=True) + local_writer: LocalWriterConfig = field(default_factory=lambda: LocalWriterConfig(enable=True)) """if provided, will print stats locally. if None, will disable printing""" profiler: Literal["none", "basic", "pytorch"] = "basic" """how to profile the code; diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py index d4b3d4de66..5686898b70 100644 --- a/nerfstudio/configs/experiment_config.py +++ b/nerfstudio/configs/experiment_config.py @@ -16,7 +16,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from typing import Any, Dict, Literal, Optional @@ -51,13 +51,13 @@ class ExperimentConfig(InstantiateConfig): """Project name.""" timestamp: str = "{timestamp}" """Experiment timestamp.""" - machine: MachineConfig = MachineConfig() + machine: MachineConfig = field(default_factory=lambda: MachineConfig()) """Machine configuration""" - logging: LoggingConfig = LoggingConfig() + logging: LoggingConfig = field(default_factory=lambda: LoggingConfig()) """Logging configuration""" - viewer: ViewerConfig = ViewerConfig() + viewer: ViewerConfig = field(default_factory=lambda: ViewerConfig()) """Viewer configuration""" - pipeline: VanillaPipelineConfig = VanillaPipelineConfig() + pipeline: VanillaPipelineConfig = field(default_factory=lambda: VanillaPipelineConfig()) """Pipeline configuration""" optimizers: Dict[str, Any] = to_immutable_dict( { diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 9be95514c1..7971f46e56 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -317,7 +317,7 @@ class VanillaDataManagerConfig(DataManagerConfig): _target: Type = field(default_factory=lambda: VanillaDataManager) """Target class to instantiate.""" - dataparser: AnnotatedDataParserUnion = BlenderDataParserConfig() + dataparser: AnnotatedDataParserUnion = field(default_factory=lambda: BlenderDataParserConfig()) """Specifies the dataparser used to unpack the data.""" train_num_rays_per_batch: int = 1024 """Number of rays per batch to use per training iteration.""" @@ -345,7 +345,7 @@ class VanillaDataManagerConfig(DataManagerConfig): """Size of patch to sample from. 
If > 1, patch-based sampling will be used.""" camera_optimizer: Optional[CameraOptimizerConfig] = field(default=None) """Deprecated, has been moved to the model config.""" - pixel_sampler: PixelSamplerConfig = PixelSamplerConfig() + pixel_sampler: PixelSamplerConfig = field(default_factory=lambda: PixelSamplerConfig()) """Specifies the pixel sampler used to sample pixels from images.""" def __post_init__(self): diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py index 80fab739f4..5cf1e6bdbf 100644 --- a/nerfstudio/data/dataparsers/base_dataparser.py +++ b/nerfstudio/data/dataparsers/base_dataparser.py @@ -57,7 +57,7 @@ class DataparserOutputs: """Camera object storing collection of camera information in dataset.""" alpha_color: Optional[Float[Tensor, "3"]] = None """Color of dataset background.""" - scene_box: SceneBox = SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]])) + scene_box: SceneBox = field(default_factory=lambda: SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]]))) """Scene box of dataset. Used to bound the scene or provide the scene scale depending on model.""" mask_filenames: Optional[List[Path]] = None """Filenames for any masks that are required""" diff --git a/nerfstudio/models/base_surface_model.py b/nerfstudio/models/base_surface_model.py index 638aa27130..97bde9d9f4 100644 --- a/nerfstudio/models/base_surface_model.py +++ b/nerfstudio/models/base_surface_model.py @@ -79,7 +79,7 @@ class SurfaceModelConfig(ModelConfig): """Monocular normal consistency loss multiplier.""" mono_depth_loss_mult: float = 0.0 """Monocular depth consistency loss multiplier.""" - sdf_field: SDFFieldConfig = SDFFieldConfig() + sdf_field: SDFFieldConfig = field(default_factory=lambda: SDFFieldConfig()) """Config for SDF Field""" background_model: Literal["grid", "mlp", "none"] = "mlp" """background models""" diff --git a/nerfstudio/models/nerfacto.py b/nerfstudio/models/nerfacto.py index df8eed9b65..667d23eb81 100644 --- a/nerfstudio/models/nerfacto.py +++ b/nerfstudio/models/nerfacto.py @@ -127,7 +127,7 @@ class NerfactoModelConfig(ModelConfig): """Which implementation to use for the model.""" appearance_embed_dim: int = 32 """Dimension of the appearance embedding.""" - camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3") + camera_optimizer: CameraOptimizerConfig = field(default_factory=lambda: CameraOptimizerConfig(mode="SO3xR3")) """Config of the camera optimizer to use""" diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py index 53d986337e..0ca56f10de 100644 --- a/nerfstudio/models/tensorf.py +++ b/nerfstudio/models/tensorf.py @@ -90,7 +90,7 @@ class TensoRFModelConfig(ModelConfig): tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm" regularization: Literal["none", "l1", "tv"] = "l1" """Regularization method used in tensorf paper""" - camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3") + camera_optimizer: CameraOptimizerConfig = field(default_factory=lambda: CameraOptimizerConfig(mode="SO3xR3")) """Config of the camera optimizer to use""" use_gradient_scaling: bool = False """Use gradient scaler where the gradients are lower for points closer to the camera.""" diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py index 10aca0e70a..345a39d4b6 100644 --- a/nerfstudio/pipelines/base_pipeline.py +++ b/nerfstudio/pipelines/base_pipeline.py @@ -224,9 +224,9 @@ class VanillaPipelineConfig(cfg.InstantiateConfig): _target: 
Type = field(default_factory=lambda: VanillaPipeline) """target class to instantiate""" - datamanager: DataManagerConfig = DataManagerConfig() + datamanager: DataManagerConfig = field(default_factory=lambda: DataManagerConfig()) """specifies the datamanager config""" - model: ModelConfig = ModelConfig() + model: ModelConfig = field(default_factory=lambda: ModelConfig()) """specifies the model config""" From 49503c2175a71c3d71b75d5702c4ff9e8b0342cb Mon Sep 17 00:00:00 2001 From: Jose <34888496+Jerry-Master@users.noreply.github.com> Date: Fri, 1 Dec 2023 01:04:01 +0100 Subject: [PATCH 079/101] Mps fix (#2436) * mps bugfix * removed docs change * Double quotes --------- Co-authored-by: Brent Yi --- nerfstudio/engine/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index fe4aa85cea..4cc296063e 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -464,6 +464,7 @@ def train_iteration(self, step: int) -> TRAIN_INTERATION_OUTPUT: self.optimizers.zero_grad_all() cpu_or_cuda_str: str = self.device.split(":")[0] + cpu_or_cuda_str = "cpu" if cpu_or_cuda_str == "mps" else cpu_or_cuda_str assert ( self.gradient_accumulation_steps > 0 ), f"gradient_accumulation_steps must be > 0, not {self.gradient_accumulation_steps}" From 0cb410001b1fff668e51b12e7ff579711c1187f9 Mon Sep 17 00:00:00 2001 From: Panteleris Paschalis Date: Fri, 1 Dec 2023 03:11:11 +0200 Subject: [PATCH 080/101] Fixes bug in PairPixelSampler when working with masked dataset (#2368) * Fixes bug in PairPixelSampler when working with masked dataset * Formatting * Black formatting now ok. --------- Co-authored-by: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com> Co-authored-by: Brent Yi --- nerfstudio/data/pixel_samplers.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index 9234a5d420..07bab5d826 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -17,13 +17,7 @@ """ import random - -import torch -from jaxtyping import Int -from torch import Tensor - from dataclasses import dataclass, field -from nerfstudio.data.utils.pixel_sampling_utils import erode_mask from typing import ( Dict, Optional, @@ -31,9 +25,14 @@ Union, ) +import torch +from jaxtyping import Int +from torch import Tensor + from nerfstudio.configs.base_config import ( InstantiateConfig, ) +from nerfstudio.data.utils.pixel_sampling_utils import erode_mask @dataclass @@ -398,19 +397,18 @@ def sample_method( # pylint: disable=no-self-use device: Union[torch.device, str] = "cpu", ) -> Int[Tensor, "batch_size 3"]: rays_to_sample = self.rays_to_sample + if batch_size is not None: + assert ( + int(batch_size) % 2 == 0 + ), f"PairPixelSampler can only return batch sizes in multiples of two (got {batch_size})" + rays_to_sample = batch_size // 2 + if isinstance(mask, Tensor): m = erode_mask(mask.permute(0, 3, 1, 2).float(), pixel_radius=self.radius) nonzero_indices = torch.nonzero(m[:, 0], as_tuple=False).to(device) chosen_indices = random.sample(range(len(nonzero_indices)), k=rays_to_sample) indices = nonzero_indices[chosen_indices] else: - rays_to_sample = self.rays_to_sample - if batch_size is not None: - assert ( - int(batch_size) % 2 == 0 - ), f"PairPixelSampler can only return batch sizes in multiples of two (got {batch_size})" - rays_to_sample = batch_size // 2 - s = (rays_to_sample, 1) ns = torch.randint(0, num_images, 
s, dtype=torch.long, device=device) hs = torch.randint(self.radius, image_height - self.radius, s, dtype=torch.long, device=device) From 4c627edbacceec7d6e9c001e94ae3469b59c7a91 Mon Sep 17 00:00:00 2001 From: Ethan Weber Date: Thu, 30 Nov 2023 19:16:42 -0800 Subject: [PATCH 081/101] Encoder + MLP combo (#2063) * adding encoder + mlp combo * update documentation * minor fixes * fixed issue when tcnn isn't installed --------- Co-authored-by: Brent Yi --- nerfstudio/field_components/encodings.py | 88 ++++++++---- nerfstudio/field_components/mlp.py | 176 +++++++++++++++++++---- nerfstudio/fields/nerfacto_field.py | 13 +- 3 files changed, 215 insertions(+), 62 deletions(-) diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index 15a2ce3ab4..dc410673b9 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -48,6 +48,11 @@ def __init__(self, in_dim: int) -> None: raise ValueError("Input dimension should be greater than zero") super().__init__(in_dim=in_dim) + @classmethod + def get_tcnn_encoding_config(cls) -> dict: + """Get the encoding configuration for tcnn if implemented""" + raise NotImplementedError("Encoding does not have a TCNN implementation") + @abstractmethod def forward(self, in_tensor: Shaped[Tensor, "*bs input_dim"]) -> Shaped[Tensor, "*bs output_dim"]: """Call forward and returns and processed tensor @@ -126,14 +131,20 @@ def __init__( if implementation == "tcnn" and not TCNN_EXISTS: print_tcnn_speed_warning("NeRFEncoding") elif implementation == "tcnn": - encoding_config = {"otype": "Frequency", "n_frequencies": num_frequencies} assert min_freq_exp == 0, "tcnn only supports min_freq_exp = 0" assert max_freq_exp == num_frequencies - 1, "tcnn only supports max_freq_exp = num_frequencies - 1" + encoding_config = self.get_tcnn_encoding_config(num_frequencies=self.num_frequencies) self.tcnn_encoding = tcnn.Encoding( n_input_dims=in_dim, encoding_config=encoding_config, ) + @classmethod + def get_tcnn_encoding_config(cls, num_frequencies) -> dict: + """Get the encoding configuration for tcnn if implemented""" + encoding_config = {"otype": "Frequency", "n_frequencies": num_frequencies} + return encoding_config + def get_out_dim(self) -> int: if self.in_dim is None: raise ValueError("Input dimension has not been set") @@ -327,48 +338,67 @@ def __init__( ) -> None: super().__init__(in_dim=3) self.num_levels = num_levels + self.min_res = min_res self.features_per_level = features_per_level + self.hash_init_scale = hash_init_scale self.log2_hashmap_size = log2_hashmap_size self.hash_table_size = 2**log2_hashmap_size levels = torch.arange(num_levels) - growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1 - self.scalings = torch.floor(min_res * growth_factor**levels) + self.growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1 + self.scalings = torch.floor(min_res * self.growth_factor**levels) self.hash_offset = levels * self.hash_table_size self.tcnn_encoding = None self.hash_table = torch.empty(0) - if implementation == "tcnn" and not TCNN_EXISTS: + if implementation == "torch": + self.build_nn_modules() + elif implementation == "tcnn" and not TCNN_EXISTS: print_tcnn_speed_warning("HashEncoding") - implementation = "torch" - - if implementation == "tcnn": - encoding_config = { - "otype": "HashGrid", - "n_levels": self.num_levels, - "n_features_per_level": self.features_per_level, - 
"log2_hashmap_size": self.log2_hashmap_size, - "base_resolution": min_res, - "per_level_scale": growth_factor, - } - if interpolation is not None: - encoding_config["interpolation"] = interpolation - + self.build_nn_modules() + elif implementation == "tcnn": + encoding_config = self.get_tcnn_encoding_config( + num_levels=self.num_levels, + features_per_level=self.features_per_level, + log2_hashmap_size=self.log2_hashmap_size, + min_res=self.min_res, + growth_factor=self.growth_factor, + interpolation=interpolation, + ) self.tcnn_encoding = tcnn.Encoding( n_input_dims=3, encoding_config=encoding_config, ) - elif implementation == "torch": - self.hash_table = torch.rand(size=(self.hash_table_size * num_levels, features_per_level)) * 2 - 1 - self.hash_table *= hash_init_scale - self.hash_table = nn.Parameter(self.hash_table) if self.tcnn_encoding is None: assert ( interpolation is None or interpolation == "Linear" ), f"interpolation '{interpolation}' is not supported for torch encoding backend" + def build_nn_modules(self) -> None: + """Initialize the torch version of the hash encoding.""" + self.hash_table = torch.rand(size=(self.hash_table_size * self.num_levels, self.features_per_level)) * 2 - 1 + self.hash_table *= self.hash_init_scale + self.hash_table = nn.Parameter(self.hash_table) + + @classmethod + def get_tcnn_encoding_config( + cls, num_levels, features_per_level, log2_hashmap_size, min_res, growth_factor, interpolation=None + ) -> dict: + """Get the encoding configuration for tcnn if implemented""" + encoding_config = { + "otype": "HashGrid", + "n_levels": num_levels, + "n_features_per_level": features_per_level, + "log2_hashmap_size": log2_hashmap_size, + "base_resolution": min_res, + "per_level_scale": growth_factor, + } + if interpolation is not None: + encoding_config["interpolation"] = interpolation + return encoding_config + def get_out_dim(self) -> int: return self.num_levels * self.features_per_level @@ -745,15 +775,21 @@ def __init__(self, levels: int = 4, implementation: Literal["tcnn", "torch"] = " if implementation == "tcnn" and not TCNN_EXISTS: print_tcnn_speed_warning("SHEncoding") elif implementation == "tcnn": - encoding_config = { - "otype": "SphericalHarmonics", - "degree": levels, - } + encoding_config = self.get_tcnn_encoding_config(levels=self.levels) self.tcnn_encoding = tcnn.Encoding( n_input_dims=3, encoding_config=encoding_config, ) + @classmethod + def get_tcnn_encoding_config(cls, levels) -> dict: + """Get the encoding configuration for tcnn if implemented""" + encoding_config = { + "otype": "SphericalHarmonics", + "degree": levels, + } + return encoding_config + def get_out_dim(self) -> int: return self.levels**2 diff --git a/nerfstudio/field_components/mlp.py b/nerfstudio/field_components/mlp.py index 2585a49e4e..94e6b1eb49 100644 --- a/nerfstudio/field_components/mlp.py +++ b/nerfstudio/field_components/mlp.py @@ -17,12 +17,14 @@ """ from typing import Literal, Optional, Set, Tuple, Union +import numpy as np import torch from jaxtyping import Float from torch import Tensor, nn from nerfstudio.field_components.base_field_component import FieldComponent from nerfstudio.utils.printing import print_tcnn_speed_warning +from nerfstudio.field_components.encodings import HashEncoding from nerfstudio.utils.rich_utils import CONSOLE from nerfstudio.utils.external import TCNN_EXISTS, tcnn @@ -66,6 +68,7 @@ class MLP(FieldComponent): out_dim: Output layer dimension. Uses layer_width if None. activation: intermediate layer activation function. 
out_activation: output activation function. + implementation: Implementation of hash encoding. Fallback to torch if tcnn not available. """ def __init__( @@ -98,39 +101,47 @@ def __init__( print_tcnn_speed_warning("MLP") self.build_nn_modules() elif implementation == "tcnn": - activation_str = activation_to_tcnn_string(activation) - output_activation_str = activation_to_tcnn_string(out_activation) - if layer_width in [16, 32, 64, 128]: - network_config = { - "otype": "FullyFusedMLP", - "activation": activation_str, - "output_activation": output_activation_str, - "n_neurons": layer_width, - "n_hidden_layers": num_layers - 1, - } - else: - CONSOLE.line() - CONSOLE.print("[bold yellow]WARNING: Using slower TCNN CutlassMLP instead of TCNN FullyFusedMLP") - CONSOLE.print( - "[bold yellow]Use layer width of 16, 32, 64, or 128 to use the faster TCNN FullyFusedMLP." - ) - CONSOLE.line() - network_config = { - "otype": "CutlassMLP", - "activation": activation_str, - "output_activation": output_activation_str, - "n_neurons": layer_width, - "n_hidden_layers": num_layers - 1, - } - + network_config = self.get_tcnn_network_config( + activation=self.activation, + out_activation=self.out_activation, + layer_width=self.layer_width, + num_layers=self.num_layers, + ) self.tcnn_encoding = tcnn.Network( n_input_dims=in_dim, - n_output_dims=out_dim, + n_output_dims=self.out_dim, network_config=network_config, ) + @classmethod + def get_tcnn_network_config(cls, activation, out_activation, layer_width, num_layers) -> dict: + """Get the network configuration for tcnn if implemented""" + activation_str = activation_to_tcnn_string(activation) + output_activation_str = activation_to_tcnn_string(out_activation) + if layer_width in [16, 32, 64, 128]: + network_config = { + "otype": "FullyFusedMLP", + "activation": activation_str, + "output_activation": output_activation_str, + "n_neurons": layer_width, + "n_hidden_layers": num_layers - 1, + } + else: + CONSOLE.line() + CONSOLE.print("[bold yellow]WARNING: Using slower TCNN CutlassMLP instead of TCNN FullyFusedMLP") + CONSOLE.print("[bold yellow]Use layer width of 16, 32, 64, or 128 to use the faster TCNN FullyFusedMLP.") + CONSOLE.line() + network_config = { + "otype": "CutlassMLP", + "activation": activation_str, + "output_activation": output_activation_str, + "n_neurons": layer_width, + "n_hidden_layers": num_layers - 1, + } + return network_config + def build_nn_modules(self) -> None: - """Initialize multi-layer perceptron.""" + """Initialize the torch version of the multi-layer perceptron.""" layers = [] if self.num_layers == 1: layers.append(nn.Linear(self.in_dim, self.out_dim)) @@ -171,3 +182,114 @@ def forward(self, in_tensor: Float[Tensor, "*bs in_dim"]) -> Float[Tensor, "*bs if self.tcnn_encoding is not None: return self.tcnn_encoding(in_tensor) return self.pytorch_fwd(in_tensor) + + +class MLPWithHashEncoding(FieldComponent): + """Multilayer perceptron with hash encoding + + Args: + num_levels: Number of feature grids. + min_res: Resolution of smallest feature grid. + max_res: Resolution of largest feature grid. + log2_hashmap_size: Size of hash map is 2^log2_hashmap_size. + features_per_level: Number of features per level. + hash_init_scale: Value to initialize hash grid. + interpolation: Interpolation override for tcnn hashgrid. Not supported for torch unless linear. + num_layers: Number of network layers + layer_width: Width of each MLP layer + out_dim: Output layer dimension. Uses layer_width if None. + activation: intermediate layer activation function. 
+ out_activation: output activation function. + implementation: Implementation of hash encoding. Fallback to torch if tcnn not available. + """ + + def __init__( + self, + num_levels: int = 16, + min_res: int = 16, + max_res: int = 1024, + log2_hashmap_size: int = 19, + features_per_level: int = 2, + hash_init_scale: float = 0.001, + interpolation: Optional[Literal["Nearest", "Linear", "Smoothstep"]] = None, + num_layers: int = 2, + layer_width: int = 64, + out_dim: Optional[int] = None, + skip_connections: Optional[Tuple[int]] = None, + activation: Optional[nn.Module] = nn.ReLU(), + out_activation: Optional[nn.Module] = None, + implementation: Literal["tcnn", "torch"] = "torch", + ) -> None: + super().__init__() + self.in_dim = 3 + + self.num_levels = num_levels + self.min_res = min_res + self.max_res = max_res + self.features_per_level = features_per_level + self.hash_init_scale = hash_init_scale + self.log2_hashmap_size = log2_hashmap_size + self.hash_table_size = 2**log2_hashmap_size + + self.growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1 + + self.out_dim = out_dim if out_dim is not None else layer_width + self.num_layers = num_layers + self.layer_width = layer_width + self.skip_connections = skip_connections + self._skip_connections: Set[int] = set(skip_connections) if skip_connections else set() + self.activation = activation + self.out_activation = out_activation + self.net = None + + self.tcnn_encoding = None + if implementation == "torch": + self.build_nn_modules() + elif implementation == "tcnn" and not TCNN_EXISTS: + print_tcnn_speed_warning("MLPWithHashEncoding") + self.build_nn_modules() + elif implementation == "tcnn": + self.model = tcnn.NetworkWithInputEncoding( + n_input_dims=self.in_dim, + n_output_dims=self.out_dim, + encoding_config=HashEncoding.get_tcnn_encoding_config( + num_levels=self.num_levels, + features_per_level=self.features_per_level, + log2_hashmap_size=self.log2_hashmap_size, + min_res=self.min_res, + growth_factor=self.growth_factor, + interpolation=interpolation, + ), + network_config=MLP.get_tcnn_network_config( + activation=self.activation, + out_activation=self.out_activation, + layer_width=self.layer_width, + num_layers=self.num_layers, + ), + ) + + def build_nn_modules(self) -> None: + """Initialize the torch version of the MLP with hash encoding.""" + encoder = HashEncoding( + num_levels=self.num_levels, + min_res=self.min_res, + max_res=self.max_res, + log2_hashmap_size=self.log2_hashmap_size, + features_per_level=self.features_per_level, + hash_init_scale=self.hash_init_scale, + implementation="torch", + ) + mlp = MLP( + in_dim=encoder.get_out_dim(), + num_layers=self.num_layers, + layer_width=self.layer_width, + out_dim=self.out_dim, + skip_connections=self.skip_connections, + activation=self.activation, + out_activation=self.out_activation, + implementation="torch", + ) + self.model = torch.nn.Sequential(encoder, mlp) + + def forward(self, in_tensor: Float[Tensor, "*bs in_dim"]) -> Float[Tensor, "*bs out_dim"]: + return self.model(in_tensor) diff --git a/nerfstudio/fields/nerfacto_field.py b/nerfstudio/fields/nerfacto_field.py index f215a00458..910bba8465 100644 --- a/nerfstudio/fields/nerfacto_field.py +++ b/nerfstudio/fields/nerfacto_field.py @@ -26,7 +26,7 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.field_components.activations import trunc_exp from nerfstudio.field_components.embedding import Embedding -from nerfstudio.field_components.encodings import 
HashEncoding, NeRFEncoding, SHEncoding +from nerfstudio.field_components.encodings import NeRFEncoding, SHEncoding from nerfstudio.field_components.field_heads import ( FieldHeadNames, PredNormalsFieldHead, @@ -35,13 +35,13 @@ TransientRGBFieldHead, UncertaintyFieldHead, ) -from nerfstudio.field_components.mlp import MLP +from nerfstudio.field_components.mlp import MLP, MLPWithHashEncoding from nerfstudio.field_components.spatial_distortions import SpatialDistortion from nerfstudio.fields.base_field import Field, get_normalized_directions class NerfactoField(Field): - """Compound Field that uses TCNN + """Compound Field Args: aabb: parameters of scene aabb bounds @@ -127,16 +127,12 @@ def __init__( in_dim=3, num_frequencies=2, min_freq_exp=0, max_freq_exp=2 - 1, implementation=implementation ) - self.mlp_base_grid = HashEncoding( + self.mlp_base = MLPWithHashEncoding( num_levels=num_levels, min_res=base_res, max_res=max_res, log2_hashmap_size=log2_hashmap_size, features_per_level=features_per_level, - implementation=implementation, - ) - self.mlp_base_mlp = MLP( - in_dim=self.mlp_base_grid.get_out_dim(), num_layers=num_layers, layer_width=hidden_dim, out_dim=1 + self.geo_feat_dim, @@ -144,7 +140,6 @@ def __init__( out_activation=None, implementation=implementation, ) - self.mlp_base = torch.nn.Sequential(self.mlp_base_grid, self.mlp_base_mlp) # transients if self.use_transient_embedding: From 64f0b2547ba02b1e2aeef719de95b4e7b1aa188a Mon Sep 17 00:00:00 2001 From: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com> Date: Wed, 6 Dec 2023 22:56:39 -0800 Subject: [PATCH 082/101] Ns-process-data bug with dev version of colmap (#2651) * only parse digit and decimal * formatting --- nerfstudio/process_data/colmap_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerfstudio/process_data/colmap_utils.py b/nerfstudio/process_data/colmap_utils.py index 83188c005b..27d6e86150 100644 --- a/nerfstudio/process_data/colmap_utils.py +++ b/nerfstudio/process_data/colmap_utils.py @@ -55,7 +55,9 @@ def get_colmap_version(colmap_cmd: str, default_version=3.8) -> float: assert output is not None for line in output.split("\n"): if line.startswith("COLMAP"): - return float(line.split(" ")[1]) + version = line.split(" ")[1] + version = "".join([c for c in version if c.isdigit() or c == "."]) + return float(version) CONSOLE.print(f"[bold red]Could not find COLMAP version. 
Using default {default_version}") return default_version From 4c966b9bd4bab5a4089e5ca767b4a6af9b3e1bf6 Mon Sep 17 00:00:00 2001 From: omahs <73983677+omahs@users.noreply.github.com> Date: Fri, 8 Dec 2023 12:27:05 +0100 Subject: [PATCH 083/101] Fix typos (#2655) * fix typo * fix typo * fix typos * fix typo --- docs/developer_guides/config.md | 2 +- docs/developer_guides/new_methods.md | 2 +- docs/developer_guides/viewer/local_viewer.md | 2 +- docs/index.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/developer_guides/config.md b/docs/developer_guides/config.md index bf216278f3..acf6914d7f 100644 --- a/docs/developer_guides/config.md +++ b/docs/developer_guides/config.md @@ -89,7 +89,7 @@ Often times, you just want to play with the parameters of an existing model with ns-train --help ``` -- List out all exist configurable parameters for `{METHOD_NAME}` +- List out all existing configurable parameters for `{METHOD_NAME}` ```bash ns-train {METHOD_NAME} --help diff --git a/docs/developer_guides/new_methods.md b/docs/developer_guides/new_methods.md index 4b56d85810..dd1ef31e76 100644 --- a/docs/developer_guides/new_methods.md +++ b/docs/developer_guides/new_methods.md @@ -132,7 +132,7 @@ finally run the following to register the dataparser. pip install -e . ``` -Similarly to the method develomement, you can also use environment variables to register dataparsers. +Similarly to the method development, you can also use environment variables to register dataparsers. Use the `NERFSTUDIO_DATAPARSER_CONFIGS` environment variable: ``` diff --git a/docs/developer_guides/viewer/local_viewer.md b/docs/developer_guides/viewer/local_viewer.md index d5912bd891..2931f8c0d6 100644 --- a/docs/developer_guides/viewer/local_viewer.md +++ b/docs/developer_guides/viewer/local_viewer.md @@ -1,6 +1,6 @@ # Local Server -If you are unable to connect to `https://viewer.nerf.studio`, want to use Safari, or want develop the viewer codebase, you can launch your own local viewer. +If you are unable to connect to `https://viewer.nerf.studio`, want to use Safari, or want to develop the viewer codebase, you can launch your own local viewer. ## Installing Dependencies diff --git a/docs/index.md b/docs/index.md index 9e294c3ac2..6b160fcecc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -135,7 +135,7 @@ This documentation is organized into 3 parts: ### Included Methods -- [**Nerfacto**](nerfology/methods/nerfacto.md): Recommended method, integrates mutiple methods into one. +- [**Nerfacto**](nerfology/methods/nerfacto.md): Recommended method, integrates multiple methods into one. 
- [Instant-NGP](nerfology/methods/instant_ngp.md): Instant Neural Graphics Primitives with a Multiresolution Hash Encoding - [NeRF](nerfology/methods/nerf.md): OG Neural Radiance Fields - [Mip-NeRF](nerfology/methods/mipnerf.md): A Multiscale Representation for Anti-Aliasing Neural Radiance Fields From 1d9cc7dd8a4f1c5fdc8f05dd4ea3b5c3ef6bb85a Mon Sep 17 00:00:00 2001 From: Alexandru Kis Date: Fri, 8 Dec 2023 18:58:51 +0200 Subject: [PATCH 084/101] Update hloc and add new matcher types (#2658) * Update hloc and add new matcher types * Update dockerfile comment to better reflect what the command is doing --------- Co-authored-by: mfischer --- Dockerfile | 3 ++- .../colmap_converter_to_nerfstudio_dataset.py | 2 ++ nerfstudio/process_data/hloc_utils.py | 9 +++++++- nerfstudio/process_data/process_data_utils.py | 23 +++++++++++++++++-- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 300d00df3a..8c9fe4f31c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -135,9 +135,10 @@ RUN git clone --branch v0.4.0 --recursive https://github.com/colmap/pycolmap.git python3.10 -m pip install . && \ cd .. -# Install hloc master (last release (1.3) is too old) as alternative feature detector and matcher option for nerfstudio. +# Install hloc 1.4 as alternative feature detector and matcher option for nerfstudio. RUN git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git && \ cd Hierarchical-Localization && \ + git checkout v1.4 && \ python3.10 -m pip install -e . && \ cd .. diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py index b4dccbdd82..924c4b752d 100644 --- a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py +++ b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py @@ -66,6 +66,8 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset): "NN-ratio", "NN-mutual", "adalam", + "disk+lightglue", + "superpoint+lightglue", ] = "any" """Matching algorithm.""" num_downscales: int = 3 diff --git a/nerfstudio/process_data/hloc_utils.py b/nerfstudio/process_data/hloc_utils.py index a833f51edd..b7167efd88 100644 --- a/nerfstudio/process_data/hloc_utils.py +++ b/nerfstudio/process_data/hloc_utils.py @@ -60,7 +60,14 @@ def run_hloc( "sift", "superpoint_aachen", "superpoint_max", "superpoint_inloc", "r2d2", "d2net-ss", "sosnet", "disk" ] = "superpoint_aachen", matcher_type: Literal[ - "superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam" + "superglue", + "superglue-fast", + "NN-superpoint", + "NN-ratio", + "NN-mutual", + "adalam", + "disk+lightglue", + "superpoint+lightglue", ] = "superglue", num_matched: int = 50, refine_pixsfm: bool = False, diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index 334dab4de0..fe24e55f27 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -477,7 +477,16 @@ def find_tool_feature_matcher_combination( "disk", ], matcher_type: Literal[ - "any", "NN", "superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam" + "any", + "NN", + "superglue", + "superglue-fast", + "NN-superpoint", + "NN-ratio", + "NN-mutual", + "adalam", + "disk+lightglue", + "superpoint+lightglue", ], ) -> Union[ Tuple[None, None, None], @@ -493,7 +502,17 @@ def find_tool_feature_matcher_combination( "sosnet", "disk", ], - Literal["NN", 
"superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam"], + Literal[ + "NN", + "superglue", + "superglue-fast", + "NN-superpoint", + "NN-ratio", + "NN-mutual", + "adalam", + "disk+lightglue", + "superpoint+lightglue", + ], ], ]: """Find a valid combination of sfm tool, feature type, and matcher type. From 390b63373dd65c7ecab42e43392fa93420d1e6b0 Mon Sep 17 00:00:00 2001 From: Paul Wais Date: Mon, 11 Dec 2023 05:55:36 -0800 Subject: [PATCH 085/101] Trainer can now always save dataparser transform (#2653) * Trainer can now always save dataparser transform * appease the linter --------- Co-authored-by: Brent Yi --- nerfstudio/engine/trainer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index 4cc296063e..2930db892e 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -28,7 +28,6 @@ import torch from nerfstudio.configs.experiment_config import ExperimentConfig -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation from nerfstudio.engine.optimizers import Optimizers from nerfstudio.pipelines.base_pipeline import VanillaPipeline @@ -225,11 +224,9 @@ def train(self) -> None: """Train the model.""" assert self.pipeline.datamanager.train_dataset is not None, "Missing DatsetInputs" - # don't want to call save_dataparser_transform if pipeline's datamanager does not have a dataparser - if isinstance(self.pipeline.datamanager, VanillaDataManager): - self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform( - self.base_dir / "dataparser_transforms.json" - ) + self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform( + self.base_dir / "dataparser_transforms.json" + ) self._init_viewer_state() with TimeWriter(writer, EventName.TOTAL_TRAIN_TIME): From 73fc3dcd6305001818f45ca7a1392de330a2c337 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Mon, 11 Dec 2023 18:08:17 +0000 Subject: [PATCH 086/101] Update dependencies for M1 Macs (#2665) * Update dependencies for M1 Macs * import newrawpy as rawpy * Comment * Add back rawpy for Linux * Run black --- nerfstudio/process_data/process_data_utils.py | 25 +++++++++++++++---- pyproject.toml | 7 ++++-- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index fe24e55f27..c1946305b3 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -24,9 +24,13 @@ import cv2 import imageio -import numpy as np -import rawpy +try: + import rawpy +except ImportError: + import newrawpy as rawpy # type: ignore + +import numpy as np from nerfstudio.utils.rich_utils import CONSOLE, status from nerfstudio.utils.scripts import run_command @@ -352,7 +356,11 @@ def copy_and_upscale_polycam_depth_maps_list( depth_dir.mkdir(parents=True, exist_ok=True) # copy and upscale them to new directory - with status(msg="[bold yellow] Upscaling depth maps...", spinner="growVertical", verbose=verbose): + with status( + msg="[bold yellow] Upscaling depth maps...", + spinner="growVertical", + verbose=verbose, + ): upscale_factor = 2**POLYCAM_UPSCALING_TIMES assert upscale_factor > 1 assert isinstance(upscale_factor, int) @@ -437,7 +445,11 @@ def downscale_images( if num_downscales == 0: return "No downscaling performed." 
- with status(msg="[bold yellow]Downscaling images...", spinner="growVertical", verbose=verbose): + with status( + msg="[bold yellow]Downscaling images...", + spinner="growVertical", + verbose=verbose, + ): downscale_factors = [2**i for i in range(num_downscales + 1)[1:]] for downscale_factor in downscale_factors: assert downscale_factor > 1 @@ -600,7 +612,10 @@ def generate_crop_mask(height: int, width: int, crop_factor: Tuple[float, float, def generate_mask( - height: int, width: int, crop_factor: Tuple[float, float, float, float], percent_radius: float + height: int, + width: int, + crop_factor: Tuple[float, float, float, float], + percent_radius: float, ) -> Optional[np.ndarray]: """generate a mask of the given size. diff --git a/pyproject.toml b/pyproject.toml index 780d2c6b5a..4c589f6808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,11 +39,14 @@ dependencies = [ "protobuf<=3.20.3,!=3.20.0", # TODO(1480) enable when pycolmap windows wheels are available # "pycolmap==0.3.0", - "pymeshlab>=2022.2.post2", + "pymeshlab>=2022.2.post2; platform_machine != 'arm64'", "pyngrok>=5.1.0", "python-socketio>=5.7.1", "pyquaternion>=0.9.9", - "rawpy>=0.18.1", + # TODO we can switch back to (non-new) rawpy if they start releasing arm64 + # wheels. https://github.com/letmaik/rawpy/issues/171#issuecomment-1572627747 + "rawpy>=0.18.1; platform_machine != 'arm64'", + "newrawpy>=0.18.1; platform_machine == 'arm64'", "requests", "rich>=12.5.1", "scikit-image>=0.19.3", From 281f81f1ec75c2be6d5793d617896b9b1ca8114d Mon Sep 17 00:00:00 2001 From: Mohit Motwani Date: Tue, 12 Dec 2023 16:43:50 +0530 Subject: [PATCH 087/101] Update cameras.py - removed duplicate lines (#2661) The self.times was being computed twice in cameras.py in the same function which was unnecessary --- nerfstudio/cameras/cameras.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py index c1b988dbbd..467c8fcc01 100644 --- a/nerfstudio/cameras/cameras.py +++ b/nerfstudio/cameras/cameras.py @@ -872,8 +872,6 @@ def _compute_rays_for_vr180( else: metadata = {"directions_norm": directions_norm[0].detach()} - times = self.times[camera_indices, 0] if self.times is not None else None - return RayBundle( origins=origins, directions=directions, From 01d57b90dc37566f1cf10754cd745226d700af95 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Tue, 12 Dec 2023 11:44:26 +0000 Subject: [PATCH 088/101] Fix large dataset (> 512 images) training in viewer_beta (#2669) * Fix large dataset (> 512 images) training in viewer_beta (#2586) * fix index bug in update_camera_poses of viewer * Set Ruff line lenth --------- Co-authored-by: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com> --- nerfstudio/viewer_beta/viewer.py | 10 +++++----- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index e577ca5c37..b01c35f4b5 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -259,15 +259,15 @@ def update_camera_poses(self): with torch.no_grad(): assert isinstance(camera_optimizer, CameraOptimizer) c2ws_delta = camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)).cpu().numpy() - for idx in idxs: + for i, key in enumerate(idxs): # both are numpy arrays - c2w_orig = self.original_c2w[idx] - c2w_delta = c2ws_delta[idx, ...] + c2w_orig = self.original_c2w[key] + c2w_delta = c2ws_delta[i, ...] 
c2w = c2w_orig @ np.concatenate((c2w_delta, np.array([[0, 0, 0, 1]])), axis=0) R = vtf.SO3.from_matrix(c2w[:3, :3]) # type: ignore R = R @ vtf.SO3.from_x_radians(np.pi) - self.camera_handles[idx].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO - self.camera_handles[idx].wxyz = R.wxyz + self.camera_handles[key].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO + self.camera_handles[key].wxyz = R.wxyz def _interrupt_render(self, _) -> None: """Interrupt current render.""" diff --git a/pyproject.toml b/pyproject.toml index 4c589f6808..b6d11c60c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -162,6 +162,7 @@ pythonVersion = "3.8" pythonPlatform = "Linux" [tool.ruff] +line-length = 120 select = [ "E", # pycodestyle errors. "F", # Pyflakes rules. From 6ccbc2de45d31befed6f96d007a8e20f942d5e60 Mon Sep 17 00:00:00 2001 From: Chris Sweeney Date: Tue, 12 Dec 2023 14:18:13 -0800 Subject: [PATCH 089/101] Add support for Project Aria datasets. (#2617) * Add support for Project Aria datasets. 1) An export script for processing Project Aria datasets 2) Add support for Fisheye624 cameras 3) Add support for center-crop masking and sampling Example commands for running nerfstudio on Project Aria data: python scripts/datasets/process_project_aria.py --vrs_file= --mps_data_dir= --output_dir= ns-train nerfacto --data nerfstudio-data --orientation-method none * Clean up type errors * Fix formatting * Fix linter errors, import, and use absolute file paths * Add aria subcommand to ns-process-data * Appease ruff * Add Aria to docs * Fix docs, add back import try/except * nit: path.as_posix() -> str(path) --------- Co-authored-by: Brent Yi --- README.md | 49 ++-- docs/quickstart/custom_dataset.md | 30 ++- nerfstudio/cameras/camera_utils.py | 222 ++++++++++++++++- nerfstudio/cameras/cameras.py | 36 ++- .../data/datamanagers/base_datamanager.py | 8 +- .../data/dataparsers/nerfstudio_dataparser.py | 27 +- nerfstudio/data/pixel_samplers.py | 41 ++++ .../scripts/datasets/process_project_aria.py | 231 ++++++++++++++++++ nerfstudio/scripts/process_data.py | 29 +++ pyproject.toml | 1 + 10 files changed, 634 insertions(+), 40 deletions(-) create mode 100644 nerfstudio/scripts/datasets/process_project_aria.py diff --git a/README.md b/README.md index ab18747680..1ea4ca8fad 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ Have feedback? We'd love for you to fill out our [Nerfstudio Feedback Form](http We hope nerfstudio enables you to build faster :hammer: learn together :books: and contribute to our NeRF community :sparkling_heart:. ## Sponsors + Sponsors of this work includes [Luma AI](https://lumalabs.ai/) and the [BAIR commons](https://bcommons.berkeley.edu/home).

@@ -97,7 +98,6 @@ Sponsors of this work includes [Luma AI](https://lumalabs.ai/) and the [BAIR com

- # Quickstart The quickstart will help you get started with the default vanilla NeRF trained on the classic Blender Lego scene. @@ -233,18 +233,19 @@ ns-export pointcloud --help Using an existing dataset is great, but likely you want to use your own data! We support various methods for using your own data. Before it can be used in nerfstudio, the camera location and orientations must be determined and then converted into our format using `ns-process-data`. We rely on external tools for this, instructions and information can be found in the documentation. -| Data | Capture Device | Requirements | `ns-process-data` Speed | -| ---------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- | -| πŸ“· [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| πŸ“Ή [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| 🌎 [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | -| πŸ“± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | -| πŸ“± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | -| πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | -| πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | -| πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | -| πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | -| πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | +| Data | Capture Device | Requirements | `ns-process-data` Speed | +| --------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- | +| πŸ“· [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| πŸ“Ή [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| 🌎 [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | +| πŸ“± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | +| πŸ“± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | +| πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D 
app](https://record3d.app/) | πŸ‡ | +| πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | +| πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | +| πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | +| πŸ‘“ [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#Aria) | Aria glasses | [Project Aria](https://projectaria.com/) | πŸ‡ | +| πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | ## 5. Advanced Options @@ -276,25 +277,25 @@ And that's it for getting started with the basics of nerfstudio. If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/) directly. -| Section | Description | -| -------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | +| Section | Description | +| ---------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- | | [Documentation](https://docs.nerf.studio/) | Full API documentation and tutorials | -| [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer | -| πŸŽ’ **Educational** | +| [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer | +| πŸŽ’ **Educational** | | [Model Descriptions](https://docs.nerf.studio/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. | | [Component Descriptions](https://docs.nerf.studio/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. | -| πŸƒ **Tutorials** | +| πŸƒ **Tutorials** | | [Getting Started](https://docs.nerf.studio/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. | | [Using the Viewer](https://docs.nerf.studio/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. | -| [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. | -| πŸ’» **For Developers** | +| [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. | +| πŸ’» **For Developers** | | [Creating pipelines](https://docs.nerf.studio/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. | | [Creating datasets](https://docs.nerf.studio/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. | | [Contributing](https://docs.nerf.studio/reference/contributing.html) | Walk-through for how you can start contributing now. | -| πŸ’– **Community** | -| [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! 
| -| [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements | -| [Feedback Form](TODO) | We welcome any feedback! This is our chance to learn what you all are using Nerfstudio for. | +| πŸ’– **Community** | +| [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! | +| [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements | +| [Feedback Form](TODO) | We welcome any feedback! This is our chance to learn what you all are using Nerfstudio for. | # Supported Features diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index 46aaa907d6..fddbe1d56b 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -10,7 +10,7 @@ ns-process-data {video,images,polycam,record3d} --data {DATA_PATH} --output-dir A full set of arguments can be found {doc}`here`. -We Currently support the following custom data types: +We currently support the following custom data types: | Data | Capture Device | Requirements | `ns-process-data` Speed | | ----------------------------- | -------------- | ----------------------------------------------- | ----------------------- | | πŸ“· [Images](images_and_video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐒 | @@ -22,6 +22,7 @@ We Currently support the following custom data types: | πŸ–₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | | πŸ–₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | +| πŸ‘“ [Aria](aria) | Aria glasses | [Project Aria](https://projectaria.com/) | πŸ‡ | (images_and_video)= @@ -348,6 +349,31 @@ ns-process-data odm --data /path/to/dataset --output-dir {output directory} ns-train nerfacto --data {output directory} ``` +(aria)= + +## Aria + +1. Install projectaria_tools: + +```bash +conda activate nerfstudio +pip install projectaria-tools'[all]' +``` + +2. Download a VRS file from Project Aria glasses, and run Machine Perception Services to extract poses. + +3. Convert to nerfstudio format. + +```bash +ns-process-data aria --vrs-file /path/to/vrs/file --mps-data-dir /path/to/mps/data --output-dir {output directory} +``` + +4. Train! + +```bash +ns-train nerfacto --data {output directory} +``` + (360_data)= ## 360 Data (Equirectangular) @@ -459,4 +485,4 @@ If the depth of the scene is unviewable and looks too close or expanded when vie - The IPD can be modified in the `cameras.py` script as the variable `vr_ipd` (default is 64 mm). - Compositing with Blender Objects and VR180 or ODS Renders - Configure the Blender camera as panoramic and equirectangular. For the VR180 Blender camera, set the panoramic longitude min and max to -90 and 90. - - Change the Stereoscopy mode to "Parallel" set the Interocular Distance to 0.064 m. \ No newline at end of file + - Change the Stereoscopy mode to "Parallel" set the Interocular Distance to 0.064 m. 
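As a quick reference for the camera model this patch introduces, below is a minimal, hypothetical usage sketch of the `fisheye624_project` and `fisheye624_unproject_helper` helpers added in the `camera_utils.py` diff that follows. The function names and the 16-value parameter layout come from that diff; the focal length, principal point, and sample points are made-up illustrative values, not part of the patch.

```python
import torch

# These helpers exist only after this patch is applied.
from nerfstudio.cameras.camera_utils import fisheye624_project, fisheye624_unproject_helper

# Fisheye624 parameter layout (16 values per camera):
# [fx, fy, cx, cy, k0..k5, p0, p1, s0, s1, s2, s3]
B, N = 1, 3
params = torch.zeros(B, 16)
params[:, 0:2] = 600.0                          # illustrative focal lengths fx, fy
params[:, 2:4] = torch.tensor([704.0, 704.0])   # illustrative principal point cx, cy
# All distortion coefficients left at zero -> the model reduces to an ideal equidistant fisheye.

xyz = torch.tensor([[[0.1, -0.2, 1.0],
                     [0.0, 0.0, 1.0],
                     [0.3, 0.3, 2.0]]])         # (B, N, 3) points in camera space, z > 0

uv = fisheye624_project(xyz, params)            # (B, N, 2) pixel coordinates
rays = fisheye624_unproject_helper(uv, params)  # (B, N, 3) rays normalized to z = 1

# Up to the Newton-solver tolerance, the round trip recovers the ray direction (x/z, y/z, 1).
assert torch.allclose(rays, xyz / xyz[..., 2:3], atol=1e-4)
```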
diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py index ffb557589b..4456566237 100644 --- a/nerfstudio/cameras/camera_utils.py +++ b/nerfstudio/cameras/camera_utils.py @@ -25,7 +25,6 @@ from numpy.typing import NDArray from torch import Tensor - _EPS = np.finfo(float).eps * 4.0 @@ -622,3 +621,224 @@ def auto_orient_and_center_poses( raise ValueError(f"Unknown value for method: {method}") return oriented_poses, transform + + +@torch.jit.script +def fisheye624_project(xyz, params): + """ + Batched implementation of the FisheyeRadTanThinPrism (aka Fisheye624) camera + model project() function. + Inputs: + xyz: BxNx3 tensor of 3D points to be projected + params: Bx16 tensor of Fisheye624 parameters formatted like this: + [f_u f_v c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}] + or Bx15 tensor of Fisheye624 parameters formatted like this: + [f c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}] + Outputs: + uv: BxNx2 tensor of 2D projections of xyz in image plane + Model for fisheye cameras with radial, tangential, and thin-prism distortion. + This model allows fu != fv. + Specifically, the model is: + uvDistorted = [x_r] + tangentialDistortion + thinPrismDistortion + [y_r] + proj = diag(fu,fv) * uvDistorted + [cu;cv]; + where: + a = x/z, b = y/z, r = (a^2+b^2)^(1/2) + th = atan(r) + cosPhi = a/r, sinPhi = b/r + [x_r] = (th+ k0 * th^3 + k1* th^5 + ...) [cosPhi] + [y_r] [sinPhi] + the number of terms in the series is determined by the template parameter numK. + tangentialDistortion = [(2 x_r^2 + rd^2)*p_0 + 2*x_r*y_r*p_1] + [(2 y_r^2 + rd^2)*p_1 + 2*x_r*y_r*p_0] + where rd^2 = x_r^2 + y_r^2 + thinPrismDistortion = [s0 * rd^2 + s1 rd^4] + [s2 * rd^2 + s3 rd^4] + Author: Daniel DeTone (ddetone@meta.com) + """ + + assert xyz.ndim == 3 + assert params.ndim == 2 + assert params.shape[-1] == 16 or params.shape[-1] == 15, "This model allows fx != fy" + eps = 1e-9 + B, N = xyz.shape[0], xyz.shape[1] + + # Radial correction. + z = xyz[:, :, 2].reshape(B, N, 1) + z = torch.where(torch.abs(z) < eps, eps * torch.sign(z), z) + ab = xyz[:, :, :2] / z + r = torch.norm(ab, dim=-1, p=2, keepdim=True) + th = torch.atan(r) + th_divr = torch.where(r < eps, torch.ones_like(ab), ab / r) + th_k = th.reshape(B, N, 1).clone() + for i in range(6): + th_k = th_k + params[:, -12 + i].reshape(B, 1, 1) * torch.pow(th, 3 + i * 2) + xr_yr = th_k * th_divr + uv_dist = xr_yr + + # Tangential correction. + p0 = params[:, -6].reshape(B, 1) + p1 = params[:, -5].reshape(B, 1) + xr = xr_yr[:, :, 0].reshape(B, N) + yr = xr_yr[:, :, 1].reshape(B, N) + xr_yr_sq = torch.square(xr_yr) + xr_sq = xr_yr_sq[:, :, 0].reshape(B, N) + yr_sq = xr_yr_sq[:, :, 1].reshape(B, N) + rd_sq = xr_sq + yr_sq + uv_dist_tu = uv_dist[:, :, 0] + ((2.0 * xr_sq + rd_sq) * p0 + 2.0 * xr * yr * p1) + uv_dist_tv = uv_dist[:, :, 1] + ((2.0 * yr_sq + rd_sq) * p1 + 2.0 * xr * yr * p0) + uv_dist = torch.stack([uv_dist_tu, uv_dist_tv], dim=-1) # Avoids in-place complaint. + + # Thin Prism correction. + s0 = params[:, -4].reshape(B, 1) + s1 = params[:, -3].reshape(B, 1) + s2 = params[:, -2].reshape(B, 1) + s3 = params[:, -1].reshape(B, 1) + rd_4 = torch.square(rd_sq) + uv_dist[:, :, 0] = uv_dist[:, :, 0] + (s0 * rd_sq + s1 * rd_4) + uv_dist[:, :, 1] = uv_dist[:, :, 1] + (s2 * rd_sq + s3 * rd_4) + + # Finally, apply standard terms: focal length and camera centers. 
+    if params.shape[-1] == 15:
+        fx_fy = params[:, 0].reshape(B, 1, 1)
+        cx_cy = params[:, 1:3].reshape(B, 1, 2)
+    else:
+        fx_fy = params[:, 0:2].reshape(B, 1, 2)
+        cx_cy = params[:, 2:4].reshape(B, 1, 2)
+    result = uv_dist * fx_fy + cx_cy
+
+    return result
+
+
+# Core implementation of fisheye 624 unprojection. More details are documented here:
+# https://facebookresearch.github.io/projectaria_tools/docs/tech_insights/camera_intrinsic_models#the-fisheye62-model
+@torch.jit.script
+def fisheye624_unproject_helper(uv, params, max_iters: int = 5):
+    """
+    Batched implementation of the FisheyeRadTanThinPrism (aka Fisheye624) camera
+    model. There is no analytical solution for the inverse of the project()
+    function so this solves an optimization problem using Newton's method to get
+    the inverse.
+    Inputs:
+        uv: BxNx2 tensor of 2D pixels to be unprojected
+        params: Bx16 tensor of Fisheye624 parameters formatted like this:
+                [f_u f_v c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+                or Bx15 tensor of Fisheye624 parameters formatted like this:
+                [f c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+    Outputs:
+        xyz: BxNx3 tensor of 3D rays of uv points with z = 1.
+    Model for fisheye cameras with radial, tangential, and thin-prism distortion.
+    This model allows fu != fv. This unproject function holds that:
+    X = unproject(project(X))     [for X=(x,y,z) in R^3, z>0]
+    and
+    x = project(unproject(s*x))   [for s!=0 and x=(u,v) in R^2]
+    Author: Daniel DeTone (ddetone@meta.com)
+    """
+
+    assert uv.ndim == 3, "Expected batched input shaped BxNx3"
+    assert params.ndim == 2
+    assert params.shape[-1] == 16 or params.shape[-1] == 15, "This model allows fx != fy"
+    eps = 1e-6
+    B, N = uv.shape[0], uv.shape[1]
+
+    if params.shape[-1] == 15:
+        fx_fy = params[:, 0].reshape(B, 1, 1)
+        cx_cy = params[:, 1:3].reshape(B, 1, 2)
+    else:
+        fx_fy = params[:, 0:2].reshape(B, 1, 2)
+        cx_cy = params[:, 2:4].reshape(B, 1, 2)
+
+    uv_dist = (uv - cx_cy) / fx_fy
+
+    # Compute xr_yr using Newton's method.
+    xr_yr = uv_dist.clone()  # Initial guess.
+    for _ in range(max_iters):
+        uv_dist_est = xr_yr.clone()
+        # Tangential terms.
+        p0 = params[:, -6].reshape(B, 1)
+        p1 = params[:, -5].reshape(B, 1)
+        xr = xr_yr[:, :, 0].reshape(B, N)
+        yr = xr_yr[:, :, 1].reshape(B, N)
+        xr_yr_sq = torch.square(xr_yr)
+        xr_sq = xr_yr_sq[:, :, 0].reshape(B, N)
+        yr_sq = xr_yr_sq[:, :, 1].reshape(B, N)
+        rd_sq = xr_sq + yr_sq
+        uv_dist_est[:, :, 0] = uv_dist_est[:, :, 0] + ((2.0 * xr_sq + rd_sq) * p0 + 2.0 * xr * yr * p1)
+        uv_dist_est[:, :, 1] = uv_dist_est[:, :, 1] + ((2.0 * yr_sq + rd_sq) * p1 + 2.0 * xr * yr * p0)
+        # Thin Prism terms.
+        s0 = params[:, -4].reshape(B, 1)
+        s1 = params[:, -3].reshape(B, 1)
+        s2 = params[:, -2].reshape(B, 1)
+        s3 = params[:, -1].reshape(B, 1)
+        rd_4 = torch.square(rd_sq)
+        uv_dist_est[:, :, 0] = uv_dist_est[:, :, 0] + (s0 * rd_sq + s1 * rd_4)
+        uv_dist_est[:, :, 1] = uv_dist_est[:, :, 1] + (s2 * rd_sq + s3 * rd_4)
+        # Compute the derivative of uv_dist w.r.t. xr_yr.
+ duv_dist_dxr_yr = uv.new_ones(B, N, 2, 2) + duv_dist_dxr_yr[:, :, 0, 0] = 1.0 + 6.0 * xr_yr[:, :, 0] * p0 + 2.0 * xr_yr[:, :, 1] * p1 + offdiag = 2.0 * (xr_yr[:, :, 0] * p1 + xr_yr[:, :, 1] * p0) + duv_dist_dxr_yr[:, :, 0, 1] = offdiag + duv_dist_dxr_yr[:, :, 1, 0] = offdiag + duv_dist_dxr_yr[:, :, 1, 1] = 1.0 + 6.0 * xr_yr[:, :, 1] * p1 + 2.0 * xr_yr[:, :, 0] * p0 + xr_yr_sq_norm = xr_yr_sq[:, :, 0] + xr_yr_sq[:, :, 1] + temp1 = 2.0 * (s0 + 2.0 * s1 * xr_yr_sq_norm) + duv_dist_dxr_yr[:, :, 0, 0] = duv_dist_dxr_yr[:, :, 0, 0] + (xr_yr[:, :, 0] * temp1) + duv_dist_dxr_yr[:, :, 0, 1] = duv_dist_dxr_yr[:, :, 0, 1] + (xr_yr[:, :, 1] * temp1) + temp2 = 2.0 * (s2 + 2.0 * s3 * xr_yr_sq_norm) + duv_dist_dxr_yr[:, :, 1, 0] = duv_dist_dxr_yr[:, :, 1, 0] + (xr_yr[:, :, 0] * temp2) + duv_dist_dxr_yr[:, :, 1, 1] = duv_dist_dxr_yr[:, :, 1, 1] + (xr_yr[:, :, 1] * temp2) + # Compute 2x2 inverse manually here since torch.inverse() is very slow. + # Because this is slow: inv = duv_dist_dxr_yr.inverse() + # About a 10x reduction in speed with above line. + mat = duv_dist_dxr_yr.reshape(-1, 2, 2) + a = mat[:, 0, 0].reshape(-1, 1, 1) + b = mat[:, 0, 1].reshape(-1, 1, 1) + c = mat[:, 1, 0].reshape(-1, 1, 1) + d = mat[:, 1, 1].reshape(-1, 1, 1) + det = 1.0 / ((a * d) - (b * c)) + top = torch.cat([d, -b], dim=2) + bot = torch.cat([-c, a], dim=2) + inv = det * torch.cat([top, bot], dim=1) + inv = inv.reshape(B, N, 2, 2) + # Manually compute 2x2 @ 2x1 matrix multiply. + # Because this is slow: step = (inv @ (uv_dist - uv_dist_est)[..., None])[..., 0] + diff = uv_dist - uv_dist_est + a = inv[:, :, 0, 0] + b = inv[:, :, 0, 1] + c = inv[:, :, 1, 0] + d = inv[:, :, 1, 1] + e = diff[:, :, 0] + f = diff[:, :, 1] + step = torch.stack([a * e + b * f, c * e + d * f], dim=-1) + # Newton step. + xr_yr = xr_yr + step + + # Compute theta using Newton's method. + xr_yr_norm = xr_yr.norm(p=2, dim=2).reshape(B, N, 1) + th = xr_yr_norm.clone() + for _ in range(max_iters): + th_radial = uv.new_ones(B, N, 1) + dthd_th = uv.new_ones(B, N, 1) + for k in range(6): + r_k = params[:, -12 + k].reshape(B, 1, 1) + th_radial = th_radial + (r_k * torch.pow(th, 2 + k * 2)) + dthd_th = dthd_th + ((3.0 + 2.0 * k) * r_k * torch.pow(th, 2 + k * 2)) + th_radial = th_radial * th + step = (xr_yr_norm - th_radial) / dthd_th + # handle dthd_th close to 0. + step = torch.where(dthd_th.abs() > eps, step, torch.sign(step) * eps * 10.0) + th = th + step + # Compute the ray direction using theta and xr_yr. 
+    close_to_zero = torch.logical_and(th.abs() < eps, xr_yr_norm.abs() < eps)
+    ray_dir = torch.where(close_to_zero, xr_yr, torch.tan(th) / xr_yr_norm * xr_yr)
+    ray = torch.cat([ray_dir, uv.new_ones(B, N, 1)], dim=2)
+    return ray
+
+
+# unproject 2D point to 3D with fisheye624 model
+def fisheye624_unproject(coords: torch.Tensor, distortion_params: torch.Tensor) -> torch.Tensor:
+    dirs = fisheye624_unproject_helper(coords.unsqueeze(0), distortion_params[0].unsqueeze(0))
+    # correct for camera space differences:
+    dirs[..., 1] = -dirs[..., 1]
+    dirs[..., 2] = -dirs[..., 2]
+    return dirs
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
index 467c8fcc01..46ec42bdc2 100644
--- a/nerfstudio/cameras/cameras.py
+++ b/nerfstudio/cameras/cameras.py
@@ -32,7 +32,7 @@
 import nerfstudio.utils.poses as pose_utils
 from nerfstudio.cameras import camera_utils
 from nerfstudio.cameras.rays import RayBundle
-from nerfstudio.data.scene_box import SceneBox, OrientedBox
+from nerfstudio.data.scene_box import OrientedBox, SceneBox
 from nerfstudio.utils.tensor_dataclass import TensorDataclass
 
 TORCH_DEVICE = Union[torch.device, str]
@@ -48,6 +48,7 @@ class CameraType(Enum):
     OMNIDIRECTIONALSTEREO_R = auto()
     VR180_L = auto()
     VR180_R = auto()
+    FISHEYE624 = auto()
 
 
 CAMERA_MODEL_TO_TYPE = {
@@ -62,6 +63,7 @@ class CameraType(Enum):
     "OMNIDIRECTIONALSTEREO_R": CameraType.OMNIDIRECTIONALSTEREO_R,
     "VR180_L": CameraType.VR180_L,
     "VR180_R": CameraType.VR180_R,
+    "FISHEYE624": CameraType.FISHEYE624,
 }
 
 
@@ -79,7 +81,7 @@ class Cameras(TensorDataclass):
         cy: Principal point y
         width: Image width
         height: Image height
-        distortion_params: OpenCV 6 radial distortion coefficients
+        distortion_params: distortion coefficients (OpenCV 6 radial or 6-2-4 radial, tangential, thin-prism for Fisheye624)
         camera_type: Type of camera model. This will be an int corresponding to the CameraType enum.
         times: Timestamps for each camera
         metadata: Additional metadata or data needed for interpolation, will mimic shape of the cameras
@@ -629,8 +631,8 @@ def _generate_rays_from_coords(
         assert coord_stack.shape == (3,) + num_rays_shape + (2,)
 
         # Undistorts our images according to our distortion parameters
+        distortion_params = None
         if not disable_distortion:
-            distortion_params = None
             if self.distortion_params is not None:
                 distortion_params = self.distortion_params[true_indices]
                 if distortion_params_delta is not None:
@@ -832,6 +834,34 @@ def _compute_rays_for_vr180(
                 # assign final camera origins
                 c2w[..., :3, 3] = vr180_origins
 
+        elif CameraType.FISHEYE624.value in cam_types:
+            mask = (self.camera_type[true_indices] == CameraType.FISHEYE624.value).squeeze(-1)  # (num_rays)
+            coord_mask = torch.stack([mask, mask, mask], dim=0)
+
+            # fisheye624 requires pixel coordinates to unproject, so we need to recompute the offsets in pixel coords.
+            pcoord = torch.stack([x, y], -1)  # (num_rays, 2)
+            pcoord_x_offset = torch.stack([x + 1, y], -1)  # (num_rays, 2)
+            pcoord_y_offset = torch.stack([x, y + 1], -1)  # (num_rays, 2)
+
+            # Stack image coordinates and image coordinates offset by 1, check shapes too
+            pcoord_stack = torch.stack([pcoord, pcoord_x_offset, pcoord_y_offset], dim=0)  # (3, num_rays, 2)
+
+            assert distortion_params is not None
+            masked_coords = pcoord_stack[coord_mask, :]
+            # The fisheye unprojection does not rely on planar/pinhole unprojection, thus the method needs
+            # to access the focal length and principal points directly.
+ camera_params = torch.cat( + [ + fx[mask].unsqueeze(1), + fy[mask].unsqueeze(1), + cx[mask].unsqueeze(1), + cy[mask].unsqueeze(1), + distortion_params[mask, :], + ], + dim=1, + ) + directions_stack[coord_mask] = camera_utils.fisheye624_unproject(masked_coords, camera_params) + else: raise ValueError(f"Camera type {cam} not supported.") diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 7971f46e56..47144c00e2 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -475,8 +475,14 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all() if is_equirectangular.any(): CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") + + fisheye_crop_radius = ( + None if dataset.cameras.metadata is None else dataset.cameras.metadata["fisheye_crop_radius"] + ) return self.config.pixel_sampler.setup( - is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch + is_equirectangular=is_equirectangular, + num_rays_per_batch=num_rays_per_batch, + fisheye_crop_radius=fisheye_crop_radius, ) def setup_train(self): diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index eaeda0bce6..9b4c2951ac 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -104,10 +104,11 @@ def _generate_dataparser_outputs(self, split="train"): height_fixed = "h" in meta width_fixed = "w" in meta distort_fixed = False - for distort_key in ["k1", "k2", "k3", "p1", "p2"]: + for distort_key in ["k1", "k2", "k3", "p1", "p2", "distortion_params"]: if distort_key in meta: distort_fixed = True break + fisheye_crop_radius = meta.get("fisheye_crop_radius", None) fx = [] fy = [] cx = [] @@ -149,7 +150,9 @@ def _generate_dataparser_outputs(self, split="train"): width.append(int(frame["w"])) if not distort_fixed: distort.append( - camera_utils.get_distortion_params( + torch.tensor(frame["distortion_params"], dtype=torch.float32) + if "distortion_params" in frame + else camera_utils.get_distortion_params( k1=float(frame["k1"]) if "k1" in frame else 0.0, k2=float(frame["k2"]) if "k2" in frame else 0.0, k3=float(frame["k3"]) if "k3" in frame else 0.0, @@ -274,17 +277,22 @@ def _generate_dataparser_outputs(self, split="train"): height = int(meta["h"]) if height_fixed else torch.tensor(height, dtype=torch.int32)[idx_tensor] width = int(meta["w"]) if width_fixed else torch.tensor(width, dtype=torch.int32)[idx_tensor] if distort_fixed: - distortion_params = camera_utils.get_distortion_params( - k1=float(meta["k1"]) if "k1" in meta else 0.0, - k2=float(meta["k2"]) if "k2" in meta else 0.0, - k3=float(meta["k3"]) if "k3" in meta else 0.0, - k4=float(meta["k4"]) if "k4" in meta else 0.0, - p1=float(meta["p1"]) if "p1" in meta else 0.0, - p2=float(meta["p2"]) if "p2" in meta else 0.0, + distortion_params = ( + torch.tensor(meta["distortion_params"], dtype=torch.float32) + if "distortion_params" in meta + else camera_utils.get_distortion_params( + k1=float(meta["k1"]) if "k1" in meta else 0.0, + k2=float(meta["k2"]) if "k2" in meta else 0.0, + k3=float(meta["k3"]) if "k3" in meta else 0.0, + k4=float(meta["k4"]) if "k4" in meta else 0.0, + p1=float(meta["p1"]) if "p1" in meta else 0.0, + p2=float(meta["p2"]) 
if "p2" in meta else 0.0, + ) ) else: distortion_params = torch.stack(distort, dim=0)[idx_tensor] + metadata = {"fisheye_crop_radius": fisheye_crop_radius} if fisheye_crop_radius is not None else None cameras = Cameras( fx=fx, fy=fy, @@ -295,6 +303,7 @@ def _generate_dataparser_outputs(self, split="train"): width=width, camera_to_worlds=poses[:, :3, :4], camera_type=camera_type, + metadata=metadata, ) assert self.downscale_factor is not None diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index 07bab5d826..c7ce9e073e 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -47,6 +47,8 @@ class PixelSamplerConfig(InstantiateConfig): """Whether or not to include a reference to the full image in returned batch.""" is_equirectangular: bool = False """List of whether or not camera i is equirectangular.""" + fisheye_crop_radius: Optional[float] = None + """Set to the radius (in pixels) for fisheye cameras.""" class PixelSampler: @@ -65,6 +67,7 @@ def __init__(self, config: PixelSamplerConfig, **kwargs) -> None: self.config.num_rays_per_batch = self.kwargs.get("num_rays_per_batch", self.config.num_rays_per_batch) self.config.keep_full_image = self.kwargs.get("keep_full_image", self.config.keep_full_image) self.config.is_equirectangular = self.kwargs.get("is_equirectangular", self.config.is_equirectangular) + self.config.fisheye_crop_radius = self.kwargs.get("fisheye_crop_radius", self.config.fisheye_crop_radius) self.set_num_rays_per_batch(self.config.num_rays_per_batch) def set_num_rays_per_batch(self, num_rays_per_batch: int): @@ -134,6 +137,36 @@ def sample_method_equirectangular( return indices + def sample_method_fisheye( + self, + batch_size: int, + num_images: int, + image_height: int, + image_width: int, + mask: Optional[Tensor] = None, + device: Union[torch.device, str] = "cpu", + ) -> Int[Tensor, "batch_size 3"]: + if isinstance(mask, torch.Tensor): + indices = self.sample_method(batch_size, num_images, image_height, image_width, mask=mask, device=device) + else: + rand_samples = torch.rand((batch_size, 3), device=device) + # convert random samples tto radius and theta + radii = self.config.fisheye_crop_radius * torch.sqrt(rand_samples[:, 1]) + theta = 2.0 * torch.pi * rand_samples[:, 2] + + # convert radius and theta to x and y between -radii and radii + x = radii * torch.cos(theta) + y = radii * torch.sin(theta) + + # Multiply by the batch size and height/width to get pixel indices. + indices = torch.floor( + torch.stack([rand_samples[:, 0], y, x], dim=1) + * torch.tensor([num_images, image_height // 2, image_width // 2], device=device) + + torch.tensor([0, image_height // 2, image_width // 2], device=device) + ).long() + + return indices + def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep_full_image: bool = False): """ Operates on a batch of images and samples pixels to use for generating rays. 
@@ -154,6 +187,10 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep indices = self.sample_method_equirectangular( num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device ) + elif self.config.fisheye_crop_radius is not None: + indices = self.sample_method_fisheye( + num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device + ) else: indices = self.sample_method( num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device @@ -163,6 +200,10 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep indices = self.sample_method_equirectangular( num_rays_per_batch, num_images, image_height, image_width, device=device ) + elif self.config.fisheye_crop_radius is not None: + indices = self.sample_method_fisheye( + num_rays_per_batch, num_images, image_height, image_width, device=device + ) else: indices = self.sample_method(num_rays_per_batch, num_images, image_height, image_width, device=device) diff --git a/nerfstudio/scripts/datasets/process_project_aria.py b/nerfstudio/scripts/datasets/process_project_aria.py new file mode 100644 index 0000000000..f2fdcc94c6 --- /dev/null +++ b/nerfstudio/scripts/datasets/process_project_aria.py @@ -0,0 +1,231 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import sys +import threading +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List + +import numpy as np +import tyro +from PIL import Image + +try: + from projectaria_tools.core import mps + from projectaria_tools.core.data_provider import VrsDataProvider, create_vrs_data_provider + from projectaria_tools.core.sophus import SE3 +except ImportError: + print("projectaria_tools import failed, please install with pip3 install projectaria-tools'[all]'") + sys.exit(1) + +ARIA_CAMERA_MODEL = "FISHEYE624" + +# The Aria coordinate system is different than the Blender/NerfStudio coordinate system. 
+# Blender / Nerfstudio: +Z = back, +Y = up, +X = right +# Surreal: +Z = forward, +Y = down, +X = right +T_ARIA_NERFSTUDIO = SE3.from_matrix( + np.array( + [ + [1.0, 0.0, 0.0, 0.0], + [0.0, -1.0, 0.0, 0.0], + [0.0, 0.0, -1.0, 0.0], + [0.0, 0.0, 0.0, 1.0], + ] + ) +) + + +@dataclass +class AriaCameraCalibration: + fx: float + fy: float + cx: float + cy: float + distortion_params: np.ndarray + width: int + height: int + t_device_camera: SE3 + + +@dataclass +class AriaImageFrame: + camera: AriaCameraCalibration + file_path: str + t_world_camera: SE3 + timestamp_ns: float + + +@dataclass +class TimedPoses: + timestamps_ns: np.ndarray + t_world_devices: List[SE3] + + +def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibration]: + """Retrieve the per-camera factory calibration from within the VRS.""" + + factory_calib = {} + name = "camera-rgb" + device_calib = provider.get_device_calibration() + assert device_calib is not None, "Could not find device calibration" + sensor_calib = device_calib.get_camera_calib(name) + assert sensor_calib is not None, f"Could not find sensor calibration for {name}" + + width = sensor_calib.get_image_size()[0].item() + height = sensor_calib.get_image_size()[1].item() + intrinsics = sensor_calib.projection_params() + + factory_calib[name] = AriaCameraCalibration( + fx=intrinsics[0], + fy=intrinsics[0], + cx=intrinsics[1], + cy=intrinsics[2], + distortion_params=intrinsics[3:15], + width=width, + height=height, + t_device_camera=sensor_calib.get_transform_device_camera(), + ) + + return factory_calib + + +def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses: + closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv) + + timestamps_secs, poses = zip( + *[(it.tracking_timestamp.total_seconds(), it.transform_world_device) for it in closed_loop_traj] + ) + + SEC_TO_NANOSEC = 1e9 + return TimedPoses( + timestamps_ns=(np.array(timestamps_secs) * SEC_TO_NANOSEC).astype(int), + t_world_devices=poses, + ) + + +def to_aria_image_frame( + provider: VrsDataProvider, + index: int, + name_to_camera: Dict[str, AriaCameraCalibration], + t_world_devices: TimedPoses, + output_dir: Path, +) -> AriaImageFrame: + name = "camera-rgb" + + camera_calibration = name_to_camera[name] + stream_id = provider.get_stream_id_from_label(name) + assert stream_id is not None, f"Could not find stream {name}" + + # Get the image corresponding to this index + image_data = provider.get_image_data_by_index(stream_id, index) + img = Image.fromarray(image_data[0].to_numpy_array()) + capture_time_ns = image_data[1].capture_timestamp_ns + + file_path = f"{output_dir}/{name}_{capture_time_ns}.jpg" + threading.Thread(target=lambda: img.save(file_path)).start() + + # Find the nearest neighbor pose with the closest timestamp to the capture time. + nearest_pose_idx = np.searchsorted(t_world_devices.timestamps_ns, capture_time_ns) + nearest_pose_idx = np.minimum(nearest_pose_idx, len(t_world_devices.timestamps_ns) - 1) + assert nearest_pose_idx != -1, f"Could not find pose for {capture_time_ns}" + t_world_device = t_world_devices.t_world_devices[nearest_pose_idx] + + # Compute the world to camera transform. 
+ t_world_camera = t_world_device @ camera_calibration.t_device_camera @ T_ARIA_NERFSTUDIO + + return AriaImageFrame( + camera=camera_calibration, + file_path=file_path, + t_world_camera=t_world_camera, + timestamp_ns=capture_time_ns, + ) + + +def to_nerfstudio_frame(frame: AriaImageFrame) -> Dict: + return { + "fl_x": frame.camera.fx, + "fl_y": frame.camera.fy, + "cx": frame.camera.cx, + "cy": frame.camera.cy, + "distortion_params": frame.camera.distortion_params.tolist(), + "w": frame.camera.width, + "h": frame.camera.height, + "file_path": frame.file_path, + "transform_matrix": frame.t_world_camera.to_matrix().tolist(), + "timestamp": frame.timestamp_ns, + } + + +@dataclass +class ProcessProjectAria: + """Processes Project Aria data i.e. a VRS of the raw recording streams and the MPS attachments + that provide poses, calibration, and 3d points. More information on MPS data can be found at: + https://facebookresearch.github.io/projectaria_tools/docs/ARK/mps. + """ + + vrs_file: Path + """Path to the VRS file.""" + mps_data_dir: Path + """Path to Project Aria Machine Perception Services (MPS) attachments.""" + output_dir: Path + """Path to the output directory.""" + + def main(self) -> None: + """Generate a nerfstudio dataset from ProjectAria data (VRS) and MPS attachments.""" + # Create output directory if it doesn't exist. + self.output_dir = self.output_dir.absolute() + self.output_dir.mkdir(parents=True, exist_ok=True) + + provider = create_vrs_data_provider(str(self.vrs_file.absolute())) + assert provider is not None, "Cannot open file" + + name_to_camera = get_camera_calibs(provider) + + print("Getting poses from closed loop trajectory CSV...") + trajectory_csv = self.mps_data_dir / "closed_loop_trajectory.csv" + t_world_devices = read_trajectory_csv_to_dict(str(trajectory_csv.absolute())) + + name = "camera-rgb" + stream_id = provider.get_stream_id_from_label(name) + + # create an AriaImageFrame for each image in the VRS. + print("Creating Aria frames...") + aria_frames = [ + to_aria_image_frame(provider, index, name_to_camera, t_world_devices, self.output_dir) + for index in range(0, provider.get_num_data(stream_id)) + ] + + # create the NerfStudio frames from the AriaImageFrames. + print("Creating NerfStudio frames...") + CANONICAL_RGB_VALID_RADIUS = 707.5 + CANONICAL_RGB_WIDTH = 1408 + rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (aria_frames[0].camera.width / CANONICAL_RGB_WIDTH) + nerfstudio_frames = { + "camera_model": ARIA_CAMERA_MODEL, + "frames": [to_nerfstudio_frame(frame) for frame in aria_frames], + "fisheye_crop_radius": rgb_valid_radius, + } + + # write the json out to disk as transforms.json + print("Writing transforms.json") + transform_file = self.output_dir / "transforms.json" + with open(transform_file, "w", encoding="UTF-8"): + transform_file.write_text(json.dumps(nerfstudio_frames)) + + +if __name__ == "__main__": + tyro.extras.set_accent_color("bright_yellow") + tyro.cli(ProcessProjectAria).main() diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 83cdcca473..2c2cd7a381 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -475,6 +475,12 @@ def main(self) -> None: CONSOLE.rule() +@dataclass +class NotInstalled: + def main(self) -> None: + ... 
+ + Commands = Union[ Annotated[ImagesToNerfstudioDataset, tyro.conf.subcommand(name="images")], Annotated[VideoToNerfstudioDataset, tyro.conf.subcommand(name="video")], @@ -485,6 +491,29 @@ def main(self) -> None: Annotated[ProcessODM, tyro.conf.subcommand(name="odm")], ] +# Add aria subcommand if projectaria_tools is installed. +try: + import projectaria_tools +except ImportError: + projectaria_tools = None + +if projectaria_tools is not None: + from nerfstudio.scripts.datasets.process_project_aria import ProcessProjectAria + + # Note that Union[A, Union[B, C]] == Union[A, B, C]. + Commands = Union[Commands, Annotated[ProcessProjectAria, tyro.conf.subcommand(name="aria")]] +else: + Commands = Union[ + Commands, + Annotated[ + NotInstalled, + tyro.conf.subcommand( + name="aria", + description="**Not installed.** Processing Project Aria data requires `pip install projectaria_tools'[all]'`.", + ), + ], + ] + def entrypoint(): """Entrypoint for use with pyproject scripts.""" diff --git a/pyproject.toml b/pyproject.toml index b6d11c60c3..3b6d69a84f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ dev = [ "opencv-stubs==0.0.7", "transformers==4.29.2", "pyright==1.1.331", + "projectaria_tools[all]>=1.2.0", ] # Documentation related packages From ae00061cf5506ee42e6fbc5f34340ffaabf574cd Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Tue, 12 Dec 2023 22:31:12 +0000 Subject: [PATCH 090/101] Fix custom data links in README (#2671) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1ea4ca8fad..b6c5bb6ff2 100644 --- a/README.md +++ b/README.md @@ -243,8 +243,8 @@ Using an existing dataset is great, but likely you want to use your own data! We | πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | | πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | -| πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | -| πŸ‘“ [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#Aria) | Aria glasses | [Project Aria](https://projectaria.com/) | πŸ‡ | +| πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | +| πŸ‘“ [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#aria) | Aria glasses | [Project Aria](https://projectaria.com/) | πŸ‡ | | πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | ## 5. 
Advanced Options From 17f785758b8bb9e30c5dec0dcb4c085b28c85401 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 13 Dec 2023 11:17:36 -0800 Subject: [PATCH 091/101] Parity of layout between viewer beta and legacy viewer (#2639) * alter layout of beta viewer to match old viewer --------- Co-authored-by: Brent Yi --- nerfstudio/viewer_beta/control_panel.py | 51 ++------------ nerfstudio/viewer_beta/render_panel.py | 2 +- .../viewer_beta/render_state_machine.py | 2 + nerfstudio/viewer_beta/viewer.py | 68 +++++++++++++++---- pyproject.toml | 2 +- 5 files changed, 66 insertions(+), 59 deletions(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index d118836849..c94b4745f6 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -20,11 +20,10 @@ import torch import viser.transforms as vtf from viser import ViserServer - +import viser from nerfstudio.data.scene_box import OrientedBox from nerfstudio.utils.colormaps import ColormapOptions, Colormaps from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup, - ViewerButton, ViewerButtonGroup, ViewerCheckbox, ViewerDropdown, @@ -57,8 +56,6 @@ def __init__( crop_update_cb: Callable, update_output_cb: Callable, update_split_output_cb: Callable, - toggle_training_state_cb: Callable, - camera_vis: Callable, default_composite_depth: bool = True, ): self.viser_scale_ratio = scale_ratio @@ -73,10 +70,6 @@ def __init__( options=["Slow", "Mid", "Fast"], cb_hook=lambda han: self._train_speed_cb(), ) - self._reset_camera = ViewerButton( - name="Reset Up Direction", - cb_hook=lambda han: self._reset_camera_cb(), - ) self._output_render = ViewerDropdown( "Output Render", "not set", @@ -189,28 +182,9 @@ def _update_crop_handle(han): self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Time to render") self._time_enabled = time_enabled - self.stat_folder = self.viser_server.add_gui_folder("Stats") - with self.stat_folder: - self.markdown = self.viser_server.add_gui_markdown("Step: 0") - self.pause_train = viser_server.add_gui_button(label="Pause Training", disabled=False) - self.pause_train.on_click(lambda _: self.toggle_pause_button()) - self.pause_train.on_click(lambda han: toggle_training_state_cb(han)) - self.resume_train = viser_server.add_gui_button(label="Resume Training", disabled=False) - self.resume_train.on_click(lambda _: self.toggle_pause_button()) - self.resume_train.on_click(lambda han: toggle_training_state_cb(han)) - self.resume_train.visible = False - # Add buttons to toggle training image visibility - self.hide_images = viser_server.add_gui_button(label="Hide Train Cams", disabled=False) - self.hide_images.on_click(lambda _: camera_vis(False)) - self.hide_images.on_click(lambda _: self.toggle_cameravis_button()) - self.show_images = viser_server.add_gui_button(label="Show Train Cams", disabled=False) - self.show_images.on_click(lambda _: camera_vis(True)) - self.show_images.on_click(lambda _: self.toggle_cameravis_button()) - self.show_images.visible = False - self.add_element(self._train_speed) self.add_element(self._train_util) - self.add_element(self._reset_camera) + with self.viser_server.add_gui_folder("Render Options"): self.add_element(self._max_res) self.add_element(self._output_render) @@ -244,6 +218,10 @@ def _update_crop_handle(han): self.add_element(self._crop_rot, additional_tags=("crop",)) self.add_element(self._time, additional_tags=("time",)) + self._reset_camera = 
viser_server.add_gui_button( + label="Reset Up Dir", disabled=False, icon=viser.Icon.ARROW_BIG_UP_LINES, color="gray" + ) + self._reset_camera.on_click(self._reset_camera_cb) def _train_speed_cb(self) -> None: pass @@ -259,25 +237,10 @@ def _train_speed_cb(self) -> None: self._train_util.value = 0.5 self._max_res.value = 1024 - def _reset_camera_cb(self) -> None: + def _reset_camera_cb(self, _) -> None: for client in self.viser_server.get_clients().values(): client.camera.up_direction = vtf.SO3(client.camera.wxyz) @ np.array([0.0, -1.0, 0.0]) - def toggle_pause_button(self) -> None: - self.pause_train.visible = not self.pause_train.visible - self.resume_train.visible = not self.resume_train.visible - - def toggle_cameravis_button(self) -> None: - self.hide_images.visible = not self.hide_images.visible - self.show_images.visible = not self.show_images.visible - - def update_step(self, step): - """ - Args: - step: the train step to set the model to - """ - self.markdown.content = f"Step: {step}" - def update_output_options(self, new_options: List[str]): """ Args: diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index b5f4e1bfb2..0e666038ef 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -485,7 +485,7 @@ def _(event: viser.GuiEvent) -> None: reset_up_button = server.add_gui_button( "Reset up direction", - icon=viser.Icon.ARROW_AUTOFIT_UP, + icon=viser.Icon.ARROW_BIG_UP_LINES, hint="Reset the orbit up direction.", ) diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index 46ceeec39c..53c73c34e8 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -246,6 +246,8 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any]): jpeg_quality=self.viewer.config.jpeg_quality, depth=depth, ) + res = f"{selected_output.shape[0]}x{selected_output.shape[1]}px" + self.viewer.stats_markdown.content = self.viewer.make_stats_markdown(None, res) def _calculate_image_res(self, aspect_ratio: float) -> Tuple[int, int]: """Calculate the maximum image height that can be rendered in the time budget diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index b01c35f4b5..92e609f076 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -103,18 +103,13 @@ def __init__( self._prev_train_state: Literal["training", "paused", "completed"] = "training" self.last_move_time = 0 - self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) + self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port) # Set the name of the URL either to the share link if available, or the localhost - # TODO: we should revisit this once a public API for share URL status is exposed in viser. 
-        # https://github.com/nerfstudio-project/viser/issues/124
         if share:
-            assert self.viser_server._share_tunnel is not None
-            while self.viser_server._share_tunnel._shared_state["status"] == "connecting":
-                # wait for connection before grabbing URL
-                time.sleep(0.01)
-            url_maybe = self.viser_server._share_tunnel.get_url()
-            if url_maybe is not None:
-                self.viewer_url = url_maybe
+            url = self.viser_server.request_share_url()
+            if url is None:
+                print("Couldn't make share URL")
+            self.viewer_url = url
             else:
                 self.viewer_url = f"http://{config.websocket_host}:{websocket_port}"
         else:
             self.viewer_url = f"http://{config.websocket_host}:{websocket_port}"
@@ -154,6 +149,32 @@ def __init__(
         self.viser_server.on_client_disconnect(self.handle_disconnect)
         self.viser_server.on_client_connect(self.handle_new_client)
 
+        # Populate the header, which includes the pause button, train cam button, and stats
+        self.pause_train = self.viser_server.add_gui_button(
+            label="Pause Training", disabled=False, icon=viser.Icon.PLAYER_PAUSE_FILLED
+        )
+        self.pause_train.on_click(lambda _: self.toggle_pause_button())
+        self.pause_train.on_click(lambda han: self._toggle_training_state(han))
+        self.resume_train = self.viser_server.add_gui_button(
+            label="Resume Training", disabled=False, icon=viser.Icon.PLAYER_PLAY_FILLED
+        )
+        self.resume_train.on_click(lambda _: self.toggle_pause_button())
+        self.resume_train.on_click(lambda han: self._toggle_training_state(han))
+        self.resume_train.visible = False
+        # Add buttons to toggle training image visibility
+        self.hide_images = self.viser_server.add_gui_button(
+            label="Hide Train Cams", disabled=False, icon=viser.Icon.EYE_OFF, color=None
+        )
+        self.hide_images.on_click(lambda _: self.set_camera_visibility(False))
+        self.hide_images.on_click(lambda _: self.toggle_cameravis_button())
+        self.show_images = self.viser_server.add_gui_button(
+            label="Show Train Cams", disabled=False, icon=viser.Icon.EYE, color=None
+        )
+        self.show_images.on_click(lambda _: self.set_camera_visibility(True))
+        self.show_images.on_click(lambda _: self.toggle_cameravis_button())
+        self.show_images.visible = False
+        mkdown = self.make_stats_markdown(0, "0x0px")
+        self.stats_markdown = self.viser_server.add_gui_markdown(mkdown)
         tabs = self.viser_server.add_gui_tab_group()
         control_tab = tabs.add_tab("Control", viser.Icon.SETTINGS)
         with control_tab:
@@ -165,8 +186,6 @@ def __init__(
                 self._crop_params_update,
                 self._output_type_change,
                 self._output_split_type_change,
-                self._toggle_training_state,
-                self.set_camera_visibility,
                 default_composite_depth=self.config.default_composite_depth,
             )
         config_path = self.log_filename.parents[0] / "config.yml"
@@ -218,6 +237,29 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem
         for c in self.viewer_controls:
             c._setup(self)
 
+    def toggle_pause_button(self) -> None:
+        self.pause_train.visible = not self.pause_train.visible
+        self.resume_train.visible = not self.resume_train.visible
+
+    def toggle_cameravis_button(self) -> None:
+        self.hide_images.visible = not self.hide_images.visible
+        self.show_images.visible = not self.show_images.visible
+
+    def make_stats_markdown(self, step: Optional[int], res: Optional[str]) -> str:
+        # if either are None, read it from the current stats_markdown content
+        if step is None:
+            step = int(self.stats_markdown.content.split("\n")[0].split(": ")[1])
+        if res is None:
+            res = (self.stats_markdown.content.split("\n")[1].split(": ")[1]).strip()
+        return f"Step: {step} \nResolution: {res}"
+
+    def update_step(self, step):
+        """
+        Args:
+            step: the train step to set the model to
""" + self.stats_markdown.content = self.make_stats_markdown(step, None) + def get_camera_state(self, client: viser.ClientHandle) -> CameraState: R = vtf.SO3(wxyz=client.camera.wxyz) R = R @ vtf.SO3.from_x_radians(np.pi) @@ -403,7 +445,7 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N if camera_state is not None: self.render_statemachines[id].action(RenderAction("step", camera_state)) self.update_camera_poses() - self.control_panel.update_step(step) + self.update_step(step) def update_colormap_options(self, dimensions: int, dtype: type) -> None: """update the colormap options based on the current render diff --git a/pyproject.toml b/pyproject.toml index 3b6d69a84f..019ef6b6a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.12", + "viser==0.1.13", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", From a894c6d9383340de1642debea46e4e79dd66bb5c Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 14 Dec 2023 17:47:51 +0100 Subject: [PATCH 092/101] Update first_nerf.md (#2677) Update documentation to mention that Comet visualizer can be used without the viwer --- docs/quickstart/first_nerf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart/first_nerf.md b/docs/quickstart/first_nerf.md index 137bd0b235..67a84783fb 100644 --- a/docs/quickstart/first_nerf.md +++ b/docs/quickstart/first_nerf.md @@ -98,7 +98,7 @@ Each script will have some other minor quirks (like the training script datapars ## Comet / Tensorboard / WandB / Viewer -We support four different methods to track training progress, using the viewer [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. +We support four different methods to track training progress, using the viewer [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, comet, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers. ## Evaluating Runs From 622e26b511545bc583ee5122bf4a9dc6bfb63034 Mon Sep 17 00:00:00 2001 From: Tobias Fischer <36965290+tobiasfshr@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:07:41 +0100 Subject: [PATCH 093/101] [Fix] Pixel Sampler setup in VanillaDataManager for Cameras incl. 
metadata (#2678) fix pixel sampler setup for cameras with metadata other than fisheye crop radius Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com> --- nerfstudio/data/datamanagers/base_datamanager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 47144c00e2..bf12bd6041 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -476,9 +476,10 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe if is_equirectangular.any(): CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") - fisheye_crop_radius = ( - None if dataset.cameras.metadata is None else dataset.cameras.metadata["fisheye_crop_radius"] - ) + fisheye_crop_radius = None + if dataset.cameras.metadata is not None and "fisheye_crop_radius" in dataset.cameras.metadata: + fisheye_crop_radius = dataset.cameras.metadata["fisheye_crop_radius"] + return self.config.pixel_sampler.setup( is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch, From 88873ef3abac208d0fe6e90d06c044900762b01a Mon Sep 17 00:00:00 2001 From: Rohan Mathur Date: Thu, 14 Dec 2023 17:22:20 -0800 Subject: [PATCH 094/101] Added support for .tif and .tiff files in equirect utils (#2613) * added tiff to equirect utils * added tif files --- nerfstudio/process_data/equirect_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/process_data/equirect_utils.py b/nerfstudio/process_data/equirect_utils.py index 497cb32331..232d967332 100644 --- a/nerfstudio/process_data/equirect_utils.py +++ b/nerfstudio/process_data/equirect_utils.py @@ -304,7 +304,7 @@ def generate_planar_projections_from_equirectangular( with progress: for i in progress.track(os.listdir(frame_dir), description="", total=num_ims): - if i.lower().endswith((".jpg", ".png", ".jpeg")): + if i.lower().endswith((".jpg", ".png", ".jpeg", ".tiff", ".tif")): im = np.array(cv2.imread(os.path.join(frame_dir, i))) im = torch.tensor(im, dtype=torch.float32, device=device) im = torch.permute(im, (2, 0, 1)).unsqueeze(0) / 255.0 @@ -331,7 +331,7 @@ def compute_resolution_from_equirect(image_dir: Path, num_images: int) -> Tuple[ """ for i in os.listdir(image_dir): - if i.lower().endswith((".jpg", ".png", ".jpeg")): + if i.lower().endswith((".jpg", ".png", ".jpeg", ".tiff", ".tif")): im = np.array(cv2.imread(os.path.join(image_dir, i))) res_squared = (im.shape[0] * im.shape[1]) / num_images return (int(np.sqrt(res_squared)), int(np.sqrt(res_squared))) From ddedb920dc7c2d5cd4be4a042f9718f5bff22981 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Thu, 14 Dec 2023 22:14:09 -0800 Subject: [PATCH 095/101] Gaussian splatting (#2521) * Adds Gaussian Splatting integration --------- Co-authored-by: Jake Austin jake-austin@berkeley.edu Co-authored-by: Brent Yi Co-authored-by: Zhuoyang Co-authored-by: Vickie Ye Co-authored-by: adamrashid96 Co-authored-by: Paul Wais Co-authored-by: Gina Wu Co-authored-by: Alexander Kristoffersen Co-authored-by: machenmusik Co-authored-by: Riley Co-authored-by: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com> Co-authored-by: maturk Co-authored-by: Guangyun-Xu <38279883+Guangyun-Xu@users.noreply.github.com> Co-authored-by: Zhuoyang Pan <102644383+Zhuoyang-Pan@users.noreply.github.com> --- nerfstudio/cameras/camera_optimizers.py | 11 + 
nerfstudio/configs/base_config.py | 2 +- nerfstudio/configs/method_configs.py | 54 +- .../data/datamanagers/base_datamanager.py | 36 +- .../datamanagers/full_images_datamanager.py | 410 ++++++++++ .../data/datamanagers/parallel_datamanager.py | 11 +- .../random_cameras_datamanager.py | 9 +- .../data/dataparsers/colmap_dataparser.py | 2 +- .../data/dataparsers/nerfstudio_dataparser.py | 31 + nerfstudio/data/utils/dataloaders.py | 16 +- nerfstudio/engine/optimizers.py | 21 + nerfstudio/engine/trainer.py | 37 +- nerfstudio/exporter/exporter_utils.py | 2 + nerfstudio/model_components/losses.py | 4 + nerfstudio/models/base_model.py | 21 +- nerfstudio/models/gaussian_splatting.py | 764 ++++++++++++++++++ nerfstudio/pipelines/base_pipeline.py | 53 +- nerfstudio/scripts/exporter.py | 78 +- nerfstudio/scripts/render.py | 16 +- nerfstudio/scripts/viewer/run_viewer.py | 2 +- .../viewer/server/render_state_machine.py | 25 +- nerfstudio/viewer_beta/export_panel.py | 6 + .../viewer_beta/render_state_machine.py | 41 +- nerfstudio/viewer_beta/utils.py | 2 +- nerfstudio/viewer_beta/viewer.py | 10 +- pyproject.toml | 6 +- tests/test_train.py | 1 + 27 files changed, 1544 insertions(+), 127 deletions(-) create mode 100644 nerfstudio/data/datamanagers/full_images_datamanager.py create mode 100644 nerfstudio/models/gaussian_splatting.py diff --git a/nerfstudio/cameras/camera_optimizers.py b/nerfstudio/cameras/camera_optimizers.py index 7b76488842..4aa69a69b6 100644 --- a/nerfstudio/cameras/camera_optimizers.py +++ b/nerfstudio/cameras/camera_optimizers.py @@ -33,6 +33,7 @@ from nerfstudio.utils import poses as pose_utils from nerfstudio.engine.optimizers import OptimizerConfig from nerfstudio.engine.schedulers import SchedulerConfig +from nerfstudio.cameras.cameras import Cameras @dataclass @@ -146,6 +147,16 @@ def apply_to_raybundle(self, raybundle: RayBundle) -> None: raybundle.origins = raybundle.origins + correction_matrices[:, :3, 3] raybundle.directions = torch.bmm(correction_matrices[:, :3, :3], raybundle.directions[..., None]).squeeze() + def apply_to_camera(self, camera: Cameras) -> None: + """Apply the pose correction to the raybundle""" + if self.config.mode != "off": + assert camera.metadata is not None, "Must provide id of camera in its metadata" + assert "cam_idx" in camera.metadata, "Must provide id of camera in its metadata" + camera_idx = camera.metadata["cam_idx"] + adj = self([camera_idx]) # type: ignore + adj = torch.cat([adj, torch.Tensor([0, 0, 0, 1])[None, None].to(adj)], dim=1) + camera.camera_to_worlds = torch.bmm(camera.camera_to_worlds, adj) + def get_loss_dict(self, loss_dict: dict) -> None: """Add regularization""" if self.config.mode != "off": diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py index c04c11aaac..85c205c5c5 100644 --- a/nerfstudio/configs/base_config.py +++ b/nerfstudio/configs/base_config.py @@ -144,7 +144,7 @@ class ViewerConfig(PrintableConfig): """Whether to kill the training job when it has completed. Note this will stop rendering in the viewer.""" image_format: Literal["jpeg", "png"] = "jpeg" """Image format viewer should use; jpeg is lossy compression, while png is lossless.""" - jpeg_quality: int = 90 + jpeg_quality: int = 75 """Quality tradeoff to use for jpeg compression.""" make_share_url: bool = False """Viewer beta feature: print a shareable URL. 
`vis` must be set to viewer_beta; this flag is otherwise ignored.""" diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index 74cabbafcb..2c3a8aac74 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -22,7 +22,6 @@ from typing import Dict import tyro -from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig @@ -49,6 +48,7 @@ from nerfstudio.data.datasets.depth_dataset import DepthDataset from nerfstudio.data.datasets.sdf_dataset import SDFDataset from nerfstudio.data.datasets.semantic_dataset import SemanticDataset +from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig from nerfstudio.engine.schedulers import ( CosineDecaySchedulerConfig, @@ -59,8 +59,10 @@ from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind from nerfstudio.fields.sdf_field import SDFFieldConfig from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig +from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig from nerfstudio.models.generfacto import GenerfactoModelConfig from nerfstudio.models.instant_ngp import InstantNGPModelConfig +from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig from nerfstudio.models.mipnerf import MipNerfModel from nerfstudio.models.nerfacto import NerfactoModelConfig from nerfstudio.models.neus import NeuSModelConfig @@ -69,6 +71,7 @@ from nerfstudio.models.tensorf import TensoRFModelConfig from nerfstudio.models.vanilla_nerf import NeRFModel, VanillaModelConfig from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig +from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig from nerfstudio.pipelines.dynamic_batch import DynamicBatchPipelineConfig from nerfstudio.plugins.registry import discover_methods @@ -87,6 +90,7 @@ "generfacto": "Generative Text to NeRF model", "neus": "Implementation of NeuS. (slow)", "neus-facto": "Implementation of NeuS-Facto. 
(slow)", + "gaussian-splatting": "Gaussian Splatting model", } method_configs["nerfacto"] = TrainerConfig( @@ -594,6 +598,54 @@ vis="viewer", ) +method_configs["gaussian-splatting"] = TrainerConfig( + method_name="gaussian-splatting", + steps_per_eval_image=100, + steps_per_eval_batch=100, + steps_per_save=2000, + steps_per_eval_all_images=1000, + max_num_iterations=30000, + mixed_precision=False, + gradient_accumulation_steps={"camera_opt": 100, "color": 2}, + pipeline=VanillaPipelineConfig( + datamanager=FullImageDatamanagerConfig( + dataparser=ColmapDataParserConfig(load_3D_points=True), + ), + model=GaussianSplattingModelConfig(), + ), + optimizers={ + "xyz": { + "optimizer": AdamOptimizerConfig(lr=1.6e-4, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig( + lr_final=1.6e-6, + max_steps=30000, + ), + }, + "color": { + "optimizer": AdamOptimizerConfig(lr=5e-4, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig( + lr_final=1e-4, + max_steps=30000, + ), + }, + "opacity": { + "optimizer": AdamOptimizerConfig(lr=0.05, eps=1e-15), + "scheduler": None, + }, + "scaling": { + "optimizer": AdamOptimizerConfig(lr=0.005, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-3, max_steps=30000), + }, + "rotation": {"optimizer": AdamOptimizerConfig(lr=0.001, eps=1e-15), "scheduler": None}, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=5e-5, max_steps=30000), + }, + }, + viewer=ViewerConfig(num_rays_per_chunk=1 << 15), + vis="viewer_beta", +) + def merge_methods(methods, method_descriptions, new_methods, new_descriptions, overwrite=True): """Merge new methods and descriptions into existing methods and descriptions. diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index bf12bd6041..67180bea60 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -21,12 +21,13 @@ from abc import abstractmethod from collections import defaultdict from dataclasses import dataclass, field -from pathlib import Path from functools import cached_property +from pathlib import Path from typing import ( Any, Callable, Dict, + ForwardRef, Generic, List, Literal, @@ -35,9 +36,8 @@ Type, Union, cast, - ForwardRef, - get_origin, get_args, + get_origin, ) import torch @@ -47,7 +47,7 @@ from typing_extensions import TypeVar from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig -from nerfstudio.cameras.cameras import CameraType +from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.cameras.rays import RayBundle from nerfstudio.configs.base_config import InstantiateConfig from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion @@ -55,9 +55,9 @@ from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.data.pixel_samplers import ( + PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig, - PatchPixelSamplerConfig, ) from nerfstudio.data.utils.dataloaders import ( CacheDataloader, @@ -67,9 +67,8 @@ from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes from nerfstudio.model_components.ray_generators import RayGenerator -from nerfstudio.utils.misc import IterableWrapper +from nerfstudio.utils.misc import IterableWrapper, 
get_orig_class from nerfstudio.utils.rich_utils import CONSOLE -from nerfstudio.utils.misc import get_orig_class def variable_res_collate(batch: List[Dict]) -> Dict: @@ -131,7 +130,7 @@ class DataManager(nn.Module): To get data, use the next_train and next_eval functions. This data manager's next_train and next_eval methods will return 2 things: - 1. A Raybundle: This will contain the rays we are sampling, with latents and + 1. 'rays': This will contain the rays or camera we are sampling, with latents and conditionals attached (everything needed at inference) 2. A "batch" of auxiliary information: This will contain the mask, the ground truth pixels, etc needed to actually train, score, etc the model @@ -246,7 +245,7 @@ def setup_eval(self): """Sets up the data manager for evaluation""" @abstractmethod - def next_train(self, step: int) -> Tuple[RayBundle, Dict]: + def next_train(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]: """Returns the next batch of data from the train data manager. Args: @@ -258,25 +257,25 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]: raise NotImplementedError @abstractmethod - def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: + def next_eval(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]: """Returns the next batch of data from the eval data manager. Args: step: the step number of the eval image to retrieve Returns: - A tuple of the ray bundle for the image, and a dictionary of additional batch information + A tuple of the ray/camera for the image, and a dictionary of additional batch information such as the groundtruth image. """ raise NotImplementedError @abstractmethod - def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: + def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: """Retrieve the next eval image. Args: step: the step number of the eval image to retrieve Returns: - A tuple of the step number, the ray bundle for the image, and a dictionary of + A tuple of the step number, the ray/camera for the image, and a dictionary of additional batch information such as the groundtruth image. """ raise NotImplementedError @@ -313,7 +312,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: @dataclass class VanillaDataManagerConfig(DataManagerConfig): - """A basic data manager""" + """A basic data manager for a ray-based model""" _target: Type = field(default_factory=lambda: VanillaDataManager) """Target class to instantiate.""" @@ -555,11 +554,10 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: ray_bundle = self.eval_ray_generator(ray_indices) return ray_bundle, batch - def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: - for camera_ray_bundle, batch in self.eval_dataloader: - assert camera_ray_bundle.camera_indices is not None - image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0]) - return image_idx, camera_ray_bundle, batch + def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: + for camera, batch in self.eval_dataloader: + assert camera.shape[0] == 1 + return camera, batch raise ValueError("No more eval images") def get_train_rays_per_batch(self) -> int: diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py new file mode 100644 index 0000000000..35837f05a1 --- /dev/null +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -0,0 +1,410 @@ +# Copyright 2022 The Nerfstudio Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Data manager that outputs cameras / images instead of raybundles + +Good for things like gaussian splatting which require full cameras instead of the standard ray +paradigm +""" + +from __future__ import annotations + +import random +from dataclasses import dataclass, field +from functools import cached_property +from pathlib import Path +from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin + +import cv2 +import numpy as np +import torch +from copy import deepcopy +from torch.nn import Parameter +from tqdm import tqdm + +from nerfstudio.cameras.cameras import Cameras, CameraType +from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion +from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset +from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs +from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig +from nerfstudio.data.datasets.base_dataset import InputDataset +from nerfstudio.utils.misc import get_orig_class +from nerfstudio.utils.rich_utils import CONSOLE + + +@dataclass +class FullImageDatamanagerConfig(DataManagerConfig): + _target: Type = field(default_factory=lambda: FullImageDatamanager) + dataparser: AnnotatedDataParserUnion = NerfstudioDataParserConfig() + camera_res_scale_factor: float = 1.0 + """The scale factor for scaling spatial data such as images, mask, semantics + along with relevant information about camera intrinsics + """ + eval_num_images_to_sample_from: int = -1 + """Number of images to sample during eval iteration.""" + eval_num_times_to_repeat_images: int = -1 + """When not evaluating on all images, number of iterations before picking + new images. If -1, never pick new images.""" + eval_image_indices: Optional[Tuple[int, ...]] = (0,) + """Specifies the image indices to use during eval; if None, uses all.""" + cache_images: Literal["no-cache", "cpu", "gpu"] = "cpu" + """Whether to cache images in memory. If "numpy", caches as numpy arrays, if "torch", caches as torch tensors.""" + + +class FullImageDatamanager(DataManager, Generic[TDataset]): + """ + A datamanager that outputs full images and cameras instead of raybundles. This makes the + datamanager more lightweight since we don't have to do generate rays. Useful for full-image + training e.g. 
rasterization pipelines + """ + + config: FullImageDatamanagerConfig + train_dataset: TDataset + eval_dataset: TDataset + + def __init__( + self, + config: FullImageDatamanagerConfig, + device: Union[torch.device, str] = "cpu", + test_mode: Literal["test", "val", "inference"] = "val", + world_size: int = 1, + local_rank: int = 0, + **kwargs, + ): + self.config = config + self.device = device + self.world_size = world_size + self.local_rank = local_rank + self.sampler = None + self.test_mode = test_mode + self.test_split = "test" if test_mode in ["test", "inference"] else "val" + self.dataparser_config = self.config.dataparser + if self.config.data is not None: + self.config.dataparser.data = Path(self.config.data) + else: + self.config.data = self.config.dataparser.data + self.dataparser = self.dataparser_config.setup() + if test_mode == "inference": + self.dataparser.downscale_factor = 1 # Avoid opening images + self.includes_time = self.dataparser.includes_time + + self.train_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split="train") + self.train_dataset = self.create_train_dataset() + self.eval_dataset = self.create_eval_dataset() + if len(self.train_dataset) > 500 and self.config.cache_images == "gpu": + CONSOLE.print("Train dataset has over 500 images, overriding cach_images to cpu", style="bold yellow") + self.config.cache_images = "cpu" + self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images) + self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device + if self.config.masks_on_gpu is True: + self.exclude_batch_keys_from_device.remove("mask") + if self.config.images_on_gpu is True: + self.exclude_batch_keys_from_device.remove("image") + + # Some logic to make sure we sample every camera in equal amounts + self.train_unseen_cameras = [i for i in range(len(self.train_dataset))] + self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))] + assert len(self.train_unseen_cameras) > 0, "No data found in dataset" + + super().__init__() + + def cache_images(self, cache_images_option): + cached_train = [] + CONSOLE.log("Caching / undistorting train images") + for i in tqdm(range(len(self.train_dataset)), leave=False): + # cv2.undistort the images / cameras + data = self.train_dataset.get_data(i) + camera = self.train_dataset.cameras[i].reshape(()) + K = camera.get_intrinsics_matrices().numpy() + if camera.distortion_params is None: + continue + distortion_params = camera.distortion_params.numpy() + image = data["image"].numpy() + + if camera.camera_type.item() == CameraType.PERSPECTIVE.value: + distortion_params = np.array( + [ + distortion_params[0], + distortion_params[1], + distortion_params[4], + distortion_params[5], + distortion_params[2], + distortion_params[3], + 0, + 0, + ] + ) + newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0) + image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore + # crop the image and update the intrinsics accordingly + x, y, w, h = roi + image = image[y : y + h, x : x + w] + if "mask" in data: + data["mask"] = data["mask"][y : y + h, x : x + w] + if "depth_image" in data: + data["depth_image"] = data["depth_image"][y : y + h, x : x + w] + # update the width, height + self.train_dataset.cameras.width[i] = w + self.train_dataset.cameras.height[i] = h + if "mask" in data: + mask = data["mask"].numpy() + mask = mask.astype(np.uint8) * 255 + mask = cv2.undistort(mask, K, distortion_params, 
None, newK) # type: ignore + mask = mask[y : y + h, x : x + w] + data["mask"] = torch.from_numpy(mask).bool() + K = newK + + elif camera.camera_type.item() == CameraType.FISHEYE.value: + distortion_params = np.array( + [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + ) + newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( + K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + ) + map1, map2 = cv2.fisheye.initUndistortRectifyMap( + K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + ) + # and then remap: + image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) + if "mask" in data: + mask = data["mask"].numpy() + mask = mask.astype(np.uint8) * 255 + mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK) + data["mask"] = torch.from_numpy(mask).bool() + K = newK + else: + raise NotImplementedError("Only perspective and fisheye cameras are supported") + data["image"] = torch.from_numpy(image) + + cached_train.append(data) + + self.train_dataset.cameras.fx[i] = float(K[0, 0]) + self.train_dataset.cameras.fy[i] = float(K[1, 1]) + self.train_dataset.cameras.cx[i] = float(K[0, 2]) + self.train_dataset.cameras.cy[i] = float(K[1, 2]) + + cached_eval = [] + CONSOLE.log("Caching / undistorting eval images") + for i in tqdm(range(len(self.eval_dataset)), leave=False): + # cv2.undistort the images / cameras + data = self.eval_dataset.get_data(i) + camera = self.eval_dataset.cameras[i].reshape(()) + K = camera.get_intrinsics_matrices().numpy() + if camera.distortion_params is None: + continue + distortion_params = camera.distortion_params.numpy() + image = data["image"].numpy() + + if camera.camera_type.item() == CameraType.PERSPECTIVE.value: + distortion_params = np.array( + [ + distortion_params[0], + distortion_params[1], + distortion_params[4], + distortion_params[5], + distortion_params[2], + distortion_params[3], + 0, + 0, + ] + ) + newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0) + image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore + # crop the image and update the intrinsics accordingly + x, y, w, h = roi + image = image[y : y + h, x : x + w] + # update the width, height + self.eval_dataset.cameras.width[i] = w + self.eval_dataset.cameras.height[i] = h + if "mask" in data: + mask = data["mask"].numpy() + mask = mask.astype(np.uint8) * 255 + mask = cv2.undistort(mask, K, distortion_params, None, newK) # type: ignore + mask = mask[y : y + h, x : x + w] + data["mask"] = torch.from_numpy(mask).bool() + K = newK + + elif camera.camera_type.item() == CameraType.FISHEYE.value: + distortion_params = np.array( + [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + ) + newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( + K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + ) + map1, map2 = cv2.fisheye.initUndistortRectifyMap( + K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + ) + # and then remap: + image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) + if "mask" in data: + mask = data["mask"].numpy() + mask = mask.astype(np.uint8) * 255 + mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK) + data["mask"] = torch.from_numpy(mask).bool() + K = newK + else: + raise 
NotImplementedError("Only perspective and fisheye cameras are supported") + data["image"] = torch.from_numpy(image) + + cached_eval.append(data) + + self.eval_dataset.cameras.fx[i] = float(K[0, 0]) + self.eval_dataset.cameras.fy[i] = float(K[1, 1]) + self.eval_dataset.cameras.cx[i] = float(K[0, 2]) + self.eval_dataset.cameras.cy[i] = float(K[1, 2]) + + if cache_images_option == "gpu": + for cache in cached_train: + cache["image"] = cache["image"].to(self.device) + if "mask" in cache: + cache["mask"] = cache["mask"].to(self.device) + for cache in cached_eval: + cache["image"] = cache["image"].to(self.device) + if "mask" in cache: + cache["mask"] = cache["mask"].to(self.device) + else: + for cache in cached_train: + cache["image"] = cache["image"].pin_memory() + if "mask" in cache: + cache["mask"] = cache["mask"].pin_memory() + for cache in cached_eval: + cache["image"] = cache["image"].pin_memory() + if "mask" in cache: + cache["mask"] = cache["mask"].pin_memory() + + return cached_train, cached_eval + + def create_train_dataset(self) -> TDataset: + """Sets up the data loaders for training""" + return self.dataset_type( + dataparser_outputs=self.train_dataparser_outputs, + scale_factor=self.config.camera_res_scale_factor, + ) + + def create_eval_dataset(self) -> TDataset: + """Sets up the data loaders for evaluation""" + return self.dataset_type( + dataparser_outputs=self.dataparser.get_dataparser_outputs(split=self.test_split), + scale_factor=self.config.camera_res_scale_factor, + ) + + @cached_property + def dataset_type(self) -> Type[TDataset]: + """Returns the dataset type passed as the generic argument""" + default: Type[TDataset] = cast(TDataset, TDataset.__default__) # type: ignore + orig_class: Type[FullImageDatamanager] = get_orig_class(self, default=None) # type: ignore + if type(self) is FullImageDatamanager and orig_class is None: + return default + if orig_class is not None and get_origin(orig_class) is FullImageDatamanager: + return get_args(orig_class)[0] + + # For inherited classes, we need to find the correct type to instantiate + for base in getattr(self, "__orig_bases__", []): + if get_origin(base) is FullImageDatamanager: + for value in get_args(base): + if isinstance(value, ForwardRef): + if value.__forward_evaluated__: + value = value.__forward_value__ + elif value.__forward_module__ is None: + value.__forward_module__ = type(self).__module__ + value = getattr(value, "_evaluate")(None, None, set()) + assert isinstance(value, type) + if issubclass(value, InputDataset): + return cast(Type[TDataset], value) + return default + + def get_datapath(self) -> Path: + return self.config.dataparser.data + + def setup_train(self): + """Sets up the data loaders for training""" + + def setup_eval(self): + """Sets up the data loader for evaluation""" + + @property + def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]: + """ + Pretends to be the dataloader for evaluation, it returns a list of (camera, data) tuples + """ + image_indices = list(range(len(self.eval_unseen_cameras))) + data = deepcopy(self.cached_eval) + _cameras = deepcopy(self.eval_dataset.cameras).to(self.device) + cameras = [] + for i in image_indices: + data[i]["image"] = data[i]["image"].to(self.device) + cameras.append(_cameras[i : i + 1]) + assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension" + return list(zip(cameras, data)) + + def get_param_groups(self) -> Dict[str, List[Parameter]]: + """Get the param groups for the data manager. 
+ Returns: + A list of dictionaries containing the data manager's param groups. + """ + return {} + + def get_train_rays_per_batch(self): + # TODO: fix this to be the resolution of the last image rendered + return 800 * 800 + + def next_train(self, step: int) -> Tuple[Cameras, Dict]: + """Returns the next training batch + + Returns a Camera instead of raybundle""" + image_idx = self.train_unseen_cameras.pop(random.randint(0, len(self.train_unseen_cameras) - 1)) + # Make sure to re-populate the unseen cameras list if we have exhausted it + if len(self.train_unseen_cameras) == 0: + self.train_unseen_cameras = [i for i in range(len(self.train_dataset))] + + data = deepcopy(self.cached_train[image_idx]) + data["image"] = data["image"].to(self.device) + + assert len(self.train_dataset.cameras.shape) == 1, "Assumes single batch dimension" + camera = self.train_dataset.cameras[image_idx : image_idx + 1].to(self.device) + if camera.metadata is None: + camera.metadata = {} + camera.metadata["cam_idx"] = image_idx + return camera, data + + def next_eval(self, step: int) -> Tuple[Cameras, Dict]: + """Returns the next evaluation batch + + Returns a Camera instead of raybundle""" + image_idx = self.eval_unseen_cameras.pop(random.randint(0, len(self.eval_unseen_cameras) - 1)) + # Make sure to re-populate the unseen cameras list if we have exhausted it + if len(self.eval_unseen_cameras) == 0: + self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))] + data = deepcopy(self.cached_eval[image_idx]) + data["image"] = data["image"].to(self.device) + assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension" + camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device) + return camera, data + + def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: + """Returns the next evaluation batch + + Returns a Camera instead of raybundle + + TODO: Make sure this logic is consistent with the vanilladatamanager""" + image_idx = self.eval_unseen_cameras.pop(random.randint(0, len(self.eval_unseen_cameras) - 1)) + # Make sure to re-populate the unseen cameras list if we have exhausted it + if len(self.eval_unseen_cameras) == 0: + self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))] + data = deepcopy(self.cached_eval[image_idx]) + data["image"] = data["image"].to(self.device) + assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension" + camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device) + return camera, data diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py index 5c4d5c4a27..4e22bee15b 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -38,7 +38,7 @@ from rich.progress import track from torch.nn import Parameter -from nerfstudio.cameras.cameras import CameraType +from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.cameras.rays import RayBundle from nerfstudio.data.datamanagers.base_datamanager import ( DataManager, @@ -293,12 +293,11 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: ray_bundle = self.eval_ray_generator(ray_indices) return ray_bundle, batch - def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: + def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: """Retrieve the next eval image.""" - for camera_ray_bundle, batch in self.eval_dataloader: - assert 
camera_ray_bundle.camera_indices is not None - image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0]) - return image_idx, camera_ray_bundle, batch + for camera, batch in self.eval_dataloader: + assert camera.shape[0] == 1 + return camera, batch raise ValueError("No more eval images") def get_train_rays_per_batch(self) -> int: diff --git a/nerfstudio/data/datamanagers/random_cameras_datamanager.py b/nerfstudio/data/datamanagers/random_cameras_datamanager.py index 4269b428ab..7704f2f161 100644 --- a/nerfstudio/data/datamanagers/random_cameras_datamanager.py +++ b/nerfstudio/data/datamanagers/random_cameras_datamanager.py @@ -290,11 +290,10 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: return ray_bundle, {"vertical": vertical_rotation, "central": central_rotation} - def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]: - for camera_ray_bundle, batch in self.eval_dataloader: - assert camera_ray_bundle.camera_indices is not None - image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0]) - return image_idx, camera_ray_bundle, batch + def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: + for camera, batch in self.eval_dataloader: + assert camera.shape[0] == 1 + return camera, batch raise ValueError("No more eval images") def get_train_rays_per_batch(self) -> int: diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index 071a11e1ea..02ddca2934 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -70,7 +70,7 @@ class ColmapDataParserConfig(DataParserConfig): """Path to masks directory. If not set, masks are not loaded.""" depths_path: Optional[Path] = None """Path to depth maps directory. 
If not set, depths are not loaded.""" - colmap_path: Path = Path("sparse/0") + colmap_path: Path = Path("colmap/sparse/0") """Path to the colmap reconstruction directory relative to the data path.""" load_3D_points: bool = False """Whether to load the 3D points from the colmap reconstruction.""" diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 9b4c2951ac..7f6f21b885 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -20,6 +20,7 @@ from typing import Literal, Optional, Type import numpy as np +import open3d as o3d import torch from PIL import Image @@ -318,6 +319,12 @@ def _generate_dataparser_outputs(self, split="train"): applied_scale = float(meta["applied_scale"]) scale_factor *= applied_scale + # Load 3D points + metadata = {} + if "ply_file_path" in meta: + ply_file_path = data_dir / meta["ply_file_path"] + metadata.update(self._load_3D_points(ply_file_path, transform_matrix, scale_factor)) + dataparser_outputs = DataparserOutputs( image_filenames=image_filenames, cameras=cameras, @@ -328,10 +335,34 @@ def _generate_dataparser_outputs(self, split="train"): metadata={ "depth_filenames": depth_filenames if len(depth_filenames) > 0 else None, "depth_unit_scale_factor": self.config.depth_unit_scale_factor, + **metadata, }, ) return dataparser_outputs + def _load_3D_points(self, ply_file_path: Path, transform_matrix: torch.Tensor, scale_factor: float): + pcd = o3d.io.read_point_cloud(str(ply_file_path)) + + points3D = torch.from_numpy(np.asarray(pcd.points, dtype=np.float32)) + points3D = ( + torch.cat( + ( + points3D, + torch.ones_like(points3D[..., :1]), + ), + -1, + ) + @ transform_matrix.T + ) + points3D *= scale_factor + points3D_rgb = torch.from_numpy((np.asarray(pcd.colors) * 255).astype(np.uint8)) + + out = { + "points3D_xyz": points3D, + "points3D_rgb": points3D_rgb, + } + return out + def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="images_") -> Path: """Get the filename of the image file. downsample_folder_prefix can be used to point to auxiliary image data, e.g. masks diff --git a/nerfstudio/data/utils/dataloaders.py b/nerfstudio/data/utils/dataloaders.py index e546a130b6..6a64ba738b 100644 --- a/nerfstudio/data/utils/dataloaders.py +++ b/nerfstudio/data/utils/dataloaders.py @@ -175,13 +175,17 @@ def __iter__(self): def __next__(self) -> Tuple[RayBundle, Dict]: """Returns the next batch of data""" - def get_camera(self, image_idx: int = 0) -> Cameras: + def get_camera(self, image_idx: int = 0) -> Tuple[Cameras, Dict]: """Get camera for the given image index Args: image_idx: Camera image index """ - return self.cameras[image_idx] + camera = self.cameras[image_idx : image_idx + 1] + batch = self.input_dataset[image_idx] + batch = get_dict_to_torch(batch, device=self.device, exclude=["image"]) + assert isinstance(batch, dict) + return camera, batch def get_data_from_image_idx(self, image_idx: int) -> Tuple[RayBundle, Dict]: """Returns the data for a specific image index. 
@@ -226,9 +230,9 @@ def __iter__(self): def __next__(self): if self.count < len(self.image_indices): image_idx = self.image_indices[self.count] - ray_bundle, batch = self.get_data_from_image_idx(image_idx) + camera, batch = self.get_camera(image_idx) self.count += 1 - return ray_bundle, batch + return camera, batch raise StopIteration @@ -245,5 +249,5 @@ def __iter__(self): def __next__(self): # choose a random image index image_idx = random.randint(0, len(self.cameras) - 1) - ray_bundle, batch = self.get_data_from_image_idx(image_idx) - return ray_bundle, batch + camera, batch = self.get_camera(image_idx) + return camera, batch diff --git a/nerfstudio/engine/optimizers.py b/nerfstudio/engine/optimizers.py index 9b8d48dba3..0a87947863 100644 --- a/nerfstudio/engine/optimizers.py +++ b/nerfstudio/engine/optimizers.py @@ -135,6 +135,12 @@ def zero_grad_all(self) -> None: for _, optimizer in self.optimizers.items(): optimizer.zero_grad() + def zero_grad_some(self, param_groups: List[str]) -> None: + """Zero the gradients for the given parameter groups.""" + for param_group in param_groups: + optimizer = self.optimizers[param_group] + optimizer.zero_grad() + def optimizer_scaler_step_all(self, grad_scaler: GradScaler) -> None: """Take an optimizer step using a grad scaler. @@ -149,6 +155,21 @@ def optimizer_scaler_step_all(self, grad_scaler: GradScaler) -> None: if any(any(p.grad is not None for p in g["params"]) for g in optimizer.param_groups): grad_scaler.step(optimizer) + def optimizer_scaler_step_some(self, grad_scaler: GradScaler, param_groups: List[str]) -> None: + """Take an optimizer step using a grad scaler ONLY on the specified param groups. + + Args: + grad_scaler: GradScaler to use + """ + for param_group in param_groups: + optimizer = self.optimizers[param_group] + max_norm = self.config[param_group]["optimizer"].max_norm + if max_norm is not None: + grad_scaler.unscale_(optimizer) + torch.nn.utils.clip_grad_norm_(self.parameters[param_group], max_norm) + if any(any(p.grad is not None for p in g["params"]) for g in optimizer.param_groups): + grad_scaler.step(optimizer) + def optimizer_step_all(self) -> None: """Run step for all optimizers.""" for param_group, optimizer in self.optimizers.items(): diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index 2930db892e..c262887d8d 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -24,8 +24,8 @@ from dataclasses import dataclass, field from pathlib import Path from threading import Lock -from typing import Dict, List, Literal, Optional, Tuple, Type, cast - +from typing import Dict, List, Literal, Optional, Tuple, Type, cast, DefaultDict +from collections import defaultdict import torch from nerfstudio.configs.experiment_config import ExperimentConfig from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation @@ -80,8 +80,8 @@ class TrainerConfig(ExperimentConfig): """Path to checkpoint file.""" log_gradients: bool = False """Optionally log gradients during training""" - gradient_accumulation_steps: int = 1 - """Number of steps to accumulate gradients over.""" + gradient_accumulation_steps: Dict = field(default_factory=lambda: {}) + """Number of steps to accumulate gradients over. 
Contains a mapping of {param_group:num}""" class Trainer: @@ -118,7 +118,8 @@ def __init__(self, config: TrainerConfig, local_rank: int = 0, world_size: int = self.mixed_precision: bool = self.config.mixed_precision self.use_grad_scaler: bool = self.mixed_precision or self.config.use_grad_scaler self.training_state: Literal["training", "paused", "completed"] = "training" - self.gradient_accumulation_steps: int = self.config.gradient_accumulation_steps + self.gradient_accumulation_steps: DefaultDict = defaultdict(lambda: 1) + self.gradient_accumulation_steps.update(self.config.gradient_accumulation_steps) if self.device == "cpu": self.mixed_precision = False @@ -459,19 +460,23 @@ def train_iteration(self, step: int) -> TRAIN_INTERATION_OUTPUT: step: Current training step. """ - self.optimizers.zero_grad_all() + needs_zero = [ + group for group in self.optimizers.parameters.keys() if step % self.gradient_accumulation_steps[group] == 0 + ] + self.optimizers.zero_grad_some(needs_zero) cpu_or_cuda_str: str = self.device.split(":")[0] cpu_or_cuda_str = "cpu" if cpu_or_cuda_str == "mps" else cpu_or_cuda_str - assert ( - self.gradient_accumulation_steps > 0 - ), f"gradient_accumulation_steps must be > 0, not {self.gradient_accumulation_steps}" - for _ in range(self.gradient_accumulation_steps): - with torch.autocast(device_type=cpu_or_cuda_str, enabled=self.mixed_precision): - _, loss_dict, metrics_dict = self.pipeline.get_train_loss_dict(step=step) - loss = functools.reduce(torch.add, loss_dict.values()) - loss /= self.gradient_accumulation_steps - self.grad_scaler.scale(loss).backward() # type: ignore - self.optimizers.optimizer_scaler_step_all(self.grad_scaler) + + with torch.autocast(device_type=cpu_or_cuda_str, enabled=self.mixed_precision): + _, loss_dict, metrics_dict = self.pipeline.get_train_loss_dict(step=step) + loss = functools.reduce(torch.add, loss_dict.values()) + self.grad_scaler.scale(loss).backward() # type: ignore + needs_step = [ + group + for group in self.optimizers.parameters.keys() + if step % self.gradient_accumulation_steps[group] == self.gradient_accumulation_steps[group] - 1 + ] + self.optimizers.optimizer_scaler_step_some(self.grad_scaler, needs_step) if self.config.log_gradients: total_grad = 0 diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py index 66d6586901..56b4b64456 100644 --- a/nerfstudio/exporter/exporter_utils.py +++ b/nerfstudio/exporter/exporter_utils.py @@ -28,6 +28,7 @@ import pymeshlab import torch from jaxtyping import Float +from nerfstudio.cameras.rays import RayBundle from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn from torch import Tensor @@ -130,6 +131,7 @@ def generate_point_cloud( with torch.no_grad(): ray_bundle, _ = pipeline.datamanager.next_train(0) + assert isinstance(ray_bundle, RayBundle) outputs = pipeline.model(ray_bundle) if rgb_output_name not in outputs: CONSOLE.rule("Error", style="red") diff --git a/nerfstudio/model_components/losses.py b/nerfstudio/model_components/losses.py index e90ec8e613..d464669893 100644 --- a/nerfstudio/model_components/losses.py +++ b/nerfstudio/model_components/losses.py @@ -575,6 +575,10 @@ def depth_ranking_loss(rendered_depth, gt_depth): and rendered_depth are from pixels with a radius of each other """ m = 1e-4 + if rendered_depth.shape[0] % 2 != 0: + # chop off one index + rendered_depth = rendered_depth[:-1, :] + gt_depth = gt_depth[:-1, :] dpt_diff = gt_depth[::2, :] - gt_depth[1::2, :] out_diff = 
rendered_depth[::2, :] - rendered_depth[1::2, :] + m differing_signs = torch.sign(dpt_diff) != torch.sign(out_diff) diff --git a/nerfstudio/models/base_model.py b/nerfstudio/models/base_model.py index d7a2217443..2a434a85e8 100644 --- a/nerfstudio/models/base_model.py +++ b/nerfstudio/models/base_model.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Base Model implementation which takes in RayBundles +Base Model implementation which takes in RayBundles or Cameras """ from __future__ import annotations @@ -27,10 +27,11 @@ from torch import nn from torch.nn import Parameter +from nerfstudio.cameras.cameras import Cameras from nerfstudio.cameras.rays import RayBundle from nerfstudio.configs.base_config import InstantiateConfig from nerfstudio.configs.config_utils import to_immutable_dict -from nerfstudio.data.scene_box import SceneBox +from nerfstudio.data.scene_box import SceneBox, OrientedBox from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes from nerfstudio.model_components.scene_colliders import NearFarCollider @@ -117,7 +118,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: """ @abstractmethod - def get_outputs(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, List]]: + def get_outputs(self, ray_bundle: Union[RayBundle, Cameras]) -> Dict[str, Union[torch.Tensor, List]]: """Takes in a Ray Bundle and returns a dictionary of outputs. Args: @@ -128,7 +129,7 @@ def get_outputs(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, Li Outputs of model. (ie. rendered colors) """ - def forward(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, List]]: + def forward(self, ray_bundle: Union[RayBundle, Cameras]) -> Dict[str, Union[torch.Tensor, List]]: """Run forward starting with a ray bundle. This outputs different things depending on the configuration of the model and whether or not the batch is provided (whether or not we are training basically) @@ -161,6 +162,18 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te metrics_dict: dictionary of metrics, some of which we can use for loss """ + @torch.no_grad() + def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]: + """Takes in a camera, generates the raybundle, and computes the output of the model. + Assumes a ray-based model. + + Args: + camera: generates raybundle + """ + return self.get_outputs_for_camera_ray_bundle( + camera.generate_rays(camera_indices=0, keep_shape=True, obb_box=obb_box) + ) + @torch.no_grad() def get_outputs_for_camera_ray_bundle(self, camera_ray_bundle: RayBundle) -> Dict[str, torch.Tensor]: """Takes in camera parameters and computes the output of the model. diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py new file mode 100644 index 0000000000..fe081734ec --- /dev/null +++ b/nerfstudio/models/gaussian_splatting.py @@ -0,0 +1,764 @@ +# ruff: noqa: E741 +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
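A minimal usage sketch (illustration only, not part of the patch) of the new `Model.get_outputs_for_camera` entry point added above, assuming `model` is a loaded nerfstudio model and `camera` is a `Cameras` object describing a single view:

```python
import torch
from nerfstudio.cameras.cameras import Cameras
from nerfstudio.models.base_model import Model


@torch.no_grad()
def render_rgb(model: Model, camera: Cameras) -> torch.Tensor:
    # Ray-based models generate a ray bundle internally via
    # get_outputs_for_camera_ray_bundle; GaussianSplattingModel (added below)
    # overrides get_outputs_for_camera to rasterize the camera view directly.
    outputs = model.get_outputs_for_camera(camera.to(model.device))
    return outputs["rgb"]
```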
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +NeRF implementation that combines many recent advancements. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple, Type, Union +from nerfstudio.data.scene_box import OrientedBox + +import torch +from torch.nn import Parameter +from torchmetrics.image import PeakSignalNoiseRatio +from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity +import torchvision.transforms.functional as TF + +from nerfstudio.cameras.cameras import Cameras +from gsplat._torch_impl import quat_to_rotmat +from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation +from nerfstudio.engine.optimizers import Optimizers +from nerfstudio.models.base_model import Model, ModelConfig +import math +import numpy as np +from sklearn.neighbors import NearestNeighbors +from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig + +from gsplat.rasterize import RasterizeGaussians +from gsplat.project_gaussians import ProjectGaussians +from gsplat.sh import SphericalHarmonics, num_sh_bases +from pytorch_msssim import SSIM + +# need following import for background color override +from nerfstudio.model_components import renderers + + +def random_quat_tensor(N): + """ + Defines a random quaternion tensor of shape (N, 4) + """ + u = torch.rand(N) + v = torch.rand(N) + w = torch.rand(N) + return torch.stack( + [ + torch.sqrt(1 - u) * torch.sin(2 * math.pi * v), + torch.sqrt(1 - u) * torch.cos(2 * math.pi * v), + torch.sqrt(u) * torch.sin(2 * math.pi * w), + torch.sqrt(u) * torch.sin(2 * math.pi * w), + ], + dim=-1, + ) + + +def RGB2SH(rgb): + """ + Converts from RGB values [0,1] to the 0th spherical harmonic coefficient + """ + C0 = 0.28209479177387814 + return (rgb - 0.5) / C0 + + +def SH2RGB(sh): + """ + Converts from the 0th spherical harmonic coefficient to RGB values [0,1] + """ + C0 = 0.28209479177387814 + return sh * C0 + 0.5 + + +def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"): + """ + Constructs an OpenGL-style perspective projection matrix. 
+ """ + t = znear * math.tan(0.5 * fovy) + b = -t + r = znear * math.tan(0.5 * fovx) + l = -r + n = znear + f = zfar + return torch.tensor( + [ + [2 * n / (r - l), 0.0, (r + l) / (r - l), 0.0], + [0.0, 2 * n / (t - b), (t + b) / (t - b), 0.0], + [0.0, 0.0, (f + n) / (f - n), -1.0 * f * n / (f - n)], + [0.0, 0.0, 1.0, 0.0], + ], + device=device, + ) + + +@dataclass +class GaussianSplattingModelConfig(ModelConfig): + """Gaussian Splatting Model Config""" + + _target: Type = field(default_factory=lambda: GaussianSplattingModel) + warmup_length: int = 500 + """period of steps where refinement is turned off""" + refine_every: int = 100 + """period of steps where gaussians are culled and densified""" + resolution_schedule: int = 250 + """training starts at 1/d resolution, every n steps this is doubled""" + num_downscales: int = 2 + """at the beginning, resolution is 1/2^d, where d is this number""" + cull_alpha_thresh: float = 0.1 + """threshold of opacity for culling gaussians""" + cull_scale_thresh: float = 0.5 + """threshold of scale for culling gaussians""" + reset_alpha_every: int = 30 + """Every this many refinement steps, reset the alpha""" + densify_grad_thresh: float = 0.0002 + """threshold of positional gradient norm for densifying gaussians""" + densify_size_thresh: float = 0.01 + """below this size, gaussians are *duplicated*, otherwise split""" + n_split_samples: int = 2 + """number of samples to split gaussians into""" + sh_degree_interval: int = 1000 + """every n intervals turn on another sh degree""" + cull_screen_size: float = 0.15 + """if a gaussian is more than this percent of screen space, cull it""" + split_screen_size: float = 0.05 + """if a gaussian is more than this percent of screen space, split it""" + stop_screen_size_at: int = 4000 + """stop culling/splitting at this step WRT screen size of gaussians""" + random_init: bool = False + """whether to initialize the positions uniformly randomly (not SFM points)""" + ssim_lambda: float = 0.2 + """weight of ssim loss""" + stop_split_at: int = 15000 + """stop splitting at this step""" + sh_degree: int = 4 + """maximum degree of spherical harmonics to use""" + camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="off") + """camera optimizer config""" + max_gauss_ratio: float = 10.0 + """threshold of ratio of gaussian max to min scale before applying regularization + loss from the PhysGaussian paper + """ + + +class GaussianSplattingModel(Model): + """Gaussian Splatting model + + Args: + config: Gaussian Splatting configuration to instantiate model + """ + + config: GaussianSplattingModelConfig + + def __init__(self, *args, **kwargs): + if "seed_points" in kwargs: + self.seed_pts = kwargs["seed_points"] + else: + self.seed_pts = None + super().__init__(*args, **kwargs) + + def populate_modules(self): + if self.seed_pts is not None and not self.config.random_init: + self.means = torch.nn.Parameter(self.seed_pts[0]) # (Location, Color) + else: + self.means = torch.nn.Parameter((torch.rand((500000, 3)) - 0.5) * 10) + self.xys_grad_norm = None + self.max_2Dsize = None + distances, _ = self.k_nearest_sklearn(self.means.data, 3) + distances = torch.from_numpy(distances) + # find the average of the three nearest neighbors for each point and use that as the scale + avg_dist = distances.mean(dim=-1, keepdim=True) + self.scales = torch.nn.Parameter(torch.log(avg_dist.repeat(1, 3))) + self.quats = torch.nn.Parameter(random_quat_tensor(self.num_points)) + dim_sh = num_sh_bases(self.config.sh_degree) + + if self.seed_pts is 
not None and not self.config.random_init: + fused_color = RGB2SH(self.seed_pts[1] / 255) + shs = torch.zeros((fused_color.shape[0], dim_sh, 3)).float().cuda() + shs[:, 0, :3] = fused_color + shs[:, 1:, 3:] = 0.0 + self.colors_all = torch.nn.Parameter(shs) + else: + colors = torch.nn.Parameter(torch.rand(self.num_points, 1, 3)) + shs_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3))) + self.colors_all = torch.nn.Parameter(torch.cat([colors, shs_rest], dim=1)) + + self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1))) + + # metrics + self.psnr = PeakSignalNoiseRatio(data_range=1.0) + self.ssim = SSIM(data_range=1.0, size_average=True, channel=3) + self.lpips = LearnedPerceptualImagePatchSimilarity(normalize=True) + self.step = 0 + + self.crop_box: Optional[OrientedBox] = None + self.back_color = torch.zeros(3) + + self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup( + num_cameras=self.num_train_data, device="cpu" + ) + + @property + def colors(self): + return SH2RGB(self.colors_all[:, 0, :]) + + @property + def shs_rest(self): + return self.colors_all[:, 1:, :] + + def load_state_dict(self, dict, **kwargs): # type: ignore + # resize the parameters to match the new number of points + self.step = 30000 + newp = dict["means"].shape[0] + self.means = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device)) + self.scales = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device)) + self.quats = torch.nn.Parameter(torch.zeros(newp, 4, device=self.device)) + self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device)) + self.colors_all = torch.nn.Parameter( + torch.zeros(newp, num_sh_bases(self.config.sh_degree), 3, device=self.device) + ) + super().load_state_dict(dict, **kwargs) + + def k_nearest_sklearn(self, x: torch.Tensor, k: int): + """ + Find k-nearest neighbors using sklearn's NearestNeighbors. + x: The data tensor of shape [num_samples, num_features] + k: The number of neighbors to retrieve + """ + # Convert tensor to numpy array + x_np = x.cpu().numpy() + + # Build the nearest neighbors model + nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np) + + # Find the k-nearest neighbors + distances, indices = nn_model.kneighbors(x_np) + + # Exclude the point itself from the result and return + return distances[:, 1:].astype(np.float32), indices[:, 1:].astype(np.float32) + + def remove_from_optim(self, optimizer, deleted_mask, new_params): + """removes the deleted_mask from the optimizer provided""" + assert len(new_params) == 1 + # assert isinstance(optimizer, torch.optim.Adam), "Only works with Adam" + + param = optimizer.param_groups[0]["params"][0] + param_state = optimizer.state[param] + del optimizer.state[param] + + # Modify the state directly without deleting and reassigning. + param_state["exp_avg"] = param_state["exp_avg"][~deleted_mask] + param_state["exp_avg_sq"] = param_state["exp_avg_sq"][~deleted_mask] + + # Update the parameter in the optimizer's param group. 
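The optimizer bookkeeping in `remove_from_optim` above (and `dup_in_optim` below) keeps Adam's running moments aligned with parameter tensors whose rows are culled or duplicated during refinement. A self-contained sketch of the culling case on a plain `torch.optim.Adam` (toy tensors and mask, for illustration only):

```python
import torch

param = torch.nn.Parameter(torch.randn(5, 3))
opt = torch.optim.Adam([param], lr=1e-2)
param.sum().backward()
opt.step()  # populates exp_avg / exp_avg_sq in opt.state[param]

keep = torch.tensor([True, False, True, True, False])  # rows to keep
state = opt.state.pop(param)
state["exp_avg"] = state["exp_avg"][keep]
state["exp_avg_sq"] = state["exp_avg_sq"][keep]

new_param = torch.nn.Parameter(param.data[keep])
opt.param_groups[0]["params"] = [new_param]
opt.state[new_param] = state  # the optimizer now tracks the pruned tensor
```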
+ del optimizer.param_groups[0]["params"][0] + del optimizer.param_groups[0]["params"] + optimizer.param_groups[0]["params"] = new_params + optimizer.state[new_params[0]] = param_state + + def dup_in_optim(self, optimizer, dup_mask, new_params, n=2): + """adds the parameters to the optimizer""" + param = optimizer.param_groups[0]["params"][0] + param_state = optimizer.state[param] + repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1)) + param_state["exp_avg"] = torch.cat( + [param_state["exp_avg"], torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims)], + dim=0, + ) + param_state["exp_avg_sq"] = torch.cat( + [ + param_state["exp_avg_sq"], + torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims), + ], + dim=0, + ) + del optimizer.state[param] + optimizer.state[new_params[0]] = param_state + optimizer.param_groups[0]["params"] = new_params + del param + + def after_train(self, step: int): + with torch.no_grad(): + # keep track of a moving average of grad norms + visible_mask = (self.radii > 0).flatten() + grads = self.xys.grad.detach().norm(dim=-1) # TODO fill in + # print(f"grad norm min {grads.min().item()} max {grads.max().item()} mean {grads.mean().item()} size {grads.shape}") + if self.xys_grad_norm is None: + self.xys_grad_norm = grads + self.vis_counts = torch.ones_like(self.xys_grad_norm) + else: + assert self.vis_counts is not None + self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1 + self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask] + + # update the max screen size, as a ratio of number of pixels + if self.max_2Dsize is None: + self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32) + newradii = self.radii.detach()[visible_mask] + self.max_2Dsize[visible_mask] = torch.maximum( + self.max_2Dsize[visible_mask], newradii / float(max(self.last_size[0], self.last_size[1])) + ) + + def set_crop(self, crop_box: Optional[OrientedBox]): + self.crop_box = crop_box + + def set_background(self, back_color: torch.Tensor): + assert back_color.shape == (3,) + self.back_color = back_color + + def refinement_after(self, optimizers: Optimizers, step): + if self.step >= self.config.warmup_length: + with torch.no_grad(): + # only split/cull if we've seen every image since opacity reset + reset_interval = self.config.reset_alpha_every * self.config.refine_every + if ( + self.step < self.config.stop_split_at + and self.step % reset_interval > self.num_train_data + self.config.refine_every + ): + # then we densify + assert ( + self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None + ) + avg_grad_norm = ( + (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1]) + ) + high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze() + splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze() + if self.step < self.config.stop_screen_size_at: + splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze() + splits &= high_grads + nsamps = self.config.n_split_samples + ( + split_means, + split_colors, + split_opacities, + split_scales, + split_quats, + ) = self.split_gaussians(splits, nsamps) + + dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze() + dups &= high_grads + dup_means, dup_colors, dup_opacities, dup_scales, dup_quats = self.dup_gaussians(dups) + self.means = 
Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0)) + self.colors_all = Parameter(torch.cat([self.colors_all.detach(), split_colors, dup_colors], dim=0)) + + self.opacities = Parameter( + torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0) + ) + self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0)) + self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0)) + # append zeros to the max_2Dsize tensor + self.max_2Dsize = torch.cat( + [self.max_2Dsize, torch.zeros_like(split_scales[:, 0]), torch.zeros_like(dup_scales[:, 0])], + dim=0, + ) + split_idcs = torch.where(splits)[0] + param_groups = self.get_gaussian_param_groups() + for group, param in param_groups.items(): + self.dup_in_optim(optimizers.optimizers[group], split_idcs, param, n=nsamps) + dup_idcs = torch.where(dups)[0] + + param_groups = self.get_gaussian_param_groups() + for group, param in param_groups.items(): + self.dup_in_optim(optimizers.optimizers[group], dup_idcs, param, 1) + + # Offset all the opacity reset logic by refine_every so that we don't + # save checkpoints right when the opacity is reset (saves every 2k) + if self.step % reset_interval > self.num_train_data + self.config.refine_every: + # then cull + deleted_mask = self.cull_gaussians() + param_groups = self.get_gaussian_param_groups() + for group, param in param_groups.items(): + self.remove_from_optim(optimizers.optimizers[group], deleted_mask, param) + + if self.step % reset_interval == self.config.refine_every: + reset_value = self.config.cull_alpha_thresh * 0.8 + self.opacities.data = torch.full_like( + self.opacities.data, torch.logit(torch.tensor(reset_value)).item() + ) + # reset the exp of optimizer + optim = optimizers.optimizers["opacity"] + param = optim.param_groups[0]["params"][0] + param_state = optim.state[param] + param_state["exp_avg"] = torch.zeros_like(param_state["exp_avg"]) + param_state["exp_avg_sq"] = torch.zeros_like(param_state["exp_avg_sq"]) + self.xys_grad_norm = None + self.vis_counts = None + self.max_2Dsize = None + + def cull_gaussians(self): + """ + This function deletes gaussians with under a certain opacity threshold + """ + n_bef = self.num_points + # cull transparent ones + culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze() + if self.step > self.config.refine_every * self.config.reset_alpha_every: + # cull huge ones + toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze() + culls = culls | toobigs + if self.step < self.config.stop_screen_size_at: + # cull big screen space + assert self.max_2Dsize is not None + culls = culls | (self.max_2Dsize > self.config.cull_screen_size).squeeze() + self.means = Parameter(self.means[~culls].detach()) + self.scales = Parameter(self.scales[~culls].detach()) + self.quats = Parameter(self.quats[~culls].detach()) + self.colors_all = Parameter(self.colors_all[~culls].detach()) + self.opacities = Parameter(self.opacities[~culls].detach()) + + print(f"Culled {n_bef - self.num_points} gaussians") + return culls + + def split_gaussians(self, split_mask, samps): + """ + This function splits gaussians that are too large + """ + + n_splits = split_mask.sum().item() + print(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}") + centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales + scaled_samples = ( + 
torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples + ) # how these scales are rotated + quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first + rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated + rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze() + new_means = rotated_samples + self.means[split_mask].repeat(samps, 1) + # step 2, sample new colors + new_colors_all = self.colors_all[split_mask].repeat(samps, 1, 1) + # step 3, sample new opacities + new_opacities = self.opacities[split_mask].repeat(samps, 1) + # step 4, sample new scales + size_fac = 1.6 + new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1) + self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac) + # step 5, sample new quats + new_quats = self.quats[split_mask].repeat(samps, 1) + return new_means, new_colors_all, new_opacities, new_scales, new_quats + + def dup_gaussians(self, dup_mask): + """ + This function duplicates gaussians that are too small + """ + n_dups = dup_mask.sum().item() + print(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}") + dup_means = self.means[dup_mask] + dup_colors = self.colors_all[dup_mask] + dup_opacities = self.opacities[dup_mask] + dup_scales = self.scales[dup_mask] + dup_quats = self.quats[dup_mask] + return dup_means, dup_colors, dup_opacities, dup_scales, dup_quats + + @property + def num_points(self): + return self.means.shape[0] + + def get_training_callbacks( + self, training_callback_attributes: TrainingCallbackAttributes + ) -> List[TrainingCallback]: + cbs = [] + cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb)) + # The order of these matters + cbs.append( + TrainingCallback( + [TrainingCallbackLocation.AFTER_TRAIN_ITERATION], + self.after_train, + ) + ) + cbs.append( + TrainingCallback( + [TrainingCallbackLocation.AFTER_TRAIN_ITERATION], + self.refinement_after, + update_every_num_iters=self.config.refine_every, + args=[training_callback_attributes.optimizers], + ) + ) + return cbs + + def step_cb(self, step): + self.step = step + + def get_gaussian_param_groups(self) -> Dict[str, List[Parameter]]: + return { + "xyz": [self.means], + "color": [self.colors_all], + "opacity": [self.opacities], + "scaling": [self.scales], + "rotation": [self.quats], + } + + def get_param_groups(self) -> Dict[str, List[Parameter]]: + """Obtain the parameter groups for the optimizers + + Returns: + Mapping of different parameter groups + """ + gps = self.get_gaussian_param_groups() + # add camera optimizer param groups + self.camera_optimizer.get_param_groups(gps) + return gps + + def _get_downscale_factor(self): + if self.training: + return 2 ** max((self.config.num_downscales - self.step // self.config.resolution_schedule), 0) + else: + return 1 + + def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: + """Takes in a Ray Bundle and returns a dictionary of outputs. + + Args: + ray_bundle: Input bundle of rays. This raybundle should have all the + needed information to compute the outputs. + + Returns: + Outputs of model. (ie. 
rendered colors) + """ + if not isinstance(camera, Cameras): + print("Called get_outputs with not a camera") + return {} + assert camera.shape[0] == 1, "Only one camera at a time" + if self.training: + # currently relies on the branch vickie/camera-grads + self.camera_optimizer.apply_to_camera(camera) + if self.training: + background = torch.rand(3, device=self.device) + else: + # logic for setting the background of the scene + if renderers.BACKGROUND_COLOR_OVERRIDE is not None: + background = renderers.BACKGROUND_COLOR_OVERRIDE + else: + background = self.back_color.to(self.device) + if self.crop_box is not None and not self.training: + crop_ids = self.crop_box.within(self.means).squeeze() + if crop_ids.sum() == 0: + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + else: + crop_ids = None + camera_downscale = self._get_downscale_factor() + camera.rescale_output_resolution(1 / camera_downscale) + # shift the camera to center of scene looking at center + R = camera.camera_to_worlds[0, :3, :3] # 3 x 3 + T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1 + # flip the z and y axes to align with gsplat conventions + R_edit = torch.diag(torch.tensor([1, -1, -1], device="cuda", dtype=R.dtype)) + R = R @ R_edit + # analytic matrix inverse to get world2camera matrix + R_inv = R.T + T_inv = -R_inv @ T + viewmat = torch.eye(4, device=R.device, dtype=R.dtype) + viewmat[:3, :3] = R_inv + viewmat[:3, 3:4] = T_inv + # calculate the FOV of the camera given fx and fy, width and height + cx = camera.cx.item() + cy = camera.cy.item() + fovx = 2 * math.atan(camera.width / (2 * camera.fx)) + fovy = 2 * math.atan(camera.height / (2 * camera.fy)) + W, H = camera.width.item(), camera.height.item() + self.last_size = (H, W) + projmat = projection_matrix(0.001, 1000, fovx, fovy, device=self.device) + BLOCK_X, BLOCK_Y = 16, 16 + tile_bounds = ( + (W + BLOCK_X - 1) // BLOCK_X, + (H + BLOCK_Y - 1) // BLOCK_Y, + 1, + ) + + if crop_ids is not None: + opacities_crop = self.opacities[crop_ids] + means_crop = self.means[crop_ids] + colors_crop = self.colors_all[crop_ids] + scales_crop = self.scales[crop_ids] + quats_crop = self.quats[crop_ids] + else: + opacities_crop = self.opacities + means_crop = self.means + colors_crop = self.colors_all + scales_crop = self.scales + quats_crop = self.quats + self.xys, depths, self.radii, conics, num_tiles_hit, _ = ProjectGaussians.apply( # type: ignore + means_crop, + torch.exp(scales_crop), + 1, + quats_crop / quats_crop.norm(dim=-1, keepdim=True), + viewmat.squeeze()[:3, :], + projmat.squeeze() @ viewmat.squeeze(), + camera.fx.item(), + camera.fy.item(), + cx, + cy, + H, + W, + tile_bounds, + ) + if (self.radii).sum() == 0: + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + + # Important to allow xys grads to populate properly + if self.training: + self.xys.retain_grad() + if self.config.sh_degree > 0: + viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3) + viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True) + n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree) + rgbs = SphericalHarmonics.apply(n, viewdirs, colors_crop) + rgbs = torch.clamp(rgbs + 0.5, 0.0, 1.0) # type: ignore + else: + rgbs = self.get_colors.squeeze() # (N, 3) + rgbs = torch.sigmoid(rgbs) + rgb = RasterizeGaussians.apply( + self.xys, + depths, + self.radii, + conics, + num_tiles_hit, + rgbs, + torch.sigmoid(opacities_crop), + H, + W, + background, + ) + depth_im = None 
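A side note on the view-matrix construction above: because the rotation block of `camera_to_worlds` is orthonormal, the world-to-camera transform is formed analytically as `[R.T | -R.T @ T]` rather than through a general matrix inverse. A small self-contained check with random values (illustration only):

```python
import torch

c2w = torch.eye(4)
c2w[:3, :3] = torch.linalg.qr(torch.randn(3, 3))[0]  # random orthonormal rotation
c2w[:3, 3] = torch.randn(3)

R, T = c2w[:3, :3], c2w[:3, 3:4]
w2c = torch.eye(4)
w2c[:3, :3] = R.T            # inverse of an orthonormal matrix is its transpose
w2c[:3, 3:4] = -R.T @ T

assert torch.allclose(w2c @ c2w, torch.eye(4), atol=1e-5)
```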
+ if not self.training: + depth_im = RasterizeGaussians.apply( # type: ignore + self.xys, + depths, + self.radii, + conics, + num_tiles_hit, + depths[:, None].repeat(1, 3), + torch.sigmoid(opacities_crop), + H, + W, + torch.ones(3, device=self.device) * 10, + )[..., 0:1] + # rescale the camera back to original dimensions + camera.rescale_output_resolution(camera_downscale) + return {"rgb": rgb, "depth": depth_im} # type: ignore + + def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]: + """Compute and returns metrics. + + Args: + outputs: the output to compute loss dict to + batch: ground truth batch corresponding to outputs + """ + d = self._get_downscale_factor() + if d > 1: + newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d] + gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + else: + gt_img = batch["image"] + metrics_dict = {} + gt_rgb = gt_img.to(self.device) # RGB or RGBA image + predicted_rgb = outputs["rgb"] + metrics_dict["psnr"] = self.psnr(predicted_rgb, gt_rgb) + + self.camera_optimizer.get_metrics_dict(metrics_dict) + metrics_dict["gaussian_count"] = self.num_points + return metrics_dict + + def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]: + """Computes and returns the losses dict. + + Args: + outputs: the output to compute loss dict to + batch: ground truth batch corresponding to outputs + metrics_dict: dictionary of metrics, some of which we can use for loss + """ + d = self._get_downscale_factor() + if d > 1: + newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d] + gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + else: + gt_img = batch["image"] + Ll1 = torch.abs(gt_img - outputs["rgb"]).mean() + simloss = 1 - self.ssim(gt_img.permute(2, 0, 1)[None, ...], outputs["rgb"].permute(2, 0, 1)[None, ...]) + if self.step % 10 == 0: + # Before, we made split sh and colors onto different optimizer, with shs having a low learning rate + # This is slow, instead we apply a regularization every few steps + sh_reg = self.colors_all[:, 1:, :].norm(dim=1).mean() + scale_exp = torch.exp(self.scales) + scale_reg = ( + torch.maximum( + scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1), torch.tensor(self.config.max_gauss_ratio) + ) + - self.config.max_gauss_ratio + ) + scale_reg = 0.1 * scale_reg.mean() + else: + sh_reg = torch.tensor(0.0).to(self.device) + scale_reg = torch.tensor(0.0).to(self.device) + return { + "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss, + "sh_reg": sh_reg, + "scale_reg": scale_reg, + } + + @torch.no_grad() + def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]: + """Takes in a camera, generates the raybundle, and computes the output of the model. + Overridden for a camera-based gaussian model. + + Args: + camera: generates raybundle + """ + assert camera is not None, "must provide camera to gaussian model" + self.set_crop(obb_box) + outs = self.get_outputs(camera.to(self.device)) + return outs # type: ignore + + def get_image_metrics_and_images( + self, outputs: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor] + ) -> Tuple[Dict[str, float], Dict[str, torch.Tensor]]: + """Writes the test image outputs. + + Args: + image_idx: Index of the image. + step: Current step. + batch: Batch of data. + outputs: Outputs of the model. + + Returns: + A dictionary of metrics. 
+ """ + d = self._get_downscale_factor() + if d > 1: + newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d] + gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + else: + gt_img = batch["image"] + predicted_rgb = outputs["rgb"] + + gt_rgb = gt_img.to(self.device) + + combined_rgb = torch.cat([gt_rgb, predicted_rgb], dim=1) + + # Switch images from [H, W, C] to [1, C, H, W] for metrics computations + gt_rgb = torch.moveaxis(gt_rgb, -1, 0)[None, ...] + predicted_rgb = torch.moveaxis(predicted_rgb, -1, 0)[None, ...] + + psnr = self.psnr(gt_rgb, predicted_rgb) + ssim = self.ssim(gt_rgb, predicted_rgb) + lpips = self.lpips(gt_rgb, predicted_rgb) + + # all of these metrics will be logged as scalars + metrics_dict = {"psnr": float(psnr.item()), "ssim": float(ssim)} # type: ignore + metrics_dict["lpips"] = float(lpips) + + images_dict = {"img": combined_rgb} + + return metrics_dict, images_dict diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py index 345a39d4b6..b75adc6adb 100644 --- a/nerfstudio/pipelines/base_pipeline.py +++ b/nerfstudio/pipelines/base_pipeline.py @@ -26,18 +26,11 @@ import torch import torch.distributed as dist -from PIL import Image -from rich.progress import ( - BarColumn, - MofNCompleteColumn, - Progress, - TextColumn, - TimeElapsedColumn, -) +from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn from torch import nn +from torch.cuda.amp.grad_scaler import GradScaler from torch.nn import Parameter from torch.nn.parallel import DistributedDataParallel as DDP -from torch.cuda.amp.grad_scaler import GradScaler from nerfstudio.configs import base_config as cfg from nerfstudio.data.datamanagers.base_datamanager import ( @@ -46,6 +39,7 @@ VanillaDataManager, ) from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager +from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanager from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes from nerfstudio.models.base_model import Model, ModelConfig from nerfstudio.utils import profiler @@ -264,6 +258,15 @@ def __init__( self.datamanager: DataManager = config.datamanager.setup( device=device, test_mode=test_mode, world_size=world_size, local_rank=local_rank ) + # TODO make cleaner + seed_pts = None + if ( + hasattr(self.datamanager, "train_dataparser_outputs") + and "points3D_xyz" in self.datamanager.train_dataparser_outputs.metadata + ): + pts = self.datamanager.train_dataparser_outputs.metadata["points3D_xyz"] + pts_rgb = self.datamanager.train_dataparser_outputs.metadata["points3D_rgb"] + seed_pts = (pts, pts_rgb) self.datamanager.to(device) # TODO(ethan): get rid of scene_bounds from the model assert self.datamanager.train_dataset is not None, "Missing input dataset" @@ -274,6 +277,7 @@ def __init__( metadata=self.datamanager.train_dataset.metadata, device=device, grad_scaler=grad_scaler, + seed_points=seed_pts, ) self.model.to(device) @@ -335,13 +339,11 @@ def get_eval_image_metrics_and_images(self, step: int): step: current iteration step """ self.eval() - image_idx, camera_ray_bundle, batch = self.datamanager.next_eval_image(step) - outputs = self.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle) + camera, batch = self.datamanager.next_eval_image(step) + outputs = 
self.model.get_outputs_for_camera(camera) metrics_dict, images_dict = self.model.get_image_metrics_and_images(outputs, batch) - assert "image_idx" not in metrics_dict - metrics_dict["image_idx"] = image_idx assert "num_rays" not in metrics_dict - metrics_dict["num_rays"] = len(camera_ray_bundle) + metrics_dict["num_rays"] = (camera.height * camera.width * camera.size).item() self.train() return metrics_dict, images_dict @@ -361,7 +363,7 @@ def get_average_eval_image_metrics( """ self.eval() metrics_dict_list = [] - assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager)) + assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager, FullImageDatamanager)) num_images = len(self.datamanager.fixed_indices_eval_dataloader) with Progress( TextColumn("[progress.description]{task.description}"), @@ -371,26 +373,21 @@ def get_average_eval_image_metrics( transient=True, ) as progress: task = progress.add_task("[green]Evaluating all eval images...", total=num_images) - for camera_ray_bundle, batch in self.datamanager.fixed_indices_eval_dataloader: + for camera, batch in self.datamanager.fixed_indices_eval_dataloader: # time this the following line inner_start = time() - height, width = camera_ray_bundle.shape + outputs = self.model.get_outputs_for_camera(camera=camera) + height, width = camera.height, camera.width num_rays = height * width - outputs = self.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle) - metrics_dict, images_dict = self.model.get_image_metrics_and_images(outputs, batch) - + metrics_dict, _ = self.model.get_image_metrics_and_images(outputs, batch) if output_path is not None: - camera_indices = camera_ray_bundle.camera_indices - assert camera_indices is not None - for key, val in images_dict.items(): - Image.fromarray((val * 255).byte().cpu().numpy()).save( - output_path / "{0:06d}-{1}.jpg".format(int(camera_indices[0, 0, 0]), key) - ) + raise NotImplementedError("Saving images is not implemented yet") + assert "num_rays_per_sec" not in metrics_dict - metrics_dict["num_rays_per_sec"] = num_rays / (time() - inner_start) + metrics_dict["num_rays_per_sec"] = (num_rays / (time() - inner_start)).item() fps_str = "fps" assert fps_str not in metrics_dict - metrics_dict[fps_str] = metrics_dict["num_rays_per_sec"] / (height * width) + metrics_dict[fps_str] = (metrics_dict["num_rays_per_sec"] / (height * width)).item() metrics_dict_list.append(metrics_dict) progress.advance(task) # average the metrics list diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py index b104597abd..9cebd896c2 100644 --- a/nerfstudio/scripts/exporter.py +++ b/nerfstudio/scripts/exporter.py @@ -37,15 +37,10 @@ from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager from nerfstudio.data.scene_box import OrientedBox from nerfstudio.exporter import texture_utils, tsdf_utils -from nerfstudio.exporter.exporter_utils import ( - collect_camera_poses, - generate_point_cloud, - get_mesh_from_filename, -) -from nerfstudio.exporter.marching_cubes import ( - generate_mesh_with_multires_marching_cubes, -) +from nerfstudio.exporter.exporter_utils import collect_camera_poses, generate_point_cloud, get_mesh_from_filename +from nerfstudio.exporter.marching_cubes import generate_mesh_with_multires_marching_cubes from nerfstudio.fields.sdf_field import SDFField +from nerfstudio.models.gaussian_splatting import GaussianSplattingModel from nerfstudio.pipelines.base_pipeline import Pipeline, VanillaPipeline from nerfstudio.utils.eval_utils 
import eval_setup from nerfstudio.utils.rich_utils import CONSOLE @@ -126,6 +121,9 @@ class ExportPointCloud(Exporter): """Number of rays to evaluate per batch. Decrease if you run out of memory.""" std_ratio: float = 10.0 """Threshold based on STD of the average distances across the point cloud to remove outliers.""" + save_world_frame: bool = True + """If true, saves in the frame of the transform.json file, if false saves in the frame of the scaled + dataparser transform""" def main(self) -> None: """Export point cloud.""" @@ -162,6 +160,17 @@ def main(self) -> None: crop_obb=crop_obb, std_ratio=self.std_ratio, ) + if self.save_world_frame: + # apply the inverse dataparser transform to the point cloud + points = np.asarray(pcd.points) + poses = np.eye(4, dtype=np.float32)[None, ...].repeat(points.shape[0], axis=0)[:, :3, :] + poses[:, :3, 3] = points + poses = pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space( + torch.from_numpy(poses) + ) + points = poses[:, :3, 3].numpy() + pcd.points = o3d.utility.Vector3dVector(points) + torch.cuda.empty_cache() CONSOLE.print(f"[bold green]:white_check_mark: Generated {pcd}") @@ -469,6 +478,58 @@ def main(self) -> None: CONSOLE.print(f"[bold green]:white_check_mark: Saved poses to {output_file_path}") +@dataclass +class ExportGaussianSplat(Exporter): + """ + Export 3D Gaussian Splatting model to a .ply + """ + + def main(self) -> None: + if not self.output_dir.exists(): + self.output_dir.mkdir(parents=True) + + _, pipeline, _, _ = eval_setup(self.load_config) + + assert isinstance(pipeline.model, GaussianSplattingModel) + + model: GaussianSplattingModel = pipeline.model + + filename = self.output_dir / "point_cloud.ply" + + map_to_tensors = {} + + with torch.no_grad(): + positions = model.means.cpu().numpy() + map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32) + map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32) + + colors = model.colors.data.cpu().numpy() + map_to_tensors["colors"] = (colors * 255).astype(np.uint8) + for i in range(colors.shape[1]): + map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1] + + shs = model.shs_rest.data.cpu().numpy() + if model.config.sh_degree > 0: + shs = shs.reshape((colors.shape[0], -1, 1)) + for i in range(shs.shape[-1]): + map_to_tensors[f"f_rest_{i}"] = shs[:, i] + + map_to_tensors["opacity"] = model.opacities.data.cpu().numpy() + + scales = model.scales.data.cpu().unsqueeze(-1).numpy() + for i in range(3): + map_to_tensors[f"scale_{i}"] = scales[:, i] + + quats = model.quats.data.cpu().unsqueeze(-1).numpy() + + for i in range(4): + map_to_tensors[f"rot_{i}"] = quats[:, i] + + pcd = o3d.t.geometry.PointCloud(map_to_tensors) + + o3d.t.io.write_point_cloud(str(filename), pcd) + + Commands = tyro.conf.FlagConversionOff[ Union[ Annotated[ExportPointCloud, tyro.conf.subcommand(name="pointcloud")], @@ -476,6 +537,7 @@ def main(self) -> None: Annotated[ExportPoissonMesh, tyro.conf.subcommand(name="poisson")], Annotated[ExportMarchingCubesMesh, tyro.conf.subcommand(name="marching-cubes")], Annotated[ExportCameraPoses, tyro.conf.subcommand(name="cameras")], + Annotated[ExportGaussianSplat, tyro.conf.subcommand(name="gaussian-splat")], ] ] diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py index 79cc6c7bd9..114e4195ec 100644 --- a/nerfstudio/scripts/render.py +++ b/nerfstudio/scripts/render.py @@ -156,7 +156,6 @@ def _render_trajectory_video( obb_box = None if crop_data is not None: obb_box = crop_data.obb - 
camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box) max_dist, max_idx = -1, -1 true_max_dist, true_max_idx = -1, -1 @@ -206,10 +205,14 @@ def _render_trajectory_video( with renderers.background_color_override_context( crop_data.background_color.to(pipeline.device) ), torch.no_grad(): - outputs = pipeline.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle) + outputs = pipeline.model.get_outputs_for_camera( + cameras[camera_idx : camera_idx + 1], obb_box=obb_box + ) else: with torch.no_grad(): - outputs = pipeline.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle) + outputs = pipeline.model.get_outputs_for_camera( + cameras[camera_idx : camera_idx + 1], obb_box=obb_box + ) render_image = [] for rendered_output_name in rendered_output_names: @@ -679,7 +682,7 @@ def main(self) -> None: ), ) steps = int(self.frame_rate * self.seconds) - camera_start = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0).flatten() + camera_start, _ = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0) camera_path = get_spiral_path(camera_start, steps=steps, radius=self.radius) _render_trajectory_video( @@ -788,10 +791,9 @@ def update_config(config: TrainerConfig) -> TrainerConfig: TimeRemainingColumn(elapsed_when_finished=False, compact=False), TimeElapsedColumn(), ) as progress: - for camera_idx, (ray_bundle, batch) in enumerate(progress.track(dataloader, total=len(dataset))): - ray_bundle: RayBundle + for camera_idx, (camera, batch) in enumerate(progress.track(dataloader, total=len(dataset))): with torch.no_grad(): - outputs = pipeline.model.get_outputs_for_camera_ray_bundle(ray_bundle) + outputs = pipeline.model.get_outputs_for_camera(camera) gt_batch = batch.copy() gt_batch["rgb"] = gt_batch.pop("image") diff --git a/nerfstudio/scripts/viewer/run_viewer.py b/nerfstudio/scripts/viewer/run_viewer.py index de210a30d8..b1b87c42c0 100644 --- a/nerfstudio/scripts/viewer/run_viewer.py +++ b/nerfstudio/scripts/viewer/run_viewer.py @@ -104,7 +104,7 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int): viewer_state = ViewerBetaState( config.viewer, log_filename=viewer_log_path, - datapath=base_dir, + datapath=pipeline.datamanager.get_datapath(), pipeline=pipeline, share=config.viewer.make_share_url, ) diff --git a/nerfstudio/viewer/server/render_state_machine.py b/nerfstudio/viewer/server/render_state_machine.py index 390390d480..7533005214 100644 --- a/nerfstudio/viewer/server/render_state_machine.py +++ b/nerfstudio/viewer/server/render_state_machine.py @@ -24,6 +24,7 @@ from nerfstudio.cameras.cameras import Cameras from nerfstudio.model_components.renderers import background_color_override_context +from nerfstudio.models.gaussian_splatting import GaussianSplattingModel from nerfstudio.utils import colormaps, writer from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter from nerfstudio.viewer.server import viewer_utils @@ -127,7 +128,14 @@ def _render_img(self, cam_msg: CameraMessage): assert camera is not None, "render called before viewer connected" with self.viewer.train_lock if self.viewer.train_lock is not None else contextlib.nullcontext(): - camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb) + # TODO jake-austin: Make this check whether the model inherits from a camera based model or a ray based model + # TODO Zhuoyang: First made some dummy judgements, need to be fixed later + isGaussianSplattingModel = isinstance(self.viewer.get_model(), 
GaussianSplattingModel) + if isGaussianSplattingModel: + # TODO fix me before ship + camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb) + else: + camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb) with TimeWriter(None, None, write=False) as vis_t: self.viewer.get_model().eval() @@ -142,12 +150,21 @@ def _render_img(self, cam_msg: CameraMessage): device=self.viewer.get_model().device, ) with background_color_override_context(background_color), torch.no_grad(): - outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) + if isGaussianSplattingModel: + outputs = self.viewer.get_model().get_outputs_for_camera(camera) + else: + outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) else: with torch.no_grad(): - outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) + if isGaussianSplattingModel: + outputs = self.viewer.get_model().get_outputs_for_camera(camera) + else: + outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) self.viewer.get_model().train() - num_rays = len(camera_ray_bundle) + if True: + num_rays = (camera.height * camera.width).item() + else: + num_rays = len(camera_ray_bundle) render_time = vis_t.duration if writer.is_initialized(): writer.put_time( diff --git a/nerfstudio/viewer_beta/export_panel.py b/nerfstudio/viewer_beta/export_panel.py index 425e10a19d..a5564327ae 100644 --- a/nerfstudio/viewer_beta/export_panel.py +++ b/nerfstudio/viewer_beta/export_panel.py @@ -81,6 +81,11 @@ def populate_point_cloud_tab( ) -> None: server.add_gui_markdown("Render depth, project to an oriented point cloud, and filter. 
") num_points = server.add_gui_number("# Points", initial_value=1_000_000, min=1, max=None, step=1) + world_frame = server.add_gui_checkbox( + "Save in world frame", + True, + hint="Save the point cloud in the transforms.json frame, rather than scaled scene frame", + ) remove_outliers = server.add_gui_checkbox("Remove outliers", True) normals = server.add_gui_dropdown( "Normals", @@ -104,6 +109,7 @@ def _(event: viser.GuiEvent) -> None: f"--remove-outliers {remove_outliers.value}", f"--normal-method {normals.value}", f"--use_bounding_box {control_panel.crop_viewport}", + f"--save-world-frame {world_frame.value}", get_crop_string(control_panel.crop_obb), ] ) diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index 53c73c34e8..849cf3d4e7 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -27,6 +27,8 @@ from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter from nerfstudio.viewer.server import viewer_utils from nerfstudio.viewer_beta.utils import CameraState, get_camera +from nerfstudio.models.gaussian_splatting import GaussianSplattingModel +from nerfstudio.cameras.cameras import Cameras if TYPE_CHECKING: from nerfstudio.viewer_beta.viewer import Viewer @@ -125,11 +127,18 @@ def _render_img(self, camera_state: CameraState): camera = get_camera(camera_state, image_height, image_width) camera = camera.to(self.viewer.get_model().device) + assert isinstance(camera, Cameras) assert camera is not None, "render called before viewer connected" with TimeWriter(None, None, write=False) as vis_t: with self.viewer.train_lock if self.viewer.train_lock is not None else contextlib.nullcontext(): - camera_ray_bundle = camera.generate_rays(camera_indices=0, obb_box=obb) + if isinstance(self.viewer.get_model(), GaussianSplattingModel): + color = self.viewer.control_panel.background_color + background_color = torch.tensor( + [color[0] / 255.0, color[1] / 255.0, color[2] / 255.0], + device=self.viewer.get_model().device, + ) + self.viewer.get_model().set_background(background_color) self.viewer.get_model().eval() step = self.viewer.step try: @@ -145,21 +154,27 @@ def _render_img(self, camera_state: CameraState): with background_color_override_context( background_color ), torch.no_grad(), viewer_utils.SetTrace(self.check_interrupt): - outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) + outputs = self.viewer.get_model().get_outputs_for_camera(camera, obb_box=obb) else: with torch.no_grad(), viewer_utils.SetTrace(self.check_interrupt): - outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle) + outputs = self.viewer.get_model().get_outputs_for_camera(camera, obb_box=obb) except viewer_utils.IOChangeException: self.viewer.get_model().train() raise self.viewer.get_model().train() - num_rays = len(camera_ray_bundle) + num_rays = (camera.height * camera.width).item() if self.viewer.control_panel.layer_depth: - # convert to z_depth if depth compositing is enabled - R = camera.camera_to_worlds[0:3, 0:3].T - pts = camera_ray_bundle.directions * outputs["depth"] - pts = (R @ (pts.view(-1, 3).T)).T.view(*camera_ray_bundle.directions.shape) - outputs["gl_z_buf_depth"] = -pts[..., 2:3] # negative z axis is the coordinate convention + if isinstance(self.viewer.get_model(), GaussianSplattingModel): + # TODO: sending depth at high resolution lags the network a lot, figure out how to do this more efficiently + # 
outputs["gl_z_buf_depth"] = outputs["depth"] + pass + else: + # convert to z_depth if depth compositing is enabled + R = camera.camera_to_worlds[0, 0:3, 0:3].T + camera_ray_bundle = camera.generate_rays(camera_indices=0, obb_box=obb) + pts = camera_ray_bundle.directions * outputs["depth"] + pts = (R @ (pts.view(-1, 3).T)).T.view(*camera_ray_bundle.directions.shape) + outputs["gl_z_buf_depth"] = -pts[..., 2:3] # negative z axis is the coordinate convention render_time = vis_t.duration if writer.is_initialized() and render_time != 0: writer.put_time( @@ -187,7 +202,7 @@ def run(self): except viewer_utils.IOChangeException: # if we got interrupted, don't send the output to the viewer continue - self._send_output_to_viewer(outputs) + self._send_output_to_viewer(outputs, static_render=(action.action in ["static", "step"])) def check_interrupt(self, frame, event, arg): """Raises interrupt when flag has been set and not already on lowest resolution. @@ -199,7 +214,7 @@ def check_interrupt(self, frame, event, arg): raise viewer_utils.IOChangeException return self.check_interrupt - def _send_output_to_viewer(self, outputs: Dict[str, Any]): + def _send_output_to_viewer(self, outputs: Dict[str, Any], static_render: bool = True): """Chooses the correct output and sends it to the viewer Args: @@ -239,11 +254,11 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any]): depth = ( outputs["gl_z_buf_depth"].cpu().numpy() * self.viser_scale_ratio if "gl_z_buf_depth" in outputs else None ) - + jpg_quality = self.viewer.config.jpeg_quality if static_render else 40 self.client.set_background_image( selected_output.cpu().numpy(), format=self.viewer.config.image_format, - jpeg_quality=self.viewer.config.jpeg_quality, + jpeg_quality=jpg_quality, depth=depth, ) res = f"{selected_output.shape[0]}x{selected_output.shape[1]}px" diff --git a/nerfstudio/viewer_beta/utils.py b/nerfstudio/viewer_beta/utils.py index a3dd7b1c2c..3614fce419 100644 --- a/nerfstudio/viewer_beta/utils.py +++ b/nerfstudio/viewer_beta/utils.py @@ -65,7 +65,7 @@ def get_camera( cx=pp_w, cy=pp_h, camera_type=camera_type, - camera_to_worlds=camera_state.c2w.to(torch.float32), + camera_to_worlds=camera_state.c2w.to(torch.float32)[None, ...], times=torch.tensor([0.0], dtype=torch.float32), ) return camera diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 92e609f076..2b6f4e140d 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -291,13 +291,16 @@ def set_camera_visibility(self, visible: bool) -> None: self.camera_handles[idx].visible = visible def update_camera_poses(self): + # TODO this fn accounts for like ~5% of total train time # Update the train camera locations based on optimization assert self.camera_handles is not None - idxs = list(self.camera_handles.keys()) if hasattr(self.pipeline.datamanager, "train_camera_optimizer"): camera_optimizer = self.pipeline.datamanager.train_camera_optimizer - else: + elif hasattr(self.pipeline.model, "camera_optimizer"): camera_optimizer = self.pipeline.model.camera_optimizer + else: + return + idxs = list(self.camera_handles.keys()) with torch.no_grad(): assert isinstance(camera_optimizer, CameraOptimizer) c2ws_delta = camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)).cpu().numpy() @@ -378,7 +381,7 @@ def init_scene( camera = train_dataset.cameras[idx] image_uint8 = (image * 255).detach().type(torch.uint8) image_uint8 = image_uint8.permute(2, 0, 1) - image_uint8 = 
torchvision.transforms.functional.resize(image_uint8, 100) # type: ignore + image_uint8 = torchvision.transforms.functional.resize(image_uint8, 100, antialias=None) # type: ignore image_uint8 = image_uint8.permute(1, 2, 0) image_uint8 = image_uint8.cpu().numpy() c2w = camera.camera_to_worlds.cpu().numpy() @@ -420,7 +423,6 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N # this stops training while moving to make the response smoother while time.time() - self.last_move_time < 0.1: time.sleep(0.05) - # self.render_statemachine.action(RenderAction("static", self.camera_state)) if self.trainer is not None and self.trainer.training_state == "training" and self.train_util != 1: if ( EventName.TRAIN_RAYS_PER_SEC.value in GLOBAL_BUFFER["events"] diff --git a/pyproject.toml b/pyproject.toml index 019ef6b6a7..fd058f9e87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,12 +56,14 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.13", + "viser==0.1.14", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", "trimesh>=3.20.2", - "timm==0.6.7" + "timm==0.6.7", + "gsplat==0.1.0", + "pytorch-msssim" ] [project.urls] diff --git a/tests/test_train.py b/tests/test_train.py index a88283a592..45d8e348f0 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -26,6 +26,7 @@ "neus", "generfacto", "neus-facto", + "gaussian-splatting", ] From 626441e15e8e59970ba95229e40727458bbf65a3 Mon Sep 17 00:00:00 2001 From: Otto Seiskari Date: Fri, 15 Dec 2023 18:37:27 +0200 Subject: [PATCH 096/101] Add instructions for data collection and processing with Spectacular AI tools (#2668) * Add instructions for using Spectacular AI tools Adds easy support for new device types: OAK-D, RealSense and Azure Kinect, as well as an alternative pipeline for iOS. * Grammar fixes in docs/quickstart/custom_dataset.md (Spectacular AI section) Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com> * Fix section label in the Spectacular AI section of custom_dataset.md * Update custom_dataset.md: typo fix. "montions" -> "motions" --------- Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com> --- README.md | 2 ++ docs/quickstart/custom_dataset.md | 39 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/README.md b/README.md index b6c5bb6ff2..dc4a0093e3 100644 --- a/README.md +++ b/README.md @@ -241,12 +241,14 @@ Using an existing dataset is great, but likely you want to use your own data! 
We | πŸ“± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | | πŸ“± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | | πŸ“± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | +| πŸ“± [Spectacular AI](https://docs.nerf.studio/quickstart/custom_dataset.html#spectacularai) | IOS, OAK, [others](https://www.spectacularai.com/mapping#supported-devices) | [App](https://apps.apple.com/us/app/spectacular-rec/id6473188128) / [`sai-cli`](https://www.spectacularai.com/mapping) | πŸ‡ | | πŸ–₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | | πŸ–₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | | πŸ‘“ [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#aria) | Aria glasses | [Project Aria](https://projectaria.com/) | πŸ‡ | | πŸ›  [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | πŸ‡ | + ## 5. Advanced Options ### Training models other than nerfacto diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md index fddbe1d56b..6444ca4456 100644 --- a/docs/quickstart/custom_dataset.md +++ b/docs/quickstart/custom_dataset.md @@ -19,6 +19,7 @@ We currently support the following custom data types: | πŸ“± [Polycam](polycam) | IOS with LiDAR | [Polycam App](https://poly.cam/) | πŸ‡ | | πŸ“± [KIRI Engine](kiri) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | πŸ‡ | | πŸ“± [Record3D](record3d) | IOS with LiDAR | [Record3D app](https://record3d.app/) | πŸ‡ | +| πŸ“± [Spectacular AI](spectacularai) | IOS, OAK, others| [App](https://apps.apple.com/us/app/spectacular-rec/id6473188128) / [`sai-cli`](https://www.spectacularai.com/mapping) | πŸ‡ | | πŸ–₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | πŸ‡ | | πŸ–₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | πŸ‡ | | πŸ–₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | πŸ‡ | @@ -267,6 +268,44 @@ ns-process-data record3d --data {data directory} --output-dir {output directory} ns-train nerfacto --data {output directory} ``` +(spectacularai)= + +## Spectacular AI + +Spectacular AI SDK and apps can be used to capture data from various devices: + + * iPhones (with LiDAR) + * OAK-D cameras + * RealSense D455/D435i + * Azure Kinect DK + +The SDK also records IMU data, which is fused with camera and (if available) LiDAR/ToF data when computing the camera poses. This approach, VISLAM, is more robust than purely image based methods (e.g., COLMAP) and can work better and faster for difficult data (monotonic environments, fast motions, narrow FoV, etc.). + +Instructions: + +1. Installation. With the Nerfstudio Conda environment active, first install the Spectacular AI Python library + +```bash +pip install spectacularAI[full] +``` + +2. Install FFmpeg. Linux: `apt install ffmpeg` (or similar, if using another package manager). 
Windows: [see here](https://www.editframe.com/guides/how-to-install-and-start-using-ffmpeg-in-under-10-minutes). FFmpeg must be in your `PATH` so that `ffmpeg` works on the command line. + +3. Data capture. See [here for specific instructions for each supported device](https://github.com/SpectacularAI/sdk-examples/tree/main/python/mapping#recording-data). + +4. Process and export. Once you have recorded a dataset in Spectacular AI format and have it stored in `{data directory}` it can be converted into a Nerfstudio supported format with: + +```bash +sai-cli process {data directory} --preview3d --key_frame_distance=0.05 {output directory} +``` +The optional `--preview3d` flag shows a 3D preview of the point cloud and estimated trajectory live while VISLAM is running. The `--key_frame_distance` argument can be tuned based on the recorded scene size: 0.05 (5cm) is good for small scans and 0.15 for room-sized scans. If the processing gets slow, you can also try adding a --fast flag to `sai-cli process` to trade off quality for speed. + +5. Train. No separate `ns-process-data` step is needed. The data in `{output directory}` can now be trained with Nerfstudio: + +```bash +ns-train nerfacto --data {output directory} +``` + (metashape)= ## Metashape From f7a49563a0b93df1fac2ada0cb15b0ea9ec36155 Mon Sep 17 00:00:00 2001 From: Zhuoyang Pan <102644383+Zhuoyang-Pan@users.noreply.github.com> Date: Mon, 18 Dec 2023 17:52:14 -0800 Subject: [PATCH 097/101] Fix a bug of random quaternions (#2687) fix a bug for random quaternions --- nerfstudio/models/gaussian_splatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index fe081734ec..78ad185a2c 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -60,7 +60,7 @@ def random_quat_tensor(N): torch.sqrt(1 - u) * torch.sin(2 * math.pi * v), torch.sqrt(1 - u) * torch.cos(2 * math.pi * v), torch.sqrt(u) * torch.sin(2 * math.pi * w), - torch.sqrt(u) * torch.sin(2 * math.pi * w), + torch.sqrt(u) * torch.cos(2 * math.pi * w), ], dim=-1, ) From 4a54763323041941f43140736d1b76c80a75af7f Mon Sep 17 00:00:00 2001 From: Hardik Dava <39372750+hardikdava@users.noreply.github.com> Date: Tue, 19 Dec 2023 18:20:17 +0100 Subject: [PATCH 098/101] fixed color issue in ply file (#2693) fixed color export --- nerfstudio/models/gaussian_splatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index 78ad185a2c..91686b44a2 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -211,7 +211,7 @@ def populate_modules(self): @property def colors(self): - return SH2RGB(self.colors_all[:, 0, :]) + return self.colors_all[:, 0, :] @property def shs_rest(self): From 7a8e31d84e424611db77f780e05f8af2e80394b4 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Wed, 20 Dec 2023 17:20:41 -0800 Subject: [PATCH 099/101] Improved render panel for beta viewer (#2667) * Continuous velocities for render splines * Configure ruff line length * Improved render preview * No longer need to update camera FOV in render preview * Details * Polishing * Cleanup * Details * Address pyright errors * type: ignore for broken (?) 
Aria stub --- .../scripts/datasets/process_project_aria.py | 2 +- nerfstudio/viewer_beta/control_panel.py | 75 ++--- nerfstudio/viewer_beta/render_panel.py | 261 ++++++++++++------ .../viewer_beta/render_state_machine.py | 37 ++- nerfstudio/viewer_beta/utils.py | 23 +- nerfstudio/viewer_beta/viewer.py | 56 ++-- nerfstudio/viewer_beta/viewer_elements.py | 12 +- pyproject.toml | 2 +- 8 files changed, 311 insertions(+), 157 deletions(-) diff --git a/nerfstudio/scripts/datasets/process_project_aria.py b/nerfstudio/scripts/datasets/process_project_aria.py index f2fdcc94c6..10f26653fd 100644 --- a/nerfstudio/scripts/datasets/process_project_aria.py +++ b/nerfstudio/scripts/datasets/process_project_aria.py @@ -103,7 +103,7 @@ def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibrat def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses: - closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv) + closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv) # type: ignore timestamps_secs, poses = zip( *[(it.tracking_timestamp.total_seconds(), it.transform_world_device) for it in closed_loop_traj] diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index c94b4745f6..a937edce16 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -18,9 +18,8 @@ import numpy as np import torch -import viser.transforms as vtf -from viser import ViserServer import viser +import viser.transforms as vtf from nerfstudio.data.scene_box import OrientedBox from nerfstudio.utils.colormaps import ColormapOptions, Colormaps from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup, @@ -33,6 +32,7 @@ ViewerSlider, ViewerVec3, ) +from viser import ViserServer class ControlPanel: @@ -42,7 +42,6 @@ class ControlPanel: time_enabled: whether or not the time slider should be enabled rerender_cb: a callback that will be called when the user changes a parameter that requires a rerender (eg train speed, max res, etc) - crop_update_cb: a callback that will be called when the user changes the crop parameters update_output_cb: a callback that will be called when the user changes the output render default_composite_depth: whether to default to compositing depth or not """ @@ -52,8 +51,7 @@ def __init__( viser_server: ViserServer, time_enabled: bool, scale_ratio: float, - rerender_cb: Callable, - crop_update_cb: Callable, + rerender_cb: Callable[[], None], update_output_cb: Callable, update_split_output_cb: Callable, default_composite_depth: bool = True, @@ -71,51 +69,53 @@ def __init__( cb_hook=lambda han: self._train_speed_cb(), ) self._output_render = ViewerDropdown( - "Output Render", + "Output type", "not set", ["not set"], - cb_hook=lambda han: [self.update_control_panel(), update_output_cb(han), rerender_cb(han)], + cb_hook=lambda han: [self.update_control_panel(), update_output_cb(han), rerender_cb()], hint="The output to render", ) self._colormap = ViewerDropdown[Colormaps]( - "Colormap", "default", ["default"], cb_hook=rerender_cb, hint="The colormap to use" + "Colormap", "default", ["default"], cb_hook=lambda _: rerender_cb(), hint="The colormap to use" + ) + self._invert = ViewerCheckbox("Invert", False, cb_hook=lambda _: rerender_cb(), hint="Invert the colormap") + self._normalize = ViewerCheckbox( + "Normalize", True, cb_hook=lambda _: rerender_cb(), hint="Normalize the colormap" ) - self._invert = ViewerCheckbox("Invert", False, cb_hook=rerender_cb, hint="Invert 
the colormap") - self._normalize = ViewerCheckbox("Normalize", True, cb_hook=rerender_cb, hint="Normalize the colormap") - self._min = ViewerNumber("Min", 0.0, cb_hook=rerender_cb, hint="Min value of the colormap") - self._max = ViewerNumber("Max", 1.0, cb_hook=rerender_cb, hint="Max value of the colormap") + self._min = ViewerNumber("Min", 0.0, cb_hook=lambda _: rerender_cb(), hint="Min value of the colormap") + self._max = ViewerNumber("Max", 1.0, cb_hook=lambda _: rerender_cb(), hint="Max value of the colormap") self._split = ViewerCheckbox( "Enable", False, - cb_hook=lambda han: [self.update_control_panel(), rerender_cb(han)], + cb_hook=lambda han: [self.update_control_panel(), rerender_cb()], hint="Render two outputs", ) self._split_percentage = ViewerSlider( - "Split Percentage", 0.5, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Where to split" + "Split percentage", 0.5, 0.0, 1.0, 0.01, cb_hook=lambda _: rerender_cb(), hint="Where to split" ) self._split_output_render = ViewerDropdown( - "Output Render Split", + "Output render split", "not set", ["not set"], - cb_hook=lambda han: [self.update_control_panel(), update_split_output_cb(han), rerender_cb(han)], + cb_hook=lambda han: [self.update_control_panel(), update_split_output_cb(han), rerender_cb()], hint="The second output", ) # Hack: spaces are after at the end of the names to make them unique self._split_colormap = ViewerDropdown[Colormaps]( - "Colormap ", "default", ["default"], cb_hook=rerender_cb, hint="Colormap of the second output" + "Colormap ", "default", ["default"], cb_hook=lambda _: rerender_cb(), hint="Colormap of the second output" ) self._split_invert = ViewerCheckbox( - "Invert ", False, cb_hook=rerender_cb, hint="Invert the colormap of the second output" + "Invert ", False, cb_hook=lambda _: rerender_cb(), hint="Invert the colormap of the second output" ) self._split_normalize = ViewerCheckbox( - "Normalize ", True, cb_hook=rerender_cb, hint="Normalize the colormap of the second output" + "Normalize ", True, cb_hook=lambda _: rerender_cb(), hint="Normalize the colormap of the second output" ) self._split_min = ViewerNumber( - "Min ", 0.0, cb_hook=rerender_cb, hint="Min value of the colormap of the second output" + "Min ", 0.0, cb_hook=lambda _: rerender_cb(), hint="Min value of the colormap of the second output" ) self._split_max = ViewerNumber( - "Max ", 1.0, cb_hook=rerender_cb, hint="Max value of the colormap of the second output" + "Max ", 1.0, cb_hook=lambda _: rerender_cb(), hint="Max value of the colormap of the second output" ) self._train_util = ViewerSlider( @@ -127,22 +127,28 @@ def __init__( hint="Target training utilization, 0.0 is slow, 1.0 is fast. 
Doesn't affect final render quality", ) self._layer_depth = ViewerCheckbox( - "Composite Depth", + "Composite depth", self.default_composite_depth, - cb_hook=rerender_cb, + cb_hook=lambda _: rerender_cb(), hint="Allow NeRF to occlude 3D browser objects", ) self._max_res = ViewerSlider( - "Max Res", 512, 64, 2048, 100, cb_hook=rerender_cb, hint="Maximum resolution to render in viewport" + "Max res", + 512, + 64, + 2048, + 100, + cb_hook=lambda _: rerender_cb(), + hint="Maximum resolution to render in viewport", ) self._crop_viewport = ViewerCheckbox( "Enable ", False, - cb_hook=lambda han: [self.update_control_panel(), crop_update_cb(han), rerender_cb(han)], + cb_hook=lambda han: [self.update_control_panel(), rerender_cb()], hint="Crop the scene to a specified box", ) self._background_color = ViewerRGB( - "Background color", (38, 42, 55), cb_hook=crop_update_cb, hint="Color of the background" + "Background color", (38, 42, 55), cb_hook=lambda _: rerender_cb(), hint="Color of the background" ) self._crop_handle = self.viser_server.add_transform_controls("Crop", depth_test=False, line_width=4.0) @@ -150,10 +156,10 @@ def update_center(han): self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value) # type: ignore self._crop_center = ViewerVec3( - "Crop Center", + "Crop center", (0.0, 0.0, 0.0), step=0.01, - cb_hook=lambda e: [crop_update_cb(e), update_center(e)], + cb_hook=lambda e: [rerender_cb(), update_center(e)], hint="Center of the crop box", ) @@ -161,15 +167,15 @@ def update_rot(han): self._crop_handle.wxyz = vtf.SO3.from_rpy_radians(*han.value).wxyz self._crop_rot = ViewerVec3( - "Crop Rotation", + "Crop rotation", (0.0, 0.0, 0.0), step=0.01, - cb_hook=lambda e: [crop_update_cb(e), update_rot(e)], + cb_hook=lambda e: [rerender_cb(), update_rot(e)], hint="Rotation of the crop box", ) self._crop_scale = ViewerVec3( - "Crop Scale", (1.0, 1.0, 1.0), step=0.01, cb_hook=crop_update_cb, hint="Scale of the crop box" + "Crop scale", (1.0, 1.0, 1.0), step=0.01, cb_hook=lambda _: rerender_cb(), hint="Size of the crop box." 
) @self._crop_handle.on_update @@ -179,7 +185,7 @@ def _update_crop_handle(han): rpy = vtf.SO3(self._crop_handle.wxyz).as_rpy_radians() self._crop_rot.value = (float(rpy.roll), float(rpy.pitch), float(rpy.yaw)) - self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Time to render") + self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=lambda _: rerender_cb(), hint="Time to render") self._time_enabled = time_enabled self.add_element(self._train_speed) @@ -219,7 +225,10 @@ def _update_crop_handle(han): self.add_element(self._time, additional_tags=("time",)) self._reset_camera = viser_server.add_gui_button( - label="Reset Up Dir", disabled=False, icon=viser.Icon.ARROW_BIG_UP_LINES, color="gray" + label="Reset Up Direction", + icon=viser.Icon.ARROW_BIG_UP_LINES, + color="gray", + hint="Set the up direction of the camera orbit controls to the camera's current up direction.", ) self._reset_camera.on_click(self._reset_camera_cb) diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py index 0e666038ef..6c31447bfb 100644 --- a/nerfstudio/viewer_beta/render_panel.py +++ b/nerfstudio/viewer_beta/render_panel.py @@ -21,14 +21,14 @@ import threading import time from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Literal, Optional, Tuple import numpy as np +import scipy.interpolate import splines import splines.quaternion import viser import viser.transforms as tf - from nerfstudio.viewer_beta.control_panel import ControlPanel @@ -209,21 +209,50 @@ def update_aspect(self, aspect: float) -> None: frame = dataclasses.replace(frame[0], aspect=aspect) self.add_camera(frame, keyframe_index=keyframe_index) + def get_aspect(self) -> float: + """Get W/H aspect ratio, which is shared across all keyframes.""" + assert len(self._keyframes) > 0 + return next(iter(self._keyframes.values()))[0].aspect + def reset(self) -> None: for frame in self._keyframes.values(): frame[1].remove() self._keyframes.clear() self.update_spline() - def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]: - if len(self._keyframes) < 2: - return None + def spline_t_from_t_sec(self, time: np.ndarray) -> np.ndarray: + """From a time value in seconds, compute a t value for our geometric + spline interpolation. An increment of 1 for the latter will move the + camera forward by one keyframe. + We use a PCHIP spline here to guarantee monotonicity. + """ transition_times_cumsum = self.compute_transition_times_cumsum() spline_indices = np.arange(transition_times_cumsum.shape[0]) - def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: - return np.interp(time, transition_times_cumsum, spline_indices) + if self.loop: + # In the case of a loop, we pad the spline to match the start/end + # slopes. + interpolator = scipy.interpolate.PchipInterpolator( + x=np.concatenate( + [ + [-(transition_times_cumsum[-1] - transition_times_cumsum[-2])], + transition_times_cumsum, + transition_times_cumsum[-1:] + transition_times_cumsum[1:2], + ], + axis=0, + ), + y=np.concatenate([[-1], spline_indices, [spline_indices[-1] + 1]], axis=0), + ) + else: + interpolator = scipy.interpolate.PchipInterpolator(x=transition_times_cumsum, y=spline_indices) + + # Clip to account for floating point error. 
+ return np.clip(interpolator(time), 0, spline_indices[-1]) + + def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]: + if len(self._keyframes) < 2: + return None self._fov_spline = splines.KochanekBartels( [ @@ -239,32 +268,29 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: assert self._fov_spline is not None max_t = self.compute_duration() t = max_t * normalized_t + spline_t = float(self.spline_t_from_t_sec(np.array(t))) - quat = self._orientation_spline.evaluate(spline_t_from_t_sec(t)) + quat = self._orientation_spline.evaluate(spline_t) assert isinstance(quat, splines.quaternion.UnitQuaternion) return ( tf.SE3.from_rotation_and_translation( tf.SO3(np.array([quat.scalar, *quat.vector])), - self._position_spline.evaluate(spline_t_from_t_sec(t)), + self._position_spline.evaluate(spline_t), ), - float(self._fov_spline.evaluate(spline_t_from_t_sec(t))), + float(self._fov_spline.evaluate(spline_t)), ) def update_spline(self) -> None: num_frames = int(self.compute_duration() * self.framerate) - if num_frames <= 0 or not self.show_spline: + keyframes = list(self._keyframes.values()) + + if num_frames <= 0 or not self.show_spline or len(keyframes) < 2: for node in self._spline_nodes: node.remove() self._spline_nodes.clear() return - # Update internal splines. - keyframes = list(self._keyframes.values()) transition_times_cumsum = self.compute_transition_times_cumsum() - spline_indices = np.arange(transition_times_cumsum.shape[0]) - - def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: - return np.interp(time, transition_times_cumsum, spline_indices) self._orientation_spline = splines.quaternion.KochanekBartels( [ @@ -282,7 +308,7 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: # Update visualized spline. 
points_array = self._position_spline.evaluate( - spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames)) + self.spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames)) ) colors_array = np.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))]) @@ -313,7 +339,11 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray: def make_transition_handle(i: int) -> None: assert self._position_spline is not None transition_pos = self._position_spline.evaluate( - spline_t_from_t_sec((transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0) + float( + self.spline_t_from_t_sec( + (transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0, + ) + ) ) transition_sphere = self._server.add_icosphere( f"/render_camera_spline/transition_{i}", @@ -340,7 +370,8 @@ def _(_) -> None: ) as camera_edit_panel: self._camera_edit_panel = camera_edit_panel override_transition_enabled = server.add_gui_checkbox( - "Override transition", initial_value=keyframe.override_transition_enabled + "Override transition", + initial_value=keyframe.override_transition_enabled, ) override_transition_sec = server.add_gui_number( "Override transition (sec)", @@ -418,14 +449,31 @@ def compute_transition_times_cumsum(self) -> np.ndarray: return np.array(out) +@dataclasses.dataclass +class RenderTabState: + """Useful GUI handles exposed by the render tab.""" + + preview_render: bool + preview_fov: float + preview_aspect: float + preview_camera_type: Literal["Perspective", "Fisheye", "Equirectangular"] + + def populate_render_tab( server: viser.ViserServer, config_path: Path, datapath: Path, control_panel: Optional[ControlPanel] = None, -) -> None: +) -> RenderTabState: from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO + render_tab_state = RenderTabState( + preview_render=False, + preview_fov=0.0, + preview_aspect=1.0, + preview_camera_type="Perspective", + ) + fov_degrees = server.add_gui_slider( "Default FOV", initial_value=75.0, @@ -445,6 +493,7 @@ def _(_) -> None: # Updating the aspect ratio will also re-render the camera frustums. # Could rethink this. camera_path.update_aspect(resolution.value[0] / resolution.value[1]) + compute_and_update_preview_camera_state() resolution = server.add_gui_vector2( "Resolution", @@ -454,16 +503,20 @@ def _(_) -> None: step=1, hint="Render output resolution in pixels.", ) - resolution.on_update(lambda _: camera_path.update_aspect(resolution.value[0] / resolution.value[1])) + + @resolution.on_update + def _(_) -> None: + camera_path.update_aspect(resolution.value[0] / resolution.value[1]) + compute_and_update_preview_camera_state() camera_type = server.add_gui_dropdown( - "Camera Type", + "Camera type", ("Perspective", "Fisheye", "Equirectangular"), initial_value="Perspective", hint="Camera model to render with. 
This is applied to all keyframes.", ) add_button = server.add_gui_button( - "Add keyframe", + "Add Keyframe", icon=viser.Icon.PLUS, hint="Add a new keyframe at the current pose.", ) @@ -483,19 +536,8 @@ def _(event: viser.GuiEvent) -> None: duration_number.value = camera_path.compute_duration() camera_path.update_spline() - reset_up_button = server.add_gui_button( - "Reset up direction", - icon=viser.Icon.ARROW_BIG_UP_LINES, - hint="Reset the orbit up direction.", - ) - - @reset_up_button.on_click - def _(event: viser.GuiEvent) -> None: - assert event.client is not None - event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0]) - clear_keyframes_button = server.add_gui_button( - "Clear keyframes", + "Clear Keyframes", icon=viser.Icon.TRASH, hint="Remove all keyframes from the render path.", ) @@ -535,7 +577,7 @@ def _(_) -> None: duration_number.value = camera_path.compute_duration() tension_slider = server.add_gui_slider( - "Spline Tension", + "Spline tension", min=0.0, max=1.0, initial_value=0.0, @@ -554,6 +596,8 @@ def _(_) -> None: hint="Toggle move handles for keyframes in the scene.", ) + transform_controls: List[viser.SceneNodeHandle] = [] + @move_checkbox.on_update def _(event: viser.GuiEvent) -> None: # Clear move handles when toggled off. @@ -614,7 +658,11 @@ def _(_) -> None: with playback_folder: play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY) pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False) - attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False) + preview_render_button = server.add_gui_button( + "Preview Render", hint="Show a preview of the render in the viewport." + ) + preview_render_stop_button = server.add_gui_button("Exit Render Preview", color="red", visible=False) + transition_sec_number = server.add_gui_number( "Transition (sec)", min=0.001, @@ -654,6 +702,24 @@ def remove_preview_camera() -> None: preview_camera_handle.remove() preview_camera_handle = None + def compute_and_update_preview_camera_state() -> Optional[Tuple[tf.SE3, float]]: + """Update the render tab state with the current preview camera pose. + Returns current camera pose + FOV if available.""" + + if preview_frame_slider is None: + return + maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad( + preview_frame_slider.value / get_max_frame_index() + ) + if maybe_pose_and_fov_rad is None: + remove_preview_camera() + return + pose, fov_rad = maybe_pose_and_fov_rad + render_tab_state.preview_fov = fov_rad + render_tab_state.preview_aspect = camera_path.get_aspect() + render_tab_state.preview_camera_type = camera_type.value + return pose, fov_rad + def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: """Helper for creating the current frame # slider. This is removed and re-added anytime the `max` value changes.""" @@ -666,18 +732,17 @@ def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]: step=1, initial_value=0, # Place right after the pause button. 
- order=pause_button.order + 0.01, + order=preview_render_stop_button.order + 0.01, + disabled=get_max_frame_index() == 1, ) + play_button.disabled = preview_frame_slider.disabled + preview_render_button.disabled = preview_frame_slider.disabled @preview_frame_slider.on_update def _(_) -> None: nonlocal preview_camera_handle - - maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad( - preview_frame_slider.value / get_max_frame_index() - ) + maybe_pose_and_fov_rad = compute_and_update_preview_camera_state() if maybe_pose_and_fov_rad is None: - remove_preview_camera() return pose, fov_rad = maybe_pose_and_fov_rad @@ -689,57 +754,61 @@ def _(_) -> None: wxyz=pose.rotation().wxyz, position=pose.translation(), color=(10, 200, 30), - # Hack: hide green frustum if the viewport is attached. - # This is a waste of bandwidth, but will ensure that any old - # frustums are removed/aren't rendered. - # - # Easy to fix with a global variable. - visible=not attach_viewport_checkbox.value, ) - if attach_viewport_checkbox.value: + if render_tab_state.preview_render: for client in server.get_clients().values(): client.camera.wxyz = pose.rotation().wxyz client.camera.position = pose.translation() - client.camera.fov = fov_rad return preview_frame_slider - @attach_viewport_checkbox.on_update + # We back up the camera poses before and after we start previewing renders. + camera_pose_backup_from_id: Dict[int, tuple] = {} + + @preview_render_button.on_click def _(_) -> None: - if preview_frame_slider is None: - remove_preview_camera() - return - maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad( - preview_frame_slider.value / get_max_frame_index() - ) + render_tab_state.preview_render = True + preview_render_button.visible = False + preview_render_stop_button.visible = True + + maybe_pose_and_fov_rad = compute_and_update_preview_camera_state() if maybe_pose_and_fov_rad is None: remove_preview_camera() return pose, fov = maybe_pose_and_fov_rad - server.add_camera_frustum( - "/preview_camera", - fov=fov, - aspect=resolution.value[0] / resolution.value[1], - scale=0.35, - wxyz=pose.rotation().wxyz, - position=pose.translation(), - color=(10, 200, 30), - # Hack: hide green frustum if the viewport is attached. - # This is a waste of bandwidth, but will ensure that any old - # frustums are removed/aren't rendered. - # - # Easy to fix with a global variable. - visible=not attach_viewport_checkbox.value, - ) - if not attach_viewport_checkbox.value: - for client in server.get_clients().values(): - client.camera.fov = fov_degrees.value / 180 * np.pi - else: - if attach_viewport_checkbox.value: - for client in server.get_clients().values(): - client.camera.wxyz = pose.rotation().wxyz - client.camera.position = pose.translation() - client.camera.fov = fov + del fov + + # Hide all scene nodes when we're previewing the render. + server.set_global_scene_node_visibility(False) + + # Back up and then set camera poses. + for client in server.get_clients().values(): + camera_pose_backup_from_id[client.client_id] = ( + client.camera.position, + client.camera.look_at, + client.camera.up_direction, + ) + client.camera.wxyz = pose.rotation().wxyz + client.camera.position = pose.translation() + + @preview_render_stop_button.on_click + def _(_) -> None: + render_tab_state.preview_render = False + preview_render_button.visible = True + preview_render_stop_button.visible = False + + # Revert camera poses. 
+ for client in server.get_clients().values(): + if client.client_id not in camera_pose_backup_from_id: + continue + cam_position, cam_look_at, cam_up = camera_pose_backup_from_id.pop(client.client_id) + client.camera.position = cam_position + client.camera.look_at = cam_look_at + client.camera.up_direction = cam_up + client.flush() + + # Un-hide scene nodes. + server.set_global_scene_node_visibility(True) preview_frame_slider = add_preview_frame_slider() @@ -823,13 +892,16 @@ def _(_) -> None: pose = tf.SE3.from_matrix(np.array(frame["matrix"]).reshape(4, 4)) # apply the x rotation by 180 deg pose = tf.SE3.from_rotation_and_translation( - pose.rotation() @ tf.SO3.from_x_radians(np.pi), pose.translation() + pose.rotation() @ tf.SO3.from_x_radians(np.pi), + pose.translation(), ) camera_path.add_camera( Keyframe( position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO, wxyz=pose.rotation().wxyz, - override_fov_enabled=True, + # There are some floating point conversions between degrees and radians, so the fov and + # default_Fov values will not be exactly matched. + override_fov_enabled=abs(frame["fov"] - json_data.get("default_fov", 0.0)) < 1e-3, override_fov_rad=frame["fov"] / 180.0 * np.pi, aspect=frame["aspect"], override_transition_enabled=frame.get("override_transition_enabled", None), @@ -853,7 +925,9 @@ def _(_) -> None: # set the initial value to the current date-time string now = datetime.datetime.now() render_name_text = server.add_gui_text( - "Render Name", initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"), hint="Name of the render" + "Render name", + initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"), + hint="Name of the render", ) render_button = server.add_gui_button( "Generate Command", @@ -862,6 +936,18 @@ def _(_) -> None: hint="Generate the ns-render command for rendering the camera path.", ) + reset_up_button = server.add_gui_button( + "Reset Up Direction", + icon=viser.Icon.ARROW_BIG_UP_LINES, + color="gray", + hint="Set the up direction of the camera orbit controls to the camera's current up direction.", + ) + + @reset_up_button.on_click + def _(event: viser.GuiEvent) -> None: + assert event.client is not None + event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0]) + @render_button.on_click def _(event: viser.GuiEvent) -> None: assert event.client is not None @@ -901,6 +987,7 @@ def _(event: viser.GuiEvent) -> None: "override_transition_sec": keyframe.override_transition_sec, } ) + json_data["default_fov"] = fov_degrees.value json_data["default_transition_sec"] = transition_sec_number.value json_data["keyframes"] = keyframes json_data["camera_type"] = camera_type.value.lower() @@ -980,7 +1067,7 @@ def _(_) -> None: camera_path.default_fov = fov_degrees.value / 180.0 * np.pi camera_path.default_transition_sec = transition_sec_number.value - transform_controls: List[viser.SceneNodeHandle] = [] + return render_tab_state if __name__ == "__main__": diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py index 849cf3d4e7..54f8e69ae2 100644 --- a/nerfstudio/viewer_beta/render_state_machine.py +++ b/nerfstudio/viewer_beta/render_state_machine.py @@ -20,15 +20,16 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple, get_args +import numpy as np import torch -from viser import ClientHandle +from nerfstudio.cameras.cameras import Cameras from nerfstudio.model_components.renderers import background_color_override_context +from 
nerfstudio.models.gaussian_splatting import GaussianSplattingModel from nerfstudio.utils import colormaps, writer from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter from nerfstudio.viewer.server import viewer_utils from nerfstudio.viewer_beta.utils import CameraState, get_camera -from nerfstudio.models.gaussian_splatting import GaussianSplattingModel -from nerfstudio.cameras.cameras import Cameras +from viser import ClientHandle if TYPE_CHECKING: from nerfstudio.viewer_beta.viewer import Viewer @@ -96,7 +97,7 @@ def action(self, action: RenderAction): # 1. we are in low_moving state # 2. the current next_action is move, static, or rerender return - elif self.next_action == "rerender": + elif self.next_action.action == "rerender": # never overwrite rerenders pass elif action.action == "static" and self.next_action.action == "move": @@ -254,9 +255,33 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any], static_render: bool = depth = ( outputs["gl_z_buf_depth"].cpu().numpy() * self.viser_scale_ratio if "gl_z_buf_depth" in outputs else None ) - jpg_quality = self.viewer.config.jpeg_quality if static_render else 40 + + # Convert to numpy. + selected_output = selected_output.cpu().numpy() + assert selected_output.shape[-1] == 3 + + # Pad image if the aspect ratio (W/H) doesn't match the client! + current_h, current_w = selected_output.shape[:2] + desired_aspect = self.client.camera.aspect + pad_width = int(max(0, (desired_aspect * current_h - current_w) // 2)) + pad_height = int(max(0, (current_w / desired_aspect - current_h) // 2)) + if pad_width > 5 or pad_height > 5: + selected_output = np.pad( + selected_output, + ((pad_height, pad_height), (pad_width, pad_width), (0, 0)), + mode="constant", + constant_values=0, + ) + + jpg_quality = ( + self.viewer.config.jpeg_quality + if static_render + else 75 + if self.viewer.render_tab_state.preview_render + else 40 + ) self.client.set_background_image( - selected_output.cpu().numpy(), + selected_output, format=self.viewer.config.image_format, jpeg_quality=jpg_quality, depth=depth, diff --git a/nerfstudio/viewer_beta/utils.py b/nerfstudio/viewer_beta/utils.py index 3614fce419..8b9e237938 100644 --- a/nerfstudio/viewer_beta/utils.py +++ b/nerfstudio/viewer_beta/utils.py @@ -15,7 +15,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, List, Optional, Tuple, Union +from typing import Any, List, Literal, Optional, Tuple, Union import numpy as np import torch @@ -31,11 +31,13 @@ class CameraState: """A dataclass for storing the camera state.""" fov: float - """ The field of view of the camera. """ + """The field of view of the camera.""" aspect: float - """ The aspect ratio of the image. """ + """The aspect ratio of the image. """ c2w: Float[torch.Tensor, "3 4"] - """ The camera matrix. 
""" + """The camera matrix.""" + camera_type: Literal[CameraType.PERSPECTIVE, CameraType.EQUIRECTANGULAR, CameraType.FISHEYE] + """Type of camera to render.""" def get_camera( @@ -57,14 +59,19 @@ def get_camera( focal_length = pp_h / np.tan(fov / 2.0) intrinsics_matrix = torch.tensor([[focal_length, 0, pp_w], [0, focal_length, pp_h], [0, 0, 1]], dtype=torch.float32) - camera_type = CameraType.PERSPECTIVE + if camera_state.camera_type is CameraType.EQUIRECTANGULAR: + fx = float(image_width / 2) + fy = float(image_height) + else: + fx = intrinsics_matrix[0, 0] + fy = intrinsics_matrix[1, 1] camera = Cameras( - fx=intrinsics_matrix[0, 0], - fy=intrinsics_matrix[1, 1], + fx=fx, + fy=fy, cx=pp_w, cy=pp_h, - camera_type=camera_type, + camera_type=camera_state.camera_type, camera_to_worlds=camera_state.c2w.to(torch.float32)[None, ...], times=torch.tensor([0.0], dtype=torch.float32), ) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index 2b6f4e140d..0c560f9d86 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -26,8 +26,8 @@ import viser import viser.theme import viser.transforms as vtf - from nerfstudio.cameras.camera_optimizers import CameraOptimizer +from nerfstudio.cameras.cameras import CameraType from nerfstudio.configs import base_config as cfg from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.models.base_model import Model @@ -41,6 +41,7 @@ from nerfstudio.viewer_beta.render_state_machine import RenderAction, RenderStateMachine from nerfstudio.viewer_beta.utils import CameraState, parse_object from nerfstudio.viewer_beta.viewer_elements import ViewerControl, ViewerElement +from typing_extensions import assert_never if TYPE_CHECKING: from nerfstudio.engine.trainer import Trainer @@ -80,6 +81,7 @@ def __init__( train_lock: Optional[threading.Lock] = None, share: bool = False, ): + self.ready = False # Set to True at end of constructor. 
self.config = config self.trainer = trainer self.last_step = 0 @@ -182,15 +184,16 @@ def __init__( self.viser_server, self.include_time, VISER_NERFSTUDIO_SCALE_RATIO, - self._interrupt_render, - self._crop_params_update, + self._trigger_rerender, self._output_type_change, self._output_split_type_change, default_composite_depth=self.config.default_composite_depth, ) config_path = self.log_filename.parents[0] / "config.yml" with tabs.add_tab("Render", viser.Icon.CAMERA): - populate_render_tab(self.viser_server, config_path, self.datapath, self.control_panel) + self.render_tab_state = populate_render_tab( + self.viser_server, config_path, self.datapath, self.control_panel + ) with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): populate_export_tab(self.viser_server, self.control_panel, config_path) @@ -203,7 +206,7 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem element.install(self.viser_server) # also rewire the hook to rerender prev_cb = element.cb_hook - element.cb_hook = lambda element: [prev_cb(element), self._interrupt_render(element)] + element.cb_hook = lambda element: [prev_cb(element), self._trigger_rerender()] else: # recursively create folders # If the folder name is "Custom Elements/a/b", then: @@ -237,6 +240,8 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem for c in self.viewer_controls: c._setup(self) + self.ready = True + def toggle_pause_button(self) -> None: self.pause_train.visible = not self.pause_train.visible self.resume_train.visible = not self.resume_train.visible @@ -266,7 +271,27 @@ def get_camera_state(self, client: viser.ClientHandle) -> CameraState: R = torch.tensor(R.as_matrix()) pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO c2w = torch.concatenate([R, pos[:, None]], dim=1) - camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w) + if self.ready and self.render_tab_state.preview_render: + camera_type = self.render_tab_state.preview_camera_type + camera_state = CameraState( + fov=self.render_tab_state.preview_fov, + aspect=self.render_tab_state.preview_aspect, + c2w=c2w, + camera_type=CameraType.PERSPECTIVE + if camera_type == "Perspective" + else CameraType.FISHEYE + if camera_type == "Fisheye" + else CameraType.EQUIRECTANGULAR + if camera_type == "Equirectangular" + else assert_never(camera_type), + ) + else: + camera_state = CameraState( + fov=client.camera.fov, + aspect=client.camera.aspect, + c2w=c2w, + camera_type=CameraType.PERSPECTIVE, + ) return camera_state def handle_disconnect(self, client: viser.ClientHandle) -> None: @@ -278,7 +303,9 @@ def handle_new_client(self, client: viser.ClientHandle) -> None: self.render_statemachines[client.client_id].start() @client.camera.on_update - def _(cam: viser.CameraHandle) -> None: + def _(_: viser.CameraHandle) -> None: + if not self.ready: + return self.last_move_time = time.time() with self.viser_server.atomic(): camera_state = self.get_camera_state(client) @@ -314,13 +341,14 @@ def update_camera_poses(self): self.camera_handles[key].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO self.camera_handles[key].wxyz = R.wxyz - def _interrupt_render(self, _) -> None: + def _trigger_rerender(self) -> None: """Interrupt current render.""" + if not self.ready: + return clients = self.viser_server.get_clients() for id in clients: camera_state = self.get_camera_state(clients[id]) - if camera_state is not None: - 
self.render_statemachines[id].action(RenderAction("rerender", camera_state)) + self.render_statemachines[id].action(RenderAction("move", camera_state)) def _toggle_training_state(self, _) -> None: """Toggle the trainer's training state.""" @@ -330,14 +358,6 @@ def _toggle_training_state(self, _) -> None: elif self.trainer.training_state == "paused": self.trainer.training_state = "training" - def _crop_params_update(self, _) -> None: - """Update crop parameters""" - clients = self.viser_server.get_clients() - for id in clients: - camera_state = self.get_camera_state(clients[id]) - if camera_state is not None: - self.render_statemachines[id].action(RenderAction("move", camera_state)) - def _output_type_change(self, _): self.output_type_changed = True diff --git a/nerfstudio/viewer_beta/viewer_elements.py b/nerfstudio/viewer_beta/viewer_elements.py index e8de855e0d..b53da0535a 100644 --- a/nerfstudio/viewer_beta/viewer_elements.py +++ b/nerfstudio/viewer_beta/viewer_elements.py @@ -35,7 +35,7 @@ ViserServer, ) -from nerfstudio.cameras.cameras import Cameras +from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.viewer_beta.utils import CameraState, get_camera if TYPE_CHECKING: @@ -141,7 +141,9 @@ def get_camera(self, img_height: int, img_width: int, client_id: Optional[int] = R = torch.tensor(R.as_matrix()) pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO c2w = torch.concatenate([R, pos[:, None]], dim=1) - camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w) + camera_state = CameraState( + fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w, camera_type=CameraType.PERSPECTIVE + ) return get_camera(camera_state, img_height, img_width) def register_click_cb(self, cb: Callable): @@ -483,7 +485,11 @@ def __init__( def _create_gui_handle(self, viser_server: ViserServer) -> None: assert self.gui_handle is None, "gui_handle should be initialized once" self.gui_handle = viser_server.add_gui_dropdown( - self.name, self.options, self.default_value, disabled=self.disabled, hint=self.hint # type: ignore + self.name, + self.options, + self.default_value, + disabled=self.disabled, + hint=self.hint, # type: ignore ) def set_options(self, new_options: List[TString]) -> None: diff --git a/pyproject.toml b/pyproject.toml index fd058f9e87..cd971dd6f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.1.14", + "viser==0.1.17", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", From 38fc72729cc86d3e0d11c2df16f69f5da603b2d8 Mon Sep 17 00:00:00 2001 From: Abhik Ahuja Date: Tue, 26 Dec 2023 17:07:08 -0800 Subject: [PATCH 100/101] Fix dycheck depth and rgb rescale (#2623) Fix depth and rgb rescale Not sure if depth is correct since I'm not sure what the 3rd dim is for, the code runs properly though. 
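For intuition, here is a minimal NumPy sketch (shapes are hypothetical, purely for illustration) of the broadcasting issue the added `ndim` check guards against: dividing a depth map that has no trailing channel dimension by `cosa[..., None]` broadcasts to an (H, W, W) array for square images (and raises for non-square ones) instead of the intended (H, W, 1):

```python
import numpy as np

H = W = 4                           # hypothetical square depth map
cosa = np.full((H, W), 0.9)         # cosine between viewing ray and optical axis, shape (H, W)
depth_hw = np.ones((H, W))          # raw depth without a channel dim
depth_hw1 = np.ones((H, W, 1))      # raw depth with a trailing channel dim

# Unconditional division: (H, W) / (H, W, 1) broadcasts to (H, W, W) -- wrong.
print((depth_hw / cosa[..., None]).shape)              # (4, 4, 4)

# With the ndim check, both layouts produce the intended (H, W, 1).
print((depth_hw[..., None] / cosa[..., None]).shape)   # (4, 4, 1)
print((depth_hw1 / cosa[..., None]).shape)             # (4, 4, 1)
```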
--- nerfstudio/data/dataparsers/dycheck_dataparser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/dycheck_dataparser.py b/nerfstudio/data/dataparsers/dycheck_dataparser.py index d9d4c8f4c6..9af4f58001 100644 --- a/nerfstudio/data/dataparsers/dycheck_dataparser.py +++ b/nerfstudio/data/dataparsers/dycheck_dataparser.py @@ -173,7 +173,10 @@ def _rescale_depth(depth_raw: np.ndarray, cam: Dict) -> np.ndarray: viewdirs /= np.linalg.norm(viewdirs, axis=-1, keepdims=True) viewdirs = viewdirs.reshape((*batch_shape, 3)) cosa = viewdirs @ (cam["camera_to_worlds"][:, 2]) - depth = depth_raw / cosa[..., None] + if depth_raw.ndim == cosa.ndim: + depth = depth_raw[..., None] / cosa[..., None] + else: + depth = depth_raw / cosa[..., None] return depth From a8e6f8fa3fd6c0ad2f3e681dcf1519e74ad2230f Mon Sep 17 00:00:00 2001 From: Cyrus Vachha Date: Wed, 27 Dec 2023 21:19:19 -0800 Subject: [PATCH 101/101] Adding docs for Instruct-GS2GS (#2702) * Adding docs for Instruct-GS2GS Adding documentation for Instruct-GS2GS including its page and adding it to the list of external and 3rd party methods. * fixing blank formatting dev checks * updating subtitle Updating the subtitle to specify gaussian splatting --- docs/index.md | 1 + docs/nerfology/methods/igs2gs.md | 103 +++++++++++++++++++++++++ docs/nerfology/methods/index.md | 3 +- nerfstudio/configs/external_methods.py | 13 ++++ 4 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 docs/nerfology/methods/igs2gs.md diff --git a/docs/index.md b/docs/index.md index 6b160fcecc..3815860a87 100644 --- a/docs/index.md +++ b/docs/index.md @@ -151,6 +151,7 @@ This documentation is organized into 3 parts: - [Nerfbusters](nerfology/methods/nerfbusters.md): Removing Ghostly Artifacts from Casually Captured NeRFs - [NeRFPlayer](nerfology/methods/nerfplayer.md): 4D Radiance Fields by Streaming Feature Channels - [Tetra-NeRF](nerfology/methods/tetranerf.md): Representing Neural Radiance Fields Using Tetrahedra +- [Instruct-GS2GS](nerfology/methods/igs2gs.md): Editing 3DGS Scenes with Instructions **Eager to contribute a method?** We'd love to see you use nerfstudio in implementing new (or even existing) methods! Please view our {ref}`guide` for more details about how to add to this list! diff --git a/docs/nerfology/methods/igs2gs.md b/docs/nerfology/methods/igs2gs.md new file mode 100644 index 0000000000..0b60486fdf --- /dev/null +++ b/docs/nerfology/methods/igs2gs.md @@ -0,0 +1,103 @@ +# Instruct-GS2GS + +

+<h4>Editing Gaussian Splatting Scenes with Instructions</h4>

+ +```{button-link} https://instruct-gs2gs.github.io/ +:color: primary +:outline: +Paper Website +``` + +```{button-link} https://github.com/cvachha/instruct-gs2gs +:color: primary +:outline: +Code +``` + + + +**Instruct-GS2GS enables instruction-based editing of 3D Gaussian Splatting scenes via a 2D diffusion model** + +## Installation + +First install nerfstudio dependencies. Then run: + +```bash +pip install git+https://github.com/cvachha/instruct-gs2gs +cd instruct-gs2gs +pip install --upgrade pip setuptools +pip install -e . +``` + +## Running Instruct-GS2GS + +Details for running Instruct-GS2GS (built with Nerfstudio!) can be found [here](https://github.com/cvachha/instruct-gs2gs). Once installed, run: + +```bash +ns-train igs2gs --help +``` + +| Method | Description | Memory | +| ------------ | ---------------------------- | ------ | +| `igs2gs` | Full model, used in paper | ~15GB | + +Datasets need to be processed with COLMAP for Gaussian Splatting support. + +Once you have trained your GS scene for 20k iterations, the checkpoints will be saved to the `outputs` directory. Copy the path to the `nerfstudio_models` folder. (Note: We noticed that training for 20k iterations rather than 30k seemed to run more reliably) + +To start training for editing the GS, run the following command: + +```bash +ns-train igs2gs --data {PROCESSED_DATA_DIR} --load-dir {outputs/.../nerfstudio_models} --pipeline.prompt {"prompt"} --pipeline.guidance-scale 12.5 --pipeline.image-guidance-scale 1.5 +``` + +The `{PROCESSED_DATA_DIR}` must be the same path as used in training the original GS. Using the CLI commands, you can choose the prompt and the guidance scales used for InstructPix2Pix. + +## Method + +### Overview + +Instruct-GS2GS is a method for editing 3D Gaussian Splatting (3DGS) scenes with text instructions in a method based on [Instruct-NeRF2NeRF](https://instruct-nerf2nerf.github.io/). Given a 3DGS scene of a scene and the collection of images used to reconstruct it, this method uses an image-conditioned diffusion model ([InstructPix2Pix](https://www.timothybrooks.com/instruct-pix2pix)) to iteratively edit the input images while optimizing the underlying scene, resulting in an optimized 3D scene that respects the edit instruction. The paper demonstrates that our proposed method is able to edit large-scale, real-world scenes, and is able to accomplish realistic and targeted edits. + + +## Pipeline + + + +This section will walk through each component of the Instruct-GS2GS method. + +### How it Works + +Instruct-GS2GS gradually updates a reconstructed Gaussian Splatting scene by iteratively updating the dataset images while training the 3DGS: + +1. Images are rendered from the scene at all training viewpoints. +2. They get edited by InstructPix2Pix given a global text instruction. +3. The training dataset images are replaced with the edited images. +4. The 3DGS continues training as usual for 2.5k iterations. + +### Editing Images with InstructPix2Pix + +To update a dataset image from a given viewpoint, Instruct-GS2GS takes the original, unedited training image as image conditioning and uses the global text instruction as text conditioning. This process mixes the information of the diffusion model, which attempts to edit the image, the current 3D structure of the 3DGS, and view-consistent information from the unedited, ground-truth images. By combining this set of information, the edit is respected while maintaining 3D consistency. 
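As a rough, self-contained sketch (not the project's actual implementation, which conditions on the current GS render and injects noise in latent space), a single dataset image could be edited with the off-the-shelf InstructPix2Pix pipeline from Hugging Face `diffusers`. The checkpoint name is the public `timbrooks/instruct-pix2pix` release, the guidance values mirror the defaults shown above, and the file paths are placeholders:

```python
import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline

# Load the public InstructPix2Pix checkpoint (placeholder setup, for illustration only).
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
).to("cuda")

original = Image.open("images/frame_00001.png").convert("RGB")  # unedited training image

# Text conditioning comes from the global instruction; image conditioning on the
# original capture keeps the edit grounded in the real scene.
edited = pipe(
    "make him a marble statue",
    image=original,
    num_inference_steps=20,
    guidance_scale=12.5,        # text (classifier-free) guidance
    image_guidance_scale=1.5,   # how strongly to stay close to the conditioning image
).images[0]

edited.save("images_edited/frame_00001.png")  # the edited frame replaces the dataset image
```

In the full method this per-image edit is simply repeated over the whole training set before 3DGS optimization resumes.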
+ +The code snippet for how an image is edited in the pipeline can be found [here](https://github.com/cvachha/instruct-gs2gs/blob/main/igs2gs/ip2p.py).
+ +### Iterative Dataset Update and Implementation
+ +The method takes in a dataset of camera poses and training images, a trained 3DGS scene, and a user-specified text-prompt instruction, e.g. "make him a marble statue". Instruct-GS2GS constructs the edited GS scene guided by the text prompt by applying a 2D text- and image-conditioned diffusion model, in this case Instruct-Pix2Pix, to all training images over the course of training. It performs these edits using an iterative update scheme in which all training dataset images are updated individually by the diffusion model, sweeping through the dataset in sequence, every 2.5k training iterations. This process allows the GS to receive a holistic edit while maintaining 3D consistency.
+ +The process is similar to Instruct-NeRF2NeRF: for a given training camera view, it sets the original training image as the conditioning image and the noisy input as the GS render from that camera combined with some randomly selected noise, and receives an edited image respecting the text conditioning. With this method, it is able to propagate the edited changes to the GS scene. The method is able to maintain grounded edits by conditioning Instruct-Pix2Pix on the original unedited training image.
+ +This method uses Nerfstudio's gsplat library for the underlying Gaussian Splatting model. We adapt similar parameters for the diffusion model from Instruct-NeRF2NeRF. Among these are the values that define the amount of noise (and therefore the amount of signal retained from the original images). We vary the classifier-free guidance scales per edit and scene, using a range of values. We edit the entire dataset and then train the scene for 2.5k iterations. For GS training, we use L1 and LPIPS losses. We train our method for a maximum of 27.5k iterations (starting with a GS scene trained for 20k iterations). However, in practice we stop training once the edit has converged. In many cases, the optimal training length is a subjective decision: a user may prefer more subtle or more extreme edits that are best found at different stages of training.
+ + +## Results
+ +For results, view the [project page](https://instruct-gs2gs.github.io/)!
+ \ No newline at end of file
diff --git a/docs/nerfology/methods/index.md b/docs/nerfology/methods/index.md index f1d4bd5e83..0cc68432d8 100644 --- a/docs/nerfology/methods/index.md +++ b/docs/nerfology/methods/index.md @@ -38,6 +38,7 @@ The following methods are supported in nerfstudio: Tetra-NeRF TensoRF Generfacto + Instruct-GS2GS ``` (own_method_docs)= @@ -50,7 +51,7 @@ We also welcome additions to the list of methods above. To do this, simply creat 1. Add a markdown file describing the model to the `docs/nerfology/methods` folder 2. Update the above list of implemented methods in this file. -3. Add the method to the {ref}`this` list in `docs/index.md`. +3. Add the method to {ref}`this` list in `docs/index.md`. 4. Add a new `ExternalMethod` entry to the `nerfstudio/configs/external_methods.py` file. For the method description, please refer to the [Instruct-NeRF2NeRF](in2n) page as an example of the layout.
Please try to include the following information: diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py index ce4e7bbc2b..72a679066f 100644 --- a/nerfstudio/configs/external_methods.py +++ b/nerfstudio/configs/external_methods.py @@ -139,6 +139,19 @@ class ExternalMethod: ) ) +# Instruct-GS2GS +external_methods.append( + ExternalMethod( + """[bold yellow]Instruct-GS2GS[/bold yellow] +For more information visit: https://docs.nerf.studio/nerfology/methods/igs2gs.html + +To enable Instruct-GS2GS, you must install it first by running: + [grey]pip install git+https://github.com/cvachha/instruct-gs2gs[/grey]""", + configurations=[("igs2gs", "Instruct-GS2GS. Full model, used in paper")], + pip_package="git+https://github.com/cvachha/instruct-gs2gs", + ) +) + @dataclass class ExternalMethodTrainerConfig(TrainerConfig):