From ead9b37c658708af7971871839c6fbf613d8092d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Mon, 11 Sep 2023 09:50:18 +0200
Subject: [PATCH 001/101] Allow colmap parser to load 3D points (#2408)
---
.../data/dataparsers/colmap_dataparser.py | 78 ++++++++++++++++++-
1 file changed, 77 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index a8a993d305..046f6d5d35 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -71,6 +71,10 @@ class ColmapDataParserConfig(DataParserConfig):
"""Path to depth maps directory. If not set, depths are not loaded."""
colmap_path: Path = Path("sparse/0")
"""Path to the colmap reconstruction directory relative to the data path."""
+ load_3D_points: bool = True
+ """Whether to load the 3D points from the colmap reconstruction."""
+ max_2D_matches_per_3D_point: int = -1
+ """Maximum number of 2D matches per 3D point. If set to -1, all 2D matches are loaded. If set to 0, no 2D matches are loaded."""
class ColmapDataParser(DataParser):
@@ -202,7 +206,7 @@ def _get_image_indices(self, image_filenames, split):
raise ValueError(f"Unknown dataparser split {split}")
return indices
- def _generate_dataparser_outputs(self, split: str = "train"):
+ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
assert self.config.data.exists(), f"Data directory {self.config.data} does not exist."
colmap_path = self.config.data / self.config.colmap_path
assert colmap_path.exists(), f"Colmap path {colmap_path} does not exist."
@@ -328,6 +332,11 @@ def _generate_dataparser_outputs(self, split: str = "train"):
applied_scale = float(meta["applied_scale"])
scale_factor *= applied_scale
+ metadata = {}
+ if self.config.load_3D_points:
+ # Load 3D points
+ metadata.update(self._load_3D_points(colmap_path, transform_matrix, scale_factor))
+
dataparser_outputs = DataparserOutputs(
image_filenames=image_filenames,
cameras=cameras,
@@ -338,10 +347,77 @@ def _generate_dataparser_outputs(self, split: str = "train"):
metadata={
"depth_filenames": depth_filenames if len(depth_filenames) > 0 else None,
"depth_unit_scale_factor": self.config.depth_unit_scale_factor,
+ **metadata,
},
)
return dataparser_outputs
+ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, scale_factor: float):
+ if (colmap_path / "points3D.bin").exists():
+ colmap_points = colmap_utils.read_points3D_binary(colmap_path / "points3D.bin")
+ elif (colmap_path / "points3D.txt").exists():
+ colmap_points = colmap_utils.read_points3D_text(colmap_path / "points3D.txt")
+ else:
+ raise ValueError(f"Could not find points3D.txt or points3D.bin in {colmap_path}")
+ points3D = torch.from_numpy(np.array([p.xyz for p in colmap_points.values()], dtype=np.float32))
+ points3D = (
+ torch.cat(
+ (
+ points3D,
+ torch.ones_like(points3D[..., :1]),
+ ),
+ -1,
+ )
+ @ transform_matrix.T
+ )
+ points3D *= scale_factor
+
+ # Load point colours
+ points3D_rgb = torch.from_numpy(np.array([p.rgb for p in colmap_points.values()], dtype=np.uint8))
+ points3D_num_points = torch.tensor([len(p.image_ids) for p in colmap_points.values()], dtype=torch.int64)
+ out = {
+ "points3D_xyz": points3D,
+ "points3D_rgb": points3D_rgb,
+ "points3D_error": torch.from_numpy(np.array([p.error for p in colmap_points.values()], dtype=np.float32)),
+ "points3D_num_points": points3D_num_points,
+ }
+ if self.config.max_2D_matches_per_3D_point != 0:
+ if (colmap_path / "images.txt").exists():
+ im_id_to_image = colmap_utils.read_images_text(colmap_path / "images.txt")
+ elif (colmap_path / "images.bin").exists():
+ im_id_to_image = colmap_utils.read_images_binary(colmap_path / "images.bin")
+ else:
+ raise ValueError(f"Could not find images.txt or images.bin in {colmap_path}")
+ downscale_factor = self._downscale_factor
+ max_num_points = int(torch.max(points3D_num_points).item())
+ if self.config.max_2D_matches_per_3D_point > 0:
+ max_num_points = min(max_num_points, self.config.max_2D_matches_per_3D_point)
+ points3D_image_ids = []
+ points3D_image_xy = []
+ for p in colmap_points.values():
+ nids = np.array(p.image_ids, dtype=np.int64)
+ nxy_ids = np.array(p.point2D_idxs, dtype=np.int32)
+ if self.config.max_2D_matches_per_3D_point != -1:
+                # Keep the lowest-error 2D matches
+ idxs = np.argsort(p.error)[: self.config.max_2D_matches_per_3D_point]
+ nids = nids[idxs]
+ nxy_ids = nxy_ids[idxs]
+ nxy = [im_id_to_image[im_id].xys[pt_idx] for im_id, pt_idx in zip(nids, nxy_ids)]
+ nxy = torch.from_numpy(np.stack(nxy).astype(np.float32))
+ nids = torch.from_numpy(nids)
+ assert len(nids.shape) == 1
+ assert len(nxy.shape) == 2
+ points3D_image_ids.append(
+ torch.cat((nids, torch.full((max_num_points - len(nids),), -1, dtype=torch.int64)))
+ )
+ points3D_image_xy.append(
+ torch.cat((nxy, torch.full((max_num_points - len(nxy), nxy.shape[-1]), 0, dtype=torch.float32)))
+ / downscale_factor
+ )
+ out["points3D_image_ids"] = torch.stack(points3D_image_ids, dim=0)
+ out["points3D_image_xy"] = torch.stack(points3D_image_xy, dim=0)
+ return out
+
def _setup_downscale_factor(
self, image_filenames: List[Path], mask_filenames: List[Path], depth_filenames: List[Path]
):
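The new options can be exercised directly from Python. Below is a minimal sketch, assuming a COLMAP reconstruction under `data/my_scene/sparse/0` (paths and values are illustrative); the metadata keys are the ones added by this patch.

```python
from pathlib import Path

from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig

# Illustrative configuration; the data path and match limit are placeholders.
config = ColmapDataParserConfig(
    data=Path("data/my_scene"),
    load_3D_points=True,            # read points3D.bin / points3D.txt from the reconstruction
    max_2D_matches_per_3D_point=4,  # cap the number of 2D observations kept per point
)
parser = config.setup()
outputs = parser.get_dataparser_outputs(split="train")

# The sparse point cloud is exposed through DataparserOutputs.metadata.
xyz = outputs.metadata["points3D_xyz"]  # (N, 3) float32, already transformed and scaled
rgb = outputs.metadata["points3D_rgb"]  # (N, 3) uint8
print(xyz.shape, rgb.shape)
```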
From 570ccf3c0a298160caad72745d13db0c23d45a2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Mon, 11 Sep 2023 18:00:44 +0200
Subject: [PATCH 002/101] Rename COLMAP 3D point features (#2417)
---
nerfstudio/data/dataparsers/colmap_dataparser.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index 046f6d5d35..2c15258d6d 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -379,7 +379,7 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca
"points3D_xyz": points3D,
"points3D_rgb": points3D_rgb,
"points3D_error": torch.from_numpy(np.array([p.error for p in colmap_points.values()], dtype=np.float32)),
- "points3D_num_points": points3D_num_points,
+ "points3D_num_points2D": points3D_num_points,
}
if self.config.max_2D_matches_per_3D_point != 0:
if (colmap_path / "images.txt").exists():
@@ -415,7 +415,7 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca
/ downscale_factor
)
out["points3D_image_ids"] = torch.stack(points3D_image_ids, dim=0)
- out["points3D_image_xy"] = torch.stack(points3D_image_xy, dim=0)
+ out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0)
return out
def _setup_downscale_factor(
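Downstream code should switch to the renamed keys. A small hedged helper (hypothetical, not part of the patch) that reads the track data from the metadata dictionary produced by the dataparser above:

```python
def get_point_tracks(metadata: dict):
    """Read per-point track data under the keys renamed by this patch."""
    num_obs = metadata["points3D_num_points2D"]   # (N,) int64, number of 2D observations per point
    track_xy = metadata["points3D_points2D_xy"]   # (N, max_obs, 2) float32, zero-padded
    return num_obs, track_xy
```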
From cc98fb6bd4eb5d14a2a75bfa3dc46a6bc0d2666e Mon Sep 17 00:00:00 2001
From: Maxim Bonnaerens
Date: Mon, 11 Sep 2023 20:07:52 +0200
Subject: [PATCH 003/101] Fix benchmarking: apply get_background_color in
renderer and set profiler to none (#2397)
* apply get_background_color and set profiler to none
* Apply override first
---------
Co-authored-by: Brent Yi
---
nerfstudio/model_components/renderers.py | 17 +++++++----------
.../benchmarking/launch_train_blender.sh | 6 +++---
2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py
index 20a506b1ea..1fde0d693c 100644
--- a/nerfstudio/model_components/renderers.py
+++ b/nerfstudio/model_components/renderers.py
@@ -102,21 +102,18 @@ def combine_rgb(
comp_rgb = torch.sum(weights * rgb, dim=-2)
accumulated_weight = torch.sum(weights, dim=-2)
if BACKGROUND_COLOR_OVERRIDE is not None:
- # This case must be before the others or the override is not properly applied
- background_color = cls.get_background_color(
- BACKGROUND_COLOR_OVERRIDE, shape=comp_rgb.shape, device=comp_rgb.device
- )
- elif background_color == "random":
+ background_color = BACKGROUND_COLOR_OVERRIDE
+ if background_color == "random":
# If background color is random, the predicted color is returned without blending,
# as if the background color was black.
return comp_rgb
-
elif background_color == "last_sample":
# Note, this is only supported for non-packed samples.
background_color = rgb[..., -1, :]
+ background_color = cls.get_background_color(background_color, shape=comp_rgb.shape, device=comp_rgb.device)
assert isinstance(background_color, torch.Tensor)
- comp_rgb = comp_rgb + background_color.to(comp_rgb.device) * (1.0 - accumulated_weight)
+ comp_rgb = comp_rgb + background_color * (1.0 - accumulated_weight)
return comp_rgb
@classmethod
@@ -124,13 +121,13 @@ def get_background_color(
cls, background_color: BackgroundColor, shape: Tuple[int, ...], device: torch.device
) -> Union[Float[Tensor, "3"], Float[Tensor, "*bs 3"]]:
"""Returns the RGB background color for a specified background color.
-
Note:
This function CANNOT be called for background_color being either "last_sample" or "random".
Args:
- rgb: RGB for each sample.
- background_color: The background color specification.
+ background_color: The background color specification. If a string is provided, it must be a valid color name.
+ shape: Shape of the output tensor.
+ device: Device on which to create the tensor.
Returns:
Background color as RGB.
diff --git a/nerfstudio/scripts/benchmarking/launch_train_blender.sh b/nerfstudio/scripts/benchmarking/launch_train_blender.sh
index cbdd7d754a..7b9db727db 100644
--- a/nerfstudio/scripts/benchmarking/launch_train_blender.sh
+++ b/nerfstudio/scripts/benchmarking/launch_train_blender.sh
@@ -17,7 +17,7 @@ while getopts "m:v:s" opt; do
m ) method_name="$OPTARG" ;;
v ) vis="$OPTARG" ;;
s ) single=true ;;
- ? ) helpFunction ;;
+ ? ) helpFunction ;;
esac
done
@@ -83,12 +83,12 @@ for dataset in "${DATASETS[@]}"; do
--steps-per-save=1000 \
--max-num-iterations=16500 \
--logging.local-writer.enable=False \
- --logging.enable-profiler=False \
+ --logging.profiler="none" \
--vis "${vis}" \
--timestamp "$timestamp" \
${dataparser} & GPU_PID[$idx]=$!
echo "Launched ${method_name} ${dataset} on gpu ${GPU_IDX[$idx]}, ${tag}"
-
+
# update gpu
((idx=(idx+1)%len))
done
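The reordered background handling in `combine_rgb` is easier to see in a stand-alone sketch. The helper below is a simplified illustration, not nerfstudio's actual API: the color table, shapes, and function name are assumptions. What it preserves is the order the patch establishes: the override is applied first, then the `random` / `last_sample` special cases, then a single resolution to a tensor before compositing.

```python
from typing import Optional, Union

import torch

COLORS = {"white": torch.ones(3), "black": torch.zeros(3)}  # toy color table


def composite_with_background(
    comp_rgb: torch.Tensor,            # (..., 3) accumulated ray colors
    accumulated_weight: torch.Tensor,  # (..., 1) accumulated opacity
    rgb: torch.Tensor,                 # (..., S, 3) per-sample colors along each ray
    background_color: Union[str, torch.Tensor],
    override: Optional[Union[str, torch.Tensor]] = None,
) -> torch.Tensor:
    if override is not None:
        background_color = override  # the override must win before any other case is considered
    if isinstance(background_color, str):
        if background_color == "random":
            return comp_rgb  # no blending, as if the background were black
        elif background_color == "last_sample":
            background_color = rgb[..., -1, :]  # use the last sample along each ray
        else:
            background_color = COLORS[background_color]
    return comp_rgb + background_color.to(comp_rgb.device) * (1.0 - accumulated_weight)


comp = torch.rand(8, 3)
acc = torch.rand(8, 1)
samples = torch.rand(8, 16, 3)
out = composite_with_background(comp, acc, samples, "white", override="last_sample")
print(out.shape)  # torch.Size([8, 3])
```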
From e9330d515d6c7c81905e0b61ca3bd1e018d7ecdc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Wed, 13 Sep 2023 19:55:24 +0200
Subject: [PATCH 004/101] Colmap parser: fix downscale for complex paths
(#2425)
---
.../data/dataparsers/colmap_dataparser.py | 89 ++++++++++++++-----
1 file changed, 65 insertions(+), 24 deletions(-)
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index 2c15258d6d..851a30367f 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -19,6 +19,7 @@
import sys
from dataclasses import dataclass, field
from pathlib import Path
+from functools import partial
from typing import List, Literal, Optional, Type
import numpy as np
@@ -32,8 +33,8 @@
from nerfstudio.data.scene_box import SceneBox
from nerfstudio.data.utils import colmap_parsing_utils as colmap_utils
from nerfstudio.process_data.colmap_utils import parse_colmap_camera_params
-from nerfstudio.process_data.process_data_utils import downscale_images
-from nerfstudio.utils.rich_utils import CONSOLE
+from nerfstudio.utils.scripts import run_command
+from nerfstudio.utils.rich_utils import CONSOLE, status
MAX_AUTO_RESOLUTION = 1600
@@ -139,15 +140,19 @@ def _get_all_images_and_cameras(self, recon_dir: Path):
c2w[2, :] *= -1
frame = {
- "file_path": (self.config.images_path / im_data.name).as_posix(),
+ "file_path": (self.config.data / self.config.images_path / im_data.name).as_posix(),
"transform_matrix": c2w,
"colmap_im_id": im_id,
}
frame.update(cameras[im_data.camera_id])
if self.config.masks_path is not None:
- frame["mask_path"] = ((self.config.masks_path / im_data.name).with_suffix(".png").as_posix(),)
+ frame["mask_path"] = (
+ (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix(),
+ )
if self.config.depths_path is not None:
- frame["depth_path"] = ((self.config.depths_path / im_data.name).with_suffix(".png").as_posix(),)
+ frame["depth_path"] = (
+ (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix(),
+ )
frames.append(frame)
if camera_model is not None:
assert camera_model == frame["camera_model"], "Multiple camera models are not supported"
@@ -175,7 +180,7 @@ def _get_image_indices(self, image_filenames, split):
with (self.config.data / f"{split}_list.txt").open("r", encoding="utf8") as f:
filenames = f.read().splitlines()
# Validate split first
- split_filenames = set(self.config.images_path / x for x in filenames)
+ split_filenames = set(self.config.data / self.config.images_path / x for x in filenames)
unmatched_filenames = split_filenames.difference(image_filenames)
if unmatched_filenames:
raise RuntimeError(
@@ -418,6 +423,25 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca
out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0)
return out
+ def _downscale_images(self, paths, get_fname, downscale_factor: int, nearest_neighbor: bool = False):
+ with status(msg="[bold yellow]Downscaling images...", spinner="growVertical"):
+ assert downscale_factor > 1
+ assert isinstance(downscale_factor, int)
+ # Using %05d ffmpeg commands appears to be unreliable (skips images).
+ for path in paths:
+ nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
+ path_out = get_fname(path)
+ path_out.parent.mkdir(parents=True, exist_ok=True)
+ ffmpeg_cmd = [
+ f'ffmpeg -y -noautorotate -i "{path}" ',
+ f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ",
+ f'"{path_out}"',
+ ]
+ ffmpeg_cmd = " ".join(ffmpeg_cmd)
+ run_command(ffmpeg_cmd)
+
+ CONSOLE.log("[bold green]:tada: Done downscaling images.")
+
def _setup_downscale_factor(
self, image_filenames: List[Path], mask_filenames: List[Path], depth_filenames: List[Path]
):
@@ -425,17 +449,16 @@ def _setup_downscale_factor(
Setup the downscale factor for the dataset. This is used to downscale the images and cameras.
"""
- def get_fname(filepath: Path) -> Path:
+ def get_fname(parent: Path, filepath: Path) -> Path:
"""Returns transformed file name when downscale factor is applied"""
- parts = list(filepath.parts)
- parts[-2] += f"_{self._downscale_factor}"
- filepath = Path(*parts)
- return self.config.data / filepath
+ rel_part = filepath.relative_to(parent)
+ base_part = parent.parent / (str(parent.name) + f"_{self._downscale_factor}")
+ return base_part / rel_part
filepath = next(iter(image_filenames))
if self._downscale_factor is None:
if self.config.downscale_factor is None:
- test_img = Image.open(self.config.data / filepath)
+ test_img = Image.open(filepath)
h, w = test_img.size
max_res = max(h, w)
df = 0
@@ -448,7 +471,9 @@ def get_fname(filepath: Path) -> Path:
CONSOLE.log(f"Using image downscale factor of {self._downscale_factor}")
else:
self._downscale_factor = self.config.downscale_factor
- if self._downscale_factor > 1 and not all(get_fname(fp).parent.exists() for fp in image_filenames):
+ if self._downscale_factor > 1 and not all(
+ get_fname(self.config.data / self.config.images_path, fp).parent.exists() for fp in image_filenames
+ ):
# Downscaled images not found
# Ask if user wants to downscale the images automatically here
CONSOLE.print(
@@ -456,23 +481,39 @@ def get_fname(filepath: Path) -> Path:
)
if Confirm.ask("\nWould you like to downscale the images now?", default=False, console=CONSOLE):
# Install the method
- image_dir = self.config.data / image_filenames[0].parent
- num_downscales = int(math.log2(self._downscale_factor))
- assert 2**num_downscales == self._downscale_factor, "Downscale factor must be a power of 2"
- downscale_images(image_dir, num_downscales, folder_name=image_dir.name, nearest_neighbor=False)
+ self._downscale_images(
+ image_filenames,
+ partial(get_fname, self.config.data / self.config.images_path),
+ self._downscale_factor,
+ nearest_neighbor=False,
+ )
if len(mask_filenames) > 0:
- mask_dir = mask_filenames[0].parent
- downscale_images(mask_dir, num_downscales, folder_name=mask_dir.name, nearest_neighbor=True)
+ assert self.config.masks_path is not None
+ self._downscale_images(
+ mask_filenames,
+ partial(get_fname, self.config.data / self.config.masks_path),
+ self._downscale_factor,
+ nearest_neighbor=True,
+ )
if len(depth_filenames) > 0:
- depth_dir = depth_filenames[0].parent
- downscale_images(depth_dir, num_downscales, folder_name=depth_dir.name, nearest_neighbor=False)
+ assert self.config.depths_path is not None
+ self._downscale_images(
+ depth_filenames,
+ partial(get_fname, self.config.data / self.config.depths_path),
+ self._downscale_factor,
+ nearest_neighbor=True,
+ )
else:
sys.exit(1)
# Return transformed filenames
if self._downscale_factor > 1:
- image_filenames = [get_fname(fp) for fp in image_filenames]
- mask_filenames = [get_fname(fp) for fp in mask_filenames]
- depth_filenames = [get_fname(fp) for fp in depth_filenames]
+ image_filenames = [get_fname(self.config.data / self.config.images_path, fp) for fp in image_filenames]
+ if len(mask_filenames) > 0:
+ assert self.config.masks_path is not None
+ mask_filenames = [get_fname(self.config.data / self.config.masks_path, fp) for fp in mask_filenames]
+ if len(depth_filenames) > 0:
+ assert self.config.depths_path is not None
+ depth_filenames = [get_fname(self.config.data / self.config.depths_path, fp) for fp in depth_filenames]
assert isinstance(self._downscale_factor, int)
return image_filenames, mask_filenames, depth_filenames, self._downscale_factor
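The core of the path fix is the new two-argument `get_fname(parent, filepath)`: the downscale suffix is attached to the images directory itself, so nested image paths survive. A self-contained sketch of that transformation (the paths are purely illustrative):

```python
from pathlib import Path


def downscaled_name(parent: Path, filepath: Path, downscale_factor: int) -> Path:
    """Mirror of the get_fname logic: data/images/seq1/0001.png -> data/images_2/seq1/0001.png."""
    rel_part = filepath.relative_to(parent)                           # seq1/0001.png
    base_part = parent.parent / f"{parent.name}_{downscale_factor}"   # data/images_2
    return base_part / rel_part


print(downscaled_name(Path("data/images"), Path("data/images/seq1/0001.png"), 2))
# data/images_2/seq1/0001.png
```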
From 805e6e702d328bf6fc2bf9721cbb0a4eedf03a35 Mon Sep 17 00:00:00 2001
From: anc2001 <71151378+anc2001@users.noreply.github.com>
Date: Thu, 14 Sep 2023 09:11:15 -0700
Subject: [PATCH 005/101] fix pixel sampling with masks issue when data is list
(#2369)
Co-authored-by: Brent Yi
---
nerfstudio/data/pixel_samplers.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
index b852c31b19..463d08294c 100644
--- a/nerfstudio/data/pixel_samplers.py
+++ b/nerfstudio/data/pixel_samplers.py
@@ -215,7 +215,7 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int,
num_rays_in_batch = num_rays_per_batch - (num_images - 1) * num_rays_in_batch
indices = self.sample_method(
- num_rays_in_batch, 1, image_height, image_width, mask=batch["mask"][i], device=device
+ num_rays_in_batch, 1, image_height, image_width, mask=batch["mask"][i].unsqueeze(0), device=device
)
indices[:, 0] = i
all_indices.append(indices)
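The one-line fix adds a per-image batch dimension: in the list-style batch, `batch["mask"][i]` is a single `(H, W, 1)` mask, while the sampling helper expects `(num_images, H, W, 1)`. A stand-alone sketch of why the extra dimension matters (the helper below is illustrative, not the pixel sampler's actual implementation):

```python
import torch


def sample_valid_pixels(num_rays: int, mask: torch.Tensor) -> torch.Tensor:
    """Draw (image, y, x) indices from a boolean mask of shape (num_images, H, W, 1)."""
    valid = torch.nonzero(mask[..., 0], as_tuple=False)   # (M, 3) valid pixel coordinates
    choice = torch.randint(0, valid.shape[0], (num_rays,))
    return valid[choice]


single_image_mask = torch.rand(64, 64, 1) > 0.5                     # (H, W, 1), as stored per image
indices = sample_valid_pixels(16, single_image_mask.unsqueeze(0))   # add the batch dimension
print(indices.shape)  # torch.Size([16, 3])
```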
From 7269a7ee1639cdb6d287988a648f53075ca8e1ea Mon Sep 17 00:00:00 2001
From: Sid Mehta
Date: Fri, 15 Sep 2023 09:21:09 -0700
Subject: [PATCH 006/101] Add Comet Logging (#2431)
* initial commit
* fixed naming
* added comet_ml as dependency
* added comet to docs
* Update custom_gui.md
* Update models.md
* Update benchmarking.md
* added Comet to ReadMe
* added decorator
* fixed link and added comet examples
* fixed link and added comet example
* fixed format
* fixed type ignore error
* fixed readme typo
* fixed typo
---
README.md | 2 +-
.../debugging_tools/benchmarking.md | 2 +-
docs/developer_guides/pipelines/models.md | 2 +-
docs/developer_guides/viewer/custom_gui.md | 2 +-
docs/quickstart/first_nerf.md | 4 +-
nerfstudio/configs/experiment_config.py | 15 ++++---
nerfstudio/engine/trainer.py | 11 ++---
nerfstudio/utils/decorators.py | 2 +-
nerfstudio/utils/writer.py | 42 ++++++++++++++++---
pyproject.toml | 1 +
10 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/README.md b/README.md
index aa5825a23a..b6f05f9be0 100644
--- a/README.md
+++ b/README.md
@@ -274,7 +274,7 @@ ns-train nerfacto --help
### Tensorboard / WandB / Viewer
-We support three different methods to track training progress, using the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), and [Weights and Biases](https://wandb.ai/site). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers.
+We support four different methods to track training progress: the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=github). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, comet, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (i.e. nerfacto, instant-ngp); for slower methods like NeRF, use the other loggers.
# Learn More
diff --git a/docs/developer_guides/debugging_tools/benchmarking.md b/docs/developer_guides/debugging_tools/benchmarking.md
index b38c09277c..bf97db4b5c 100644
--- a/docs/developer_guides/debugging_tools/benchmarking.md
+++ b/docs/developer_guides/debugging_tools/benchmarking.md
@@ -16,7 +16,7 @@ Simply replace the arguments in brackets with the correct arguments.
- `-m {METHOD_NAME}`: Name of the method you want to benchmark (e.g. `nerfacto`, `mipnerf`).
- `-s`: Launch a single job per GPU.
-- `-v {VIS}`: Use another visualization than wandb, which is the default. Only other option is tensorboard.
+- `-v {VIS}`: Use a visualizer other than wandb, which is the default. Other options are comet and tensorboard.
- `{GPU_LIST}`: (optional) Specify the list of gpus you want to use on your machine space separated. for instance, if you want to use GPU's 0-3, you will need to pass in `0 1 2 3`. If left empty, the script will automatically find available GPU's and distribute training jobs on the available GPUs.
:::{admonition} Tip
diff --git a/docs/developer_guides/pipelines/models.md b/docs/developer_guides/pipelines/models.md
index 8afa76d3a2..4fb1cb3f24 100644
--- a/docs/developer_guides/pipelines/models.md
+++ b/docs/developer_guides/pipelines/models.md
@@ -55,7 +55,7 @@ class Model:
"""Process a RayBundle object and return RayOutputs describing quanties for each ray."""
def get_metrics_dict(self, outputs, batch):
- """Returns metrics dictionary which will be plotted with wandb or tensorboard."""
+ """Returns metrics dictionary which will be plotted with comet, wandb or tensorboard."""
def get_loss_dict(self, outputs, batch, metrics_dict=None):
"""Returns a dictionary of losses to be summed which will be your loss."""
diff --git a/docs/developer_guides/viewer/custom_gui.md b/docs/developer_guides/viewer/custom_gui.md
index e11381d020..2ae41fa6ac 100644
--- a/docs/developer_guides/viewer/custom_gui.md
+++ b/docs/developer_guides/viewer/custom_gui.md
@@ -59,7 +59,7 @@ class MyModel(Model):
**Writing to the element**
-You can write to a viewer element in Python, which provides a convenient way to track values in your code without the need for wandb/tensorboard or relying on `print` statements.
+You can write to a viewer element in Python, which provides a convenient way to track values in your code without the need for comet/wandb/tensorboard or relying on `print` statements.
```python
self.custom_value.value = x
diff --git a/docs/quickstart/first_nerf.md b/docs/quickstart/first_nerf.md
index a88183d9e4..137bd0b235 100644
--- a/docs/quickstart/first_nerf.md
+++ b/docs/quickstart/first_nerf.md
@@ -96,9 +96,9 @@ ns-train nerfacto nerfstudio-data --help
Each script will have some other minor quirks (like the training script dataparser subcommand needing to come after the model subcommand), read up on them [here](../reference/cli/index.md).
-## Tensorboard / WandB / Viewer
+## Comet / Tensorboard / WandB / Viewer
-We support three different methods to track training progress, using the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), and [Weights and Biases](https://wandb.ai/site). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers.
+We support four different methods to track training progress: the viewer, [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, comet, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (i.e. nerfacto, instant-ngp); for slower methods like NeRF, use the other loggers.
## Evaluating Runs
diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py
index 1fa3598f6d..0da8893ed1 100644
--- a/nerfstudio/configs/experiment_config.py
+++ b/nerfstudio/configs/experiment_config.py
@@ -22,13 +22,7 @@
from typing import Any, Dict, Literal, Optional
import yaml
-
-from nerfstudio.configs.base_config import (
- InstantiateConfig,
- LoggingConfig,
- MachineConfig,
- ViewerConfig,
-)
+from nerfstudio.configs.base_config import InstantiateConfig, LoggingConfig, MachineConfig, ViewerConfig
from nerfstudio.configs.config_utils import to_immutable_dict
from nerfstudio.engine.optimizers import OptimizerConfig
from nerfstudio.engine.schedulers import SchedulerConfig
@@ -68,7 +62,9 @@ class ExperimentConfig(InstantiateConfig):
}
)
"""Dictionary of optimizer groups and their schedulers"""
- vis: Literal["viewer", "wandb", "tensorboard", "viewer+wandb", "viewer+tensorboard", "viewer_beta"] = "wandb"
+ vis: Literal[
+ "viewer", "wandb", "tensorboard", "comet", "viewer+wandb", "viewer+tensorboard", "viewer+comet", "viewer_beta"
+ ] = "wandb"
"""Which visualizer to use."""
data: Optional[Path] = None
"""Alias for --pipeline.datamanager.data"""
@@ -95,6 +91,9 @@ def is_tensorboard_enabled(self) -> bool:
"""Checks if tensorboard is enabled."""
return ("tensorboard" == self.vis) | ("viewer+tensorboard" == self.vis)
+ def is_comet_enabled(self) -> bool:
+ return ("comet" == self.vis) | ("viewer+comet" == self.vis)
+
def set_timestamp(self) -> None:
"""Dynamically set the experiment timestamp"""
if self.timestamp == "{timestamp}":
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 94256809cf..c8ae500433 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -27,11 +27,6 @@
from typing import Dict, List, Literal, Optional, Tuple, Type, cast
import torch
-from rich import box, style
-from rich.panel import Panel
-from rich.table import Table
-from torch.cuda.amp.grad_scaler import GradScaler
-
from nerfstudio.configs.experiment_config import ExperimentConfig
from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
@@ -44,6 +39,10 @@
from nerfstudio.utils.writer import EventName, TimeWriter
from nerfstudio.viewer.server.viewer_state import ViewerState
from nerfstudio.viewer_beta.viewer import Viewer as ViewerBetaState
+from rich import box, style
+from rich.panel import Panel
+from rich.table import Table
+from torch.cuda.amp.grad_scaler import GradScaler
TRAIN_INTERATION_OUTPUT = Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]
TORCH_DEVICE = str
@@ -197,6 +196,7 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None:
writer.setup_event_writer(
self.config.is_wandb_enabled(),
self.config.is_tensorboard_enabled(),
+ self.config.is_comet_enabled(),
log_dir=writer_log_path,
experiment_name=self.config.experiment_name,
project_name=self.config.project_name,
@@ -326,6 +326,7 @@ def _check_viewer_warnings(self) -> None:
(self.config.is_viewer_enabled() or self.config.is_viewer_beta_enabled())
and not self.config.is_tensorboard_enabled()
and not self.config.is_wandb_enabled()
+ and not self.config.is_comet_enabled()
):
string: str = (
"[NOTE] Not running eval iterations since only viewer is enabled.\n"
diff --git a/nerfstudio/utils/decorators.py b/nerfstudio/utils/decorators.py
index 06f28f60a6..d6fd3f4179 100644
--- a/nerfstudio/utils/decorators.py
+++ b/nerfstudio/utils/decorators.py
@@ -66,7 +66,7 @@ def check_eval_enabled(func: Callable) -> Callable:
def wrapper(self, *args, **kwargs):
ret = None
- if self.config.is_wandb_enabled() or self.config.is_tensorboard_enabled():
+ if self.config.is_wandb_enabled() or self.config.is_tensorboard_enabled() or self.config.is_comet_enabled():
ret = func(self, *args, **kwargs)
return ret
diff --git a/nerfstudio/utils/writer.py b/nerfstudio/utils/writer.py
index 8d3b07b1a1..59e6c5a713 100644
--- a/nerfstudio/utils/writer.py
+++ b/nerfstudio/utils/writer.py
@@ -24,16 +24,16 @@
from time import time
from typing import Any, Dict, List, Optional, Union
+import comet_ml
import torch
import wandb
from jaxtyping import Float
-from torch import Tensor
-from torch.utils.tensorboard import SummaryWriter
-
from nerfstudio.configs import base_config as cfg
from nerfstudio.utils.decorators import check_main_thread, decorate_all
from nerfstudio.utils.printing import human_format
from nerfstudio.utils.rich_utils import CONSOLE
+from torch import Tensor
+from torch.utils.tensorboard import SummaryWriter
def to8b(x):
@@ -150,7 +150,7 @@ def put_time(name: str, duration: float, step: int, avg_over_steps: bool = True,
put_scalar(name, duration, step)
if update_eta:
- ## NOTE: eta should be called with avg train iteration time
+ # NOTE: eta should be called with avg train iteration time
remain_iter = GLOBAL_BUFFER["max_iter"] - step
remain_time = remain_iter * GLOBAL_BUFFER["events"][name]["avg"]
put_scalar(EventName.ETA, remain_time, step)
@@ -185,7 +185,7 @@ def setup_local_writer(config: cfg.LoggingConfig, max_iter: int, banner_messages
else:
CONSOLE.log("disabled local writer")
- ## configure all the global buffer basic information
+ # configure all the global buffer basic information
GLOBAL_BUFFER["max_iter"] = max_iter
GLOBAL_BUFFER["max_buffer_size"] = config.max_buffer_size
GLOBAL_BUFFER["steps_per_log"] = config.steps_per_log
@@ -203,6 +203,7 @@ def is_initialized():
def setup_event_writer(
is_wandb_enabled: bool,
is_tensorboard_enabled: bool,
+ is_comet_enabled: bool,
log_dir: Path,
experiment_name: str,
project_name: str = "nerfstudio-project",
@@ -214,6 +215,11 @@ def setup_event_writer(
banner_messages: list of messages to always display at bottom of screen
"""
using_event_writer = False
+
+ if is_comet_enabled:
+ curr_writer = CometWriter(log_dir=log_dir, experiment_name=experiment_name, project_name=project_name)
+ EVENT_WRITERS.append(curr_writer)
+ using_event_writer = True
if is_wandb_enabled:
curr_writer = WandbWriter(log_dir=log_dir, experiment_name=experiment_name, project_name=project_name)
EVENT_WRITERS.append(curr_writer)
@@ -225,7 +231,7 @@ def setup_event_writer(
if using_event_writer:
string = f"logging events to: {log_dir}"
else:
- string = "Disabled tensorboard/wandb event writers"
+ string = "Disabled comet/tensorboard/wandb event writers"
CONSOLE.print(f"[bold yellow]{string}")
@@ -346,6 +352,30 @@ def write_config(self, name: str, config_dict: Dict[str, Any], step: int):
self.tb_writer.add_text("config", str(config_dict))
+@decorate_all([check_main_thread])
+class CometWriter(Writer):
+ """Comet_ML Writer Class"""
+
+ def __init__(self, log_dir: Path, experiment_name: str, project_name: str = "nerfstudio-project"):
+ self.experiment = comet_ml.Experiment(project_name=project_name)
+ if experiment_name != "unnamed":
+ self.experiment.set_name(experiment_name)
+
+ def write_image(self, name: str, image: Float[Tensor, "H W C"], step: int) -> None:
+ self.experiment.log_image(image, name, step=step)
+
+ def write_scalar(self, name: str, scalar: Union[float, torch.Tensor], step: int) -> None:
+ self.experiment.log_metric(name, scalar, step)
+
+ def write_config(self, name: str, config_dict: Dict[str, Any], step: int):
+ """Function that writes out the config to Comet
+
+ Args:
+ config: config dictionary to write out
+ """
+ self.experiment.log_parameters(config_dict, step=step)
+
+
def _cursorup(x: int):
"""utility tool to move the cursor up on the terminal
diff --git a/pyproject.toml b/pyproject.toml
index 1347f954b2..add21cf6d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ classifiers = [
dependencies = [
"appdirs>=1.4",
"av>=9.2.0",
+ "comet_ml>=3.33.8",
"cryptography>=38",
"tyro>=0.5.3",
"gdown>=4.6.0",
From ef2fd3dbe1d5b6f35781716b747ea142fe21f5e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Fri, 15 Sep 2023 23:18:23 +0200
Subject: [PATCH 007/101] Fix import error when tcnn installed but CUDA runtime
not available (#2429)
---
Dockerfile | 6 ++-
nerfstudio/field_components/encodings.py | 8 +---
nerfstudio/field_components/mlp.py | 8 +---
nerfstudio/fields/sdf_field.py | 7 +--
nerfstudio/utils/external.py | 58 ++++++++++++++++++++++++
tests/field_components/test_fields.py | 7 ++-
6 files changed, 68 insertions(+), 26 deletions(-)
create mode 100644 nerfstudio/utils/external.py
diff --git a/Dockerfile b/Dockerfile
index b48b0dd872..d73c487343 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -168,6 +168,8 @@ RUN cd nerfstudio && \
# Change working directory
WORKDIR /workspace
-# Install nerfstudio cli auto completion and enter shell if no command was provided.
-CMD ns-install-cli --mode install && /bin/bash
+# Install nerfstudio cli auto completion
+RUN ns-install-cli --mode install
+# Bash as default entrypoint.
+CMD /bin/bash -l
diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py
index 7145ae5996..437ee805fe 100644
--- a/nerfstudio/field_components/encodings.py
+++ b/nerfstudio/field_components/encodings.py
@@ -29,13 +29,7 @@
from nerfstudio.field_components.base_field_component import FieldComponent
from nerfstudio.utils.math import components_from_spherical_harmonics, expected_sin
from nerfstudio.utils.printing import print_tcnn_speed_warning
-
-try:
- import tinycudann as tcnn
-
- TCNN_EXISTS = True
-except ModuleNotFoundError:
- TCNN_EXISTS = False
+from nerfstudio.utils.external import tcnn, TCNN_EXISTS
class Encoding(FieldComponent):
diff --git a/nerfstudio/field_components/mlp.py b/nerfstudio/field_components/mlp.py
index 52eabd29df..2585a49e4e 100644
--- a/nerfstudio/field_components/mlp.py
+++ b/nerfstudio/field_components/mlp.py
@@ -25,13 +25,7 @@
from nerfstudio.utils.printing import print_tcnn_speed_warning
from nerfstudio.utils.rich_utils import CONSOLE
-
-try:
- import tinycudann as tcnn
-
- TCNN_EXISTS = True
-except ModuleNotFoundError:
- TCNN_EXISTS = False
+from nerfstudio.utils.external import TCNN_EXISTS, tcnn
def activation_to_tcnn_string(activation: Union[nn.Module, None]) -> str:
diff --git a/nerfstudio/fields/sdf_field.py b/nerfstudio/fields/sdf_field.py
index 2f730d2c54..ed936d4b2b 100644
--- a/nerfstudio/fields/sdf_field.py
+++ b/nerfstudio/fields/sdf_field.py
@@ -33,12 +33,7 @@
from nerfstudio.field_components.field_heads import FieldHeadNames
from nerfstudio.field_components.spatial_distortions import SpatialDistortion
from nerfstudio.fields.base_field import Field, FieldConfig
-
-try:
- import tinycudann as tcnn
-except ModuleNotFoundError:
- # tinycudann module doesn't exist
- pass
+from nerfstudio.utils.external import tcnn
class LearnedVariance(nn.Module):
diff --git a/nerfstudio/utils/external.py b/nerfstudio/utils/external.py
new file mode 100644
index 0000000000..f14219bd56
--- /dev/null
+++ b/nerfstudio/utils/external.py
@@ -0,0 +1,58 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+
+class _LazyError:
+ def __init__(self, data):
+ self.__data = data # pylint: disable=unused-private-member
+
+ class LazyErrorObj:
+ def __init__(self, data):
+ self.__data = data # pylint: disable=unused-private-member
+
+ def __call__(self, *args, **kwds):
+ name, exc = object.__getattribute__(self, "__data")
+ raise RuntimeError(f"Could not load package {name}.") from exc
+
+ def __getattr__(self, __name: str):
+ name, exc = object.__getattribute__(self, "__data")
+ raise RuntimeError(f"Could not load package {name}") from exc
+
+ def __getattr__(self, __name: str):
+ return _LazyError.LazyErrorObj(object.__getattribute__(self, "__data"))
+
+
+TCNN_EXISTS = False
+tcnn_import_exception = None
+tcnn = None
+try:
+ import tinycudann
+
+ tcnn = tinycudann
+ del tinycudann
+ TCNN_EXISTS = True
+except ModuleNotFoundError as _exp:
+ tcnn_import_exception = _exp
+except ImportError as _exp:
+ tcnn_import_exception = _exp
+except EnvironmentError as _exp:
+ if "Unknown compute capability" not in _exp.args[0]:
+ raise _exp
+ print("Could not load tinycudann: " + str(_exp), file=sys.stderr)
+ tcnn_import_exception = _exp
+
+if tcnn_import_exception is not None:
+ tcnn = _LazyError(tcnn_import_exception)
diff --git a/tests/field_components/test_fields.py b/tests/field_components/test_fields.py
index 74d7d6c3af..fd5332776e 100644
--- a/tests/field_components/test_fields.py
+++ b/tests/field_components/test_fields.py
@@ -5,15 +5,14 @@
from nerfstudio.cameras.rays import Frustums, RaySamples
from nerfstudio.fields.nerfacto_field import NerfactoField
+from nerfstudio.utils.external import TCNN_EXISTS, tcnn_import_exception
def test_nerfacto_field():
"""Test the Nerfacto field"""
- try:
- import tinycudann as tcnn # noqa: F401
- except ModuleNotFoundError as e:
+ if not TCNN_EXISTS:
# tinycudann module doesn't exist
- print(e)
+ print(tcnn_import_exception)
return
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
aabb_scale = 1.0
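The new `nerfstudio/utils/external.py` defers the import failure until `tcnn` is actually touched. The same pattern, reduced to a generic stand-alone sketch for a hypothetical optional dependency `somepkg` (the names below are assumptions, not nerfstudio code):

```python
import sys


class _LazyError:
    """Stands in for a package that failed to import; any attribute access raises the stored error."""

    def __init__(self, name: str, exc: BaseException):
        self._name = name
        self._exc = exc

    def __getattr__(self, attr: str):
        raise RuntimeError(f"Could not load package {self._name}.") from self._exc


SOMEPKG_EXISTS = False
try:
    import somepkg  # hypothetical optional dependency

    SOMEPKG_EXISTS = True
except ImportError as exc:
    print(f"Could not load somepkg: {exc}", file=sys.stderr)
    somepkg = _LazyError("somepkg", exc)

# Importing this module never fails; only code that actually uses somepkg raises.
```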
From b0e700137bd8bde21fbc7867f433dd36b2905e1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=8F=E3=83=86=E3=82=A3=E3=80=80?=
=?UTF-8?q?=E4=BB=81?=
Date: Tue, 19 Sep 2023 13:13:48 +0900
Subject: [PATCH 008/101] Allow the option for timestamp to be generated for
`ns-train --load-config` (#2442)
* Fix timestamp for ns-train --load-config
* Remove whitespace
---
nerfstudio/scripts/train.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/scripts/train.py b/nerfstudio/scripts/train.py
index 9ca18c71a7..fde0a5c37a 100644
--- a/nerfstudio/scripts/train.py
+++ b/nerfstudio/scripts/train.py
@@ -226,7 +226,6 @@ def launch(
def main(config: TrainerConfig) -> None:
"""Main function."""
- config.set_timestamp()
if config.data:
CONSOLE.log("Using --data alias for --data.pipeline.datamanager.data")
config.pipeline.datamanager.data = config.data
@@ -239,6 +238,8 @@ def main(config: TrainerConfig) -> None:
CONSOLE.log(f"Loading pre-set config from: {config.load_config}")
config = yaml.load(config.load_config.read_text(), Loader=yaml.Loader)
+ config.set_timestamp()
+
# print and save config
config.print_to_terminal()
config.save_config()
From 3e1d4af8217134fc34b42690345c5847147bc732 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Tue, 19 Sep 2023 16:22:55 -0700
Subject: [PATCH 009/101] Add share link support for beta viewer (#2445)
* Add share link support for beta viewer
* More consistent CLI experience
* fix viewer jitter by adding atomic update on camera pose
* change to make_share_url
* fix
* remove incorrect assert
---------
Co-authored-by: Justin Kerr
---
nerfstudio/configs/base_config.py | 2 ++
nerfstudio/configs/experiment_config.py | 10 ++++++++--
nerfstudio/engine/trainer.py | 1 +
nerfstudio/scripts/viewer/run_viewer.py | 8 ++++++--
nerfstudio/viewer_beta/viewer.py | 25 ++++++++++++++-----------
pyproject.toml | 2 +-
6 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py
index 315659acbb..fc906fae9e 100644
--- a/nerfstudio/configs/base_config.py
+++ b/nerfstudio/configs/base_config.py
@@ -146,3 +146,5 @@ class ViewerConfig(PrintableConfig):
"""Image format viewer should use; jpeg is lossy compression, while png is lossless."""
jpeg_quality: int = 90
"""Quality tradeoff to use for jpeg compression."""
+ make_share_url: bool = False
+ """Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored."""
diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py
index 0da8893ed1..42cc4399ec 100644
--- a/nerfstudio/configs/experiment_config.py
+++ b/nerfstudio/configs/experiment_config.py
@@ -22,7 +22,13 @@
from typing import Any, Dict, Literal, Optional
import yaml
-from nerfstudio.configs.base_config import InstantiateConfig, LoggingConfig, MachineConfig, ViewerConfig
+
+from nerfstudio.configs.base_config import (
+ InstantiateConfig,
+ LoggingConfig,
+ MachineConfig,
+ ViewerConfig,
+)
from nerfstudio.configs.config_utils import to_immutable_dict
from nerfstudio.engine.optimizers import OptimizerConfig
from nerfstudio.engine.schedulers import SchedulerConfig
@@ -73,7 +79,7 @@ class ExperimentConfig(InstantiateConfig):
relative_model_dir: Path = Path("nerfstudio_models/")
"""Relative path to save all checkpoints."""
load_scheduler: bool = True
- """Whether to load the scheduler state_dict to resume training, if exists"""
+ """Whether to load the scheduler state_dict to resume training, if it exists."""
def is_viewer_enabled(self) -> bool:
"""Checks if a viewer is enabled."""
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index c8ae500433..04fa8e05b3 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -177,6 +177,7 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None:
pipeline=self.pipeline,
trainer=self,
train_lock=self.train_lock,
+ share=self.config.viewer.make_share_url,
)
banner_messages = [f"Viewer Beta at: {self.viewer_state.viewer_url}"]
self._check_viewer_warnings()
diff --git a/nerfstudio/scripts/viewer/run_viewer.py b/nerfstudio/scripts/viewer/run_viewer.py
index 56a712026f..de210a30d8 100644
--- a/nerfstudio/scripts/viewer/run_viewer.py
+++ b/nerfstudio/scripts/viewer/run_viewer.py
@@ -55,6 +55,8 @@ class RunViewer:
"""Viewer configuration"""
vis: Literal["viewer", "viewer_beta"] = "viewer"
"""Type of viewer"""
+ make_share_url: bool = False
+ """Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored."""
def main(self) -> None:
"""Main function."""
@@ -66,6 +68,7 @@ def main(self) -> None:
num_rays_per_chunk = config.viewer.num_rays_per_chunk
assert self.viewer.num_rays_per_chunk == -1
config.vis = self.vis
+ config.viewer.make_share_url = self.make_share_url
config.viewer = self.viewer.as_viewer_config()
config.viewer.num_rays_per_chunk = num_rays_per_chunk
@@ -103,6 +106,7 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int):
log_filename=viewer_log_path,
datapath=base_dir,
pipeline=pipeline,
+ share=config.viewer.make_share_url,
)
banner_messages = [f"Viewer Beta at: {viewer_state.viewer_url}"]
@@ -126,11 +130,11 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int):
def entrypoint():
"""Entrypoint for use with pyproject scripts."""
tyro.extras.set_accent_color("bright_yellow")
- tyro.cli(RunViewer).main()
+ tyro.cli(tyro.conf.FlagConversionOff[RunViewer]).main()
if __name__ == "__main__":
entrypoint()
# For sphinx docs
-get_parser_fn = lambda: tyro.extras.get_parser(RunViewer) # noqa
+get_parser_fn = lambda: tyro.extras.get_parser(tyro.conf.FlagConversionOff[RunViewer]) # noqa
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index 9e2314ad41..d2a53b198f 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -26,6 +26,8 @@
import viser
import viser.theme
import viser.transforms as vtf
+
+from nerfstudio.cameras.camera_optimizers import CameraOptimizer
from nerfstudio.configs import base_config as cfg
from nerfstudio.data.datasets.base_dataset import InputDataset
from nerfstudio.models.base_model import Model
@@ -40,8 +42,6 @@
from nerfstudio.viewer_beta.utils import CameraState, parse_object
from nerfstudio.viewer_beta.viewer_elements import ViewerControl, ViewerElement
-from nerfstudio.cameras.camera_optimizers import CameraOptimizer
-
if TYPE_CHECKING:
from nerfstudio.engine.trainer import Trainer
@@ -59,6 +59,7 @@ class Viewer:
datapath: path to data
pipeline: pipeline object to use
trainer: trainer object to use
+ share: print a shareable URL
Attributes:
viewer_url: url to open viewer
@@ -77,6 +78,7 @@ def __init__(
pipeline: Pipeline,
trainer: Optional[Trainer] = None,
train_lock: Optional[threading.Lock] = None,
+ share: bool = False,
):
self.config = config
self.trainer = trainer
@@ -103,7 +105,7 @@ def __init__(
self._prev_train_state: Literal["training", "paused", "completed"] = "training"
self.client: Optional[viser.ClientHandle] = None
- self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port)
+ self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share)
buttons = (
viser.theme.TitlebarButton(
text="Getting Started",
@@ -191,14 +193,15 @@ def handle_new_client(self, client: viser.ClientHandle) -> None:
@client.camera.on_update
def _(cam: viser.CameraHandle) -> None:
assert self.client is not None
- self.last_move_time = time.time()
- R = vtf.SO3(wxyz=self.client.camera.wxyz)
- R = R @ vtf.SO3.from_x_radians(np.pi)
- R = torch.tensor(R.as_matrix())
- pos = torch.tensor(self.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO
- c2w = torch.concatenate([R, pos[:, None]], dim=1)
- self.camera_state = CameraState(fov=self.client.camera.fov, aspect=self.client.camera.aspect, c2w=c2w)
- self.render_statemachine.action(RenderAction("move", self.camera_state))
+ with client.atomic():
+ self.last_move_time = time.time()
+ R = vtf.SO3(wxyz=self.client.camera.wxyz)
+ R = R @ vtf.SO3.from_x_radians(np.pi)
+ R = torch.tensor(R.as_matrix())
+ pos = torch.tensor(self.client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO
+ c2w = torch.concatenate([R, pos[:, None]], dim=1)
+ self.camera_state = CameraState(fov=self.client.camera.fov, aspect=self.client.camera.aspect, c2w=c2w)
+ self.render_statemachine.action(RenderAction("move", self.camera_state))
def set_camera_visibility(self, visible: bool) -> None:
"""Toggle the visibility of the training cameras."""
diff --git a/pyproject.toml b/pyproject.toml
index add21cf6d1..c641526755 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.2",
+ "viser==0.1.3",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
From e0d8f0426752dc0f97bd332fd6b84e95e8377210 Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Tue, 19 Sep 2023 16:48:54 -0700
Subject: [PATCH 010/101] Render oriented crops in viewer beta (#2447)
* add obb to the rendering pipeline
* useless line
* lint
* remove todo
* pyright
* more pyright
---
nerfstudio/engine/trainer.py | 5 +-
nerfstudio/scripts/render.py | 33 +++---
nerfstudio/viewer_beta/render_panel.py | 135 +++++++++++++++++++++----
nerfstudio/viewer_beta/viewer.py | 4 +-
4 files changed, 139 insertions(+), 38 deletions(-)
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 04fa8e05b3..b4ccba9eea 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -170,10 +170,13 @@ def setup(self, test_mode: Literal["test", "val", "inference"] = "val") -> None:
)
banner_messages = [f"Viewer at: {self.viewer_state.viewer_url}"]
if self.config.is_viewer_beta_enabled() and self.local_rank == 0:
+ datapath = self.config.data
+ if datapath is None:
+ datapath = self.base_dir
self.viewer_state = ViewerBetaState(
self.config.viewer,
log_filename=viewer_log_path,
- datapath=self.base_dir,
+ datapath=datapath,
pipeline=self.pipeline,
trainer=self,
train_lock=self.train_lock,
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 5a3ca78bd0..99a7cae01d 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -54,7 +54,7 @@
)
from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager
-from nerfstudio.data.scene_box import SceneBox
+from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.model_components import renderers
from nerfstudio.pipelines.base_pipeline import Pipeline
from nerfstudio.utils import colormaps, install_checks
@@ -127,12 +127,10 @@ def _render_trajectory_video(
with progress:
for camera_idx in progress.track(range(cameras.size), description=""):
- aabb_box = None
+ obb_box = None
if crop_data is not None:
- bounding_box_min = crop_data.center - crop_data.scale / 2.0
- bounding_box_max = crop_data.center + crop_data.scale / 2.0
- aabb_box = SceneBox(torch.stack([bounding_box_min, bounding_box_max]).to(pipeline.device))
- camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, aabb_box=aabb_box)
+ obb_box = crop_data.obb
+ camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box)
if crop_data is not None:
with renderers.background_color_override_context(
@@ -289,10 +287,17 @@ class CropData:
background_color: Float[Tensor, "3"] = torch.Tensor([0.0, 0.0, 0.0])
"""background color"""
- center: Float[Tensor, "3"] = torch.Tensor([0.0, 0.0, 0.0])
- """center of the crop"""
- scale: Float[Tensor, "3"] = torch.Tensor([2.0, 2.0, 2.0])
- """scale of the crop"""
+ obb: OrientedBox = OrientedBox(R=torch.eye(3), T=torch.zeros(3), S=torch.ones(3) * 2)
+ """Oriented box representing the crop region"""
+
+ # properties for backwards-compatibility interface
+ @property
+ def center(self):
+ return self.obb.T
+
+ @property
+ def scale(self):
+ return self.obb.S
def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]:
@@ -305,13 +310,13 @@ def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]:
"""
if "crop" not in camera_json or camera_json["crop"] is None:
return None
-
bg_color = camera_json["crop"]["crop_bg_color"]
-
+ center = camera_json["crop"]["crop_center"]
+ scale = camera_json["crop"]["crop_scale"]
+ rot = (0.0, 0.0, 0.0) if "crop_rot" not in camera_json["crop"] else tuple(camera_json["crop"]["crop_rot"])
return CropData(
background_color=torch.Tensor([bg_color["r"] / 255.0, bg_color["g"] / 255.0, bg_color["b"] / 255.0]),
- center=torch.Tensor(camera_json["crop"]["crop_center"]),
- scale=torch.Tensor(camera_json["crop"]["crop_scale"]),
+ obb=OrientedBox.from_params(center, rot, scale),
)
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index 02d4d6ce07..7342b0f388 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -13,17 +13,19 @@
# limitations under the License.
from __future__ import annotations
-
+from pathlib import Path
import colorsys
import dataclasses
import threading
import time
from typing import Dict, List, Optional, Tuple
-
+import datetime
+from nerfstudio.viewer_beta.control_panel import ControlPanel
import numpy as onp
import splines
import splines.quaternion
import viser
+import json
import viser.transforms as tf
@@ -256,7 +258,11 @@ def update_spline(self) -> None:
)
-def populate_render_tab(server: viser.ViserServer) -> None:
+def populate_render_tab(
+ server: viser.ViserServer, config_path: Path, datapath: Path, control_panel: ControlPanel
+) -> None:
+ from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO
+
fov_degrees = server.add_gui_slider(
"FOV",
initial_value=90.0,
@@ -528,6 +534,11 @@ def _(_) -> None:
play_button.visible = True
pause_button.visible = False
+ # set the initial value to the current date-time string
+ now = datetime.datetime.now()
+ render_name_text = server.add_gui_text(
+ "Render Name", initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"), hint="Name of the render"
+ )
render_button = server.add_gui_button(
"Generate Command",
color="green",
@@ -537,10 +548,107 @@ def _(_) -> None:
@render_button.on_click
def _(event: viser.GuiEvent) -> None:
- """TODO: write the render JSON and show the render command."""
assert event.client is not None
- with event.client.add_gui_modal("TODO") as modal:
- event.client.add_gui_markdown("TODO")
+ num_frames = int(framerate_slider.value * duration_number.value)
+ json_data = {}
+ # json data has the properties:
+ # keyframes: list of keyframes with
+ # matrix : flattened 4x4 matrix
+ # fov: float in degrees
+ # aspect: float
+ # camera_type: string of camera type
+ # render_height: int
+ # render_width: int
+ # fps: int
+ # seconds: float
+ # is_cycle: bool
+ # smoothness_value: float
+ # camera_path: list of frames with properties
+ # camera_to_world: flattened 4x4 matrix
+ # fov: float in degrees
+ # aspect: float
+ # first populate the keyframes:
+ keyframes = []
+ for keyframe, dummy in camera_path._keyframes.values():
+ pose = tf.SE3.from_rotation_and_translation(
+ tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(onp.pi),
+ keyframe.position / VISER_NERFSTUDIO_SCALE_RATIO,
+ )
+ keyframes.append(
+ {
+ "matrix": pose.as_matrix().flatten().tolist(),
+ "fov": onp.rad2deg(keyframe.override_fov_value)
+ if keyframe.override_fov_enabled
+ else fov_degrees.value,
+ "aspect": keyframe.aspect,
+ }
+ )
+ json_data["keyframes"] = keyframes
+ json_data["camera_type"] = "perspective"
+ json_data["render_height"] = resolution.value[1]
+ json_data["render_width"] = resolution.value[0]
+ json_data["fps"] = framerate_slider.value
+ json_data["seconds"] = duration_number.value
+ json_data["is_cycle"] = loop.value
+ json_data["smoothness_value"] = smoothness.value
+ # now populate the camera path:
+ camera_path_list = []
+ for i in range(num_frames):
+ maybe_pose_and_fov = camera_path.interpolate_pose_and_fov(i / num_frames)
+ if maybe_pose_and_fov is None:
+ return
+ pose, fov = maybe_pose_and_fov
+ # rotate the axis of the camera 180 about x axis
+ pose = tf.SE3.from_rotation_and_translation(
+ pose.rotation() @ tf.SO3.from_x_radians(onp.pi),
+ pose.translation() / VISER_NERFSTUDIO_SCALE_RATIO,
+ )
+ camera_path_list.append(
+ {
+ "camera_to_world": pose.as_matrix().flatten().tolist(),
+ "fov": onp.rad2deg(fov),
+ "aspect": resolution.value[0] / resolution.value[1],
+ }
+ )
+ json_data["camera_path"] = camera_path_list
+ # finally add crop data if crop is enabled
+ if control_panel.crop_viewport:
+ obb = control_panel.crop_obb
+ rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians()
+ color = control_panel.background_color
+ json_data["crop"] = {
+ "crop_center": obb.T.tolist(),
+ "crop_scale": obb.S.tolist(),
+ "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw],
+ "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]},
+ }
+
+ # now write the json file
+ json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json"
+ with open(json_outfile.absolute(), "w") as outfile:
+ json.dump(json_data, outfile)
+ # now show the command
+ with event.client.add_gui_modal("Render Command") as modal:
+ dataname = datapath.name
+ command = " ".join(
+ [
+ "ns-render camera-path",
+ f"--load-config {config_path}",
+ f"--camera-path-filename {json_outfile.absolute()}",
+ f"--output-path renders/{dataname}/{render_name_text.value}.mp4",
+ ]
+ )
+ event.client.add_gui_markdown(
+ "\n".join(
+ [
+ "To render the trajectory, run the following from the command line:",
+ "",
+ "```",
+ command,
+ "```",
+ ]
+ )
+ )
close_button = event.client.add_gui_button("Close")
@close_button.on_click
@@ -551,18 +659,3 @@ def _(_) -> None:
camera_path.default_fov = fov_degrees.value / 180.0 * onp.pi
transform_controls: List[viser.SceneNodeHandle] = []
-
-
-def main() -> None:
- """Launch a GUI with just the render panel, for development purposes."""
- server = viser.ViserServer()
- server.configure_theme(dark_mode=True, control_layout="collapsible")
- server.world_axes.visible = True
-
- populate_render_tab(server)
- while True:
- time.sleep(10.0)
-
-
-if __name__ == "__main__":
- main()
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index d2a53b198f..6e6e02a294 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -154,11 +154,11 @@ def __init__(
self._toggle_training_state,
self.set_camera_visibility,
)
+ config_path = self.log_filename.parents[0] / "config.yml"
with tabs.add_tab("Render", viser.Icon.CAMERA):
- populate_render_tab(self.viser_server)
+ populate_render_tab(self.viser_server, config_path, self.datapath, self.control_panel)
with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT):
- config_path = self.log_filename.parents[0] / "config.yml"
populate_export_tab(self.viser_server, self.control_panel, config_path)
def nested_folder_install(folder_labels: List[str], element: ViewerElement):
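For reference, the camera-path JSON written by the render tab above (and consumed by `ns-render camera-path`) has the shape sketched below. This is a minimal, hypothetical file assembled from the keys populated in the diff; a real file has one `camera_path` entry per interpolated frame, one `keyframes` entry per user keyframe, and an optional `crop` block when the viewport crop is enabled. All values here are placeholders.

```python
import json
from pathlib import Path

# Hypothetical pose: identity camera-to-world matrix, flattened row-major.
identity = [1.0, 0.0, 0.0, 0.0,
            0.0, 1.0, 0.0, 0.0,
            0.0, 0.0, 1.0, 0.0,
            0.0, 0.0, 0.0, 1.0]

camera_path_json = {
    "keyframes": [{"matrix": identity, "fov": 75.0, "aspect": 16 / 9}],
    "camera_type": "perspective",
    "render_height": 1080,
    "render_width": 1920,
    "fps": 24,
    "seconds": 4.0,
    "is_cycle": False,
    "smoothness_value": 0.5,
    "camera_path": [{"camera_to_world": identity, "fov": 75.0, "aspect": 16 / 9}],
}

out = Path("camera_paths") / "example.json"
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(json.dumps(camera_path_json, indent=2))
```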
From 76f8bcf953dbc001fe3db32839db1448bcd6ad85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Thu, 21 Sep 2023 22:51:35 +0200
Subject: [PATCH 011/101] Disable loading colmap points by default (#2451)
---
nerfstudio/data/dataparsers/colmap_dataparser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index 851a30367f..c68a9cf6a9 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -72,7 +72,7 @@ class ColmapDataParserConfig(DataParserConfig):
"""Path to depth maps directory. If not set, depths are not loaded."""
colmap_path: Path = Path("sparse/0")
"""Path to the colmap reconstruction directory relative to the data path."""
- load_3D_points: bool = True
+ load_3D_points: bool = False
"""Whether to load the 3D points from the colmap reconstruction."""
max_2D_matches_per_3D_point: int = -1
"""Maximum number of 2D matches per 3D point. If set to -1, all 2D matches are loaded. If set to 0, no 2D matches are loaded."""
From c87ebe34ba8b11172971ce48e44b6a8e8eb7a6fc Mon Sep 17 00:00:00 2001
From: Matthew Tancik
Date: Thu, 21 Sep 2023 18:38:22 -0700
Subject: [PATCH 012/101] v0.3.4 (#2452)
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index c641526755..c041858ba3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "nerfstudio"
-version = "0.3.3"
+version = "0.3.4"
description = "All-in-one repository for state-of-the-art NeRFs"
readme = "README.md"
license = { text="Apache 2.0"}
From 2fd8b59d20e73f55106012623d188b227a976df5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=95=E3=83=A9=E3=83=8F=E3=83=86=E3=82=A3=E3=80=80?=
=?UTF-8?q?=E4=BB=81?=
Date: Fri, 22 Sep 2023 23:40:20 +0900
Subject: [PATCH 013/101] Add `background_color` config to keep consistent with
other models (#2455)
* add background_color config
* removed unused color import
---
nerfstudio/models/generfacto.py | 7 ++++---
nerfstudio/models/mipnerf.py | 4 ++--
nerfstudio/models/tensorf.py | 4 +++-
nerfstudio/models/vanilla_nerf.py | 8 +++++---
4 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/nerfstudio/models/generfacto.py b/nerfstudio/models/generfacto.py
index bf34eb3357..c07f904c7d 100644
--- a/nerfstudio/models/generfacto.py
+++ b/nerfstudio/models/generfacto.py
@@ -57,7 +57,7 @@
from nerfstudio.model_components.scene_colliders import AABBBoxCollider, SphereCollider
from nerfstudio.model_components.shaders import LambertianShader, NormalsShader
from nerfstudio.models.base_model import Model, ModelConfig
-from nerfstudio.utils import colormaps, colors, math, misc
+from nerfstudio.utils import colormaps, math, misc
@dataclass
@@ -68,7 +68,8 @@ class GenerfactoModelConfig(ModelConfig):
"""target class to instantiate"""
prompt: str = "a high quality photo of a ripe pineapple"
"""prompt for stable dreamfusion"""
-
+ background_color: Literal["random", "last_sample", "black", "white"] = "white"
+ """Whether to randomize the background color."""
orientation_loss_mult: Tuple[float, float] = (0.001, 10.0)
"""Orientation loss multipier on computed normals."""
orientation_loss_mult_range: Tuple[int, int] = (0, 15000)
@@ -251,7 +252,7 @@ def update_schedule(step):
)
# renderers
- self.renderer_rgb = RGBRenderer(background_color=colors.WHITE)
+ self.renderer_rgb = RGBRenderer(background_color=self.config.background_color)
self.renderer_accumulation = AccumulationRenderer()
self.renderer_depth = DepthRenderer()
self.renderer_normals = NormalsRenderer()
diff --git a/nerfstudio/models/mipnerf.py b/nerfstudio/models/mipnerf.py
index a3a7834615..67ca632514 100644
--- a/nerfstudio/models/mipnerf.py
+++ b/nerfstudio/models/mipnerf.py
@@ -38,7 +38,7 @@
)
from nerfstudio.models.base_model import Model
from nerfstudio.models.vanilla_nerf import VanillaModelConfig
-from nerfstudio.utils import colormaps, colors, misc
+from nerfstudio.utils import colormaps, misc
class MipNerfModel(Model):
@@ -81,7 +81,7 @@ def populate_modules(self):
self.sampler_pdf = PDFSampler(num_samples=self.config.num_importance_samples, include_original=False)
# renderers
- self.renderer_rgb = RGBRenderer(background_color=colors.WHITE)
+ self.renderer_rgb = RGBRenderer(background_color=self.config.background_color)
self.renderer_accumulation = AccumulationRenderer()
self.renderer_depth = DepthRenderer()
diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py
index a2cd9901d9..a0380d42e4 100644
--- a/nerfstudio/models/tensorf.py
+++ b/nerfstudio/models/tensorf.py
@@ -89,6 +89,8 @@ class TensoRFModelConfig(ModelConfig):
tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm"
regularization: Literal["none", "l1", "tv"] = "l1"
"""Regularization method used in tensorf paper"""
+ background_color: Literal["random", "last_sample", "black", "white"] = "white"
+ """Whether to randomize the background color."""
class TensoRFModel(Model):
@@ -234,7 +236,7 @@ def populate_modules(self):
self.sampler_pdf = PDFSampler(num_samples=self.config.num_samples, single_jitter=True, include_original=False)
# renderers
- self.renderer_rgb = RGBRenderer(background_color=colors.WHITE)
+ self.renderer_rgb = RGBRenderer(background_color=self.config.background_color)
self.renderer_accumulation = AccumulationRenderer()
self.renderer_depth = DepthRenderer()
diff --git a/nerfstudio/models/vanilla_nerf.py b/nerfstudio/models/vanilla_nerf.py
index ea5344fb87..e95c8dfa49 100644
--- a/nerfstudio/models/vanilla_nerf.py
+++ b/nerfstudio/models/vanilla_nerf.py
@@ -19,7 +19,7 @@
from __future__ import annotations
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple, Type
+from typing import Any, Dict, List, Tuple, Type, Literal
import torch
from torch.nn import Parameter
@@ -41,7 +41,7 @@
RGBRenderer,
)
from nerfstudio.models.base_model import Model, ModelConfig
-from nerfstudio.utils import colormaps, colors, misc
+from nerfstudio.utils import colormaps, misc
@dataclass
@@ -58,6 +58,8 @@ class VanillaModelConfig(ModelConfig):
"""Specifies whether or not to include ray warping based on time."""
temporal_distortion_params: Dict[str, Any] = to_immutable_dict({"kind": TemporalDistortionKind.DNERF})
"""Parameters to instantiate temporal distortion with"""
+ background_color: Literal["random", "last_sample", "black", "white"] = "white"
+ """Whether to randomize the background color."""
class NeRFModel(Model):
@@ -110,7 +112,7 @@ def populate_modules(self):
self.sampler_pdf = PDFSampler(num_samples=self.config.num_importance_samples)
# renderers
- self.renderer_rgb = RGBRenderer(background_color=colors.WHITE)
+ self.renderer_rgb = RGBRenderer(background_color=self.config.background_color)
self.renderer_accumulation = AccumulationRenderer()
self.renderer_depth = DepthRenderer()
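A short sketch of how the new field threads through to the renderer, mirroring the diffs above (the concrete config class is just one of the models touched by this patch):

```python
from nerfstudio.model_components.renderers import RGBRenderer
from nerfstudio.models.vanilla_nerf import VanillaModelConfig

# Choose the background at config time instead of the previously hard-coded colors.WHITE.
config = VanillaModelConfig(background_color="random")

# populate_modules() then simply forwards the configured choice:
renderer_rgb = RGBRenderer(background_color=config.background_color)
```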
From 52f26e82b3adb9ba89d61e7ca938a00faa7994dd Mon Sep 17 00:00:00 2001
From: Maxim Bonnaerens
Date: Fri, 22 Sep 2023 17:36:51 +0200
Subject: [PATCH 014/101] Do not materialize mlp_head if no field_heads (#2457)
In case it is used in a proposal network, we avoid initializing the head.
Co-authored-by: Matthew Tancik
---
nerfstudio/fields/vanilla_nerf_field.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/nerfstudio/fields/vanilla_nerf_field.py b/nerfstudio/fields/vanilla_nerf_field.py
index c9d2bbb78b..22e11a25b8 100644
--- a/nerfstudio/fields/vanilla_nerf_field.py
+++ b/nerfstudio/fields/vanilla_nerf_field.py
@@ -74,15 +74,15 @@ def __init__(
skip_connections=skip_connections,
out_activation=nn.ReLU(),
)
-
- self.mlp_head = MLP(
- in_dim=self.mlp_base.get_out_dim() + self.direction_encoding.get_out_dim(),
- num_layers=head_mlp_num_layers,
- layer_width=head_mlp_layer_width,
- out_activation=nn.ReLU(),
- )
-
self.field_output_density = DensityFieldHead(in_dim=self.mlp_base.get_out_dim())
+
+ if field_heads:
+ self.mlp_head = MLP(
+ in_dim=self.mlp_base.get_out_dim() + self.direction_encoding.get_out_dim(),
+ num_layers=head_mlp_num_layers,
+ layer_width=head_mlp_layer_width,
+ out_activation=nn.ReLU(),
+ )
self.field_heads = nn.ModuleList([field_head() for field_head in field_heads] if field_heads else []) # type: ignore
for field_head in self.field_heads:
field_head.set_in_dim(self.mlp_head.get_out_dim()) # type: ignore
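The guard above is a lazy-initialization pattern: only build submodules that will actually be used (proposal networks only ever query density, so the color head is dead weight). A stripped-down sketch of the same idea in plain PyTorch, with hypothetical names:

```python
import torch.nn as nn


class TinyField(nn.Module):
    """Hypothetical field that skips the head MLP when no field heads are requested."""

    def __init__(self, hidden_dim: int = 64, field_heads: tuple = ()) -> None:
        super().__init__()
        self.mlp_base = nn.Sequential(nn.Linear(3, hidden_dim), nn.ReLU())
        self.density_head = nn.Linear(hidden_dim, 1)
        # Only materialize the head MLP when there are heads that consume its output.
        if field_heads:
            self.mlp_head = nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU())
        self.field_heads = nn.ModuleList(list(field_heads))
```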
From 8812d7a5b092beb822ddd2b191c5e82cd3f66ed5 Mon Sep 17 00:00:00 2001
From: Maxim Bonnaerens
Date: Fri, 22 Sep 2023 19:45:05 +0200
Subject: [PATCH 015/101] .vscode settings match move to
ms-python.black-formatter (#2456)
Co-authored-by: Brent Yi
---
.vscode/settings.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 4c3a7447af..8b0f1d7501 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -33,7 +33,7 @@
"editor.formatOnSave": true,
"python.envFile": "${workspaceFolder}/.env",
"python.formatting.provider": "none",
- "python.formatting.blackArgs": ["--line-length=120"],
+ "black-formatter.args": ["--line-length=120"],
"python.linting.pylintEnabled": false,
"python.linting.flake8Enabled": false,
"python.linting.enabled": true,
From e0f0cd05aa06bdd148a20ed62271d49ff0a6ac9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ruilong=20Li=28=E6=9D=8E=E7=91=9E=E9=BE=99=29?=
Date: Fri, 22 Sep 2023 23:04:02 -0700
Subject: [PATCH 016/101] Migrate doc from RTD to gh-pages (#2460)
* gh-pages workflow
* update doc.yml
* update
* remove RTD
* change action to main
* test fail on warning
* fail on warnings
* get back .readthedocs.yml, a bit cleanup
* revert SHA of index.md file
---
.github/workflows/doc.yml | 33 +++++++++++++++++++++++++++++++++
docs/Makefile | 2 +-
docs/make.bat | 1 +
3 files changed, 35 insertions(+), 1 deletion(-)
create mode 100644 .github/workflows/doc.yml
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
new file mode 100644
index 0000000000..51d07554db
--- /dev/null
+++ b/.github/workflows/doc.yml
@@ -0,0 +1,33 @@
+name: Docs
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+ workflow_dispatch:
+
+permissions:
+ contents: write
+jobs:
+ docs:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v3
+ with:
+ python-version: '3.9'
+ - name: Install dependencies
+ run: |
+ pip install .[docs]
+ - name: Sphinx build
+ # fail on warnings
+ run: |
+ sphinx-build docs _build -W --keep-going
+ - name: Deploy
+ uses: peaceiris/actions-gh-pages@v3
+ with:
+ publish_branch: gh-pages
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: _build/
+ force_orphan: true
+ # cname: docs.nerf.studio
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb9e..5f83226609 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -3,7 +3,7 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
-SPHINXOPTS ?=
+SPHINXOPTS ?= -W --keep-going # build fail on warning
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
diff --git a/docs/make.bat b/docs/make.bat
index 32bb24529f..008ca8c48c 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -9,6 +9,7 @@ if "%SPHINXBUILD%" == "" (
)
set SOURCEDIR=.
set BUILDDIR=_build
+set SPHINXOPTS="-W --keep-going"
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
From 53037ba8a790a7aa089c8a4983c2aa210e01111c Mon Sep 17 00:00:00 2001
From: Maxim Bonnaerens
Date: Sun, 24 Sep 2023 12:06:16 +0200
Subject: [PATCH 017/101] Allow custom sampler in ProposalNetworkSampler
(#2464)
* Allow custom sampler in ProposalNetworkSampler
This allows custom (pdf)samplers in distinct repositories while still
using the base ProposalNetworkSampler.
* sampler should inherit from pdf sampler
* docstring
---
nerfstudio/model_components/ray_samplers.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/model_components/ray_samplers.py b/nerfstudio/model_components/ray_samplers.py
index e18c680ae3..7a2052b639 100644
--- a/nerfstudio/model_components/ray_samplers.py
+++ b/nerfstudio/model_components/ray_samplers.py
@@ -530,6 +530,7 @@ class ProposalNetworkSampler(Sampler):
single_jitter: Use a same random jitter for all samples along a ray.
update_sched: A function that takes the iteration number of steps between updates.
initial_sampler: Sampler to use for the first iteration. Uses UniformLinDispPiecewise if not set.
+ pdf_sampler: PDFSampler to use after the first iteration. Uses PDFSampler if not set.
"""
def __init__(
@@ -540,6 +541,7 @@ def __init__(
single_jitter: bool = False,
update_sched: Callable = lambda x: 1,
initial_sampler: Optional[Sampler] = None,
+ pdf_sampler: Optional[PDFSampler] = None,
) -> None:
super().__init__()
self.num_proposal_samples_per_ray = num_proposal_samples_per_ray
@@ -554,7 +556,10 @@ def __init__(
self.initial_sampler = UniformLinDispPiecewiseSampler(single_jitter=single_jitter)
else:
self.initial_sampler = initial_sampler
- self.pdf_sampler = PDFSampler(include_original=False, single_jitter=single_jitter)
+ if pdf_sampler is None:
+ self.pdf_sampler = PDFSampler(include_original=False, single_jitter=single_jitter)
+ else:
+ self.pdf_sampler = pdf_sampler
self._anneal = 1.0
self._steps_since_update = 0
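With the new argument, downstream repositories can swap in their own sampler as long as it inherits from `PDFSampler`, as the commit message notes. A hedged sketch of wiring one in (sample counts are illustrative):

```python
from nerfstudio.model_components.ray_samplers import PDFSampler, ProposalNetworkSampler


class MyPDFSampler(PDFSampler):
    """Hypothetical subclass; override generate_ray_samples() to customize behavior."""


proposal_sampler = ProposalNetworkSampler(
    num_nerf_samples_per_ray=48,
    num_proposal_samples_per_ray=(256, 96),
    num_proposal_network_iterations=2,
    pdf_sampler=MyPDFSampler(include_original=False, single_jitter=True),
)
```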
From 2864c52618fa9511639015fd390f50a38b00cd61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Mon, 25 Sep 2023 17:37:08 +0200
Subject: [PATCH 018/101] Fix COLMAP dataparser masks and depthmaps (#2467)
---
nerfstudio/data/dataparsers/colmap_dataparser.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index c68a9cf6a9..071a11e1ea 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -147,11 +147,11 @@ def _get_all_images_and_cameras(self, recon_dir: Path):
frame.update(cameras[im_data.camera_id])
if self.config.masks_path is not None:
frame["mask_path"] = (
- (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix(),
+ (self.config.data / self.config.masks_path / im_data.name).with_suffix(".png").as_posix()
)
if self.config.depths_path is not None:
frame["depth_path"] = (
- (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix(),
+ (self.config.data / self.config.depths_path / im_data.name).with_suffix(".png").as_posix()
)
frames.append(frame)
if camera_model is not None:
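The bug fixed here is the classic trailing-comma pitfall: a parenthesized expression with a trailing comma is a one-element tuple, so `mask_path` and `depth_path` ended up as `(str,)` instead of `str`. A tiny illustration:

```python
path = ("masks/frame_0001.png")         # just a string: parentheses alone do nothing
path_tuple = ("masks/frame_0001.png",)  # the trailing comma makes this a tuple

assert isinstance(path, str)
assert isinstance(path_tuple, tuple)
```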
From 4cdeaa6eec186afe8fd6e1293cecd582215f1ff4 Mon Sep 17 00:00:00 2001
From: Chung Min Kim
Date: Mon, 25 Sep 2023 13:14:51 -0700
Subject: [PATCH 019/101] Track all viser GUI folders to avoid folder
duplicates (viewer-beta) (#2466)
* Track all viser GUI folders to avoid folder duplicates
* lint
---------
Co-authored-by: Justin Kerr
---
nerfstudio/viewer_beta/viewer.py | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index 6e6e02a294..d4fbf973b3 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -161,22 +161,40 @@ def __init__(
with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT):
populate_export_tab(self.viser_server, self.control_panel, config_path)
- def nested_folder_install(folder_labels: List[str], element: ViewerElement):
+ # Keep track of the pointers to generated GUI folders, because each generated folder holds a unique ID.
+ viewer_gui_folders = dict()
+
+ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], element: ViewerElement):
if len(folder_labels) == 0:
element.install(self.viser_server)
# also rewire the hook to rerender
prev_cb = element.cb_hook
element.cb_hook = lambda element: [prev_cb(element), self._interrupt_render(element)]
else:
- with self.viser_server.add_gui_folder(folder_labels[0]):
- nested_folder_install(folder_labels[1:], element)
+ # recursively create folders
+ # If the folder name is "Custom Elements/a/b", then:
+ # in the beginning: folder_path will be
+ # "/".join([] + ["Custom Elements"]) --> "Custom Elements"
+ # later, folder_path will be
+ # "/".join(["Custom Elements"] + ["a"]) --> "Custom Elements/a"
+ # "/".join(["Custom Elements", "a"] + ["b"]) --> "Custom Elements/a/b"
+ # --> the element will be installed in the folder "Custom Elements/a/b"
+ #
+ # Note that the gui_folder is created only when the folder is not in viewer_gui_folders,
+ # and we use the folder_path as the key to check if the folder is already created.
+ # Otherwise, use the existing folder as context manager.
+ folder_path = "/".join(prev_labels + [folder_labels[0]])
+ if folder_path not in viewer_gui_folders:
+ viewer_gui_folders[folder_path] = self.viser_server.add_gui_folder(folder_labels[0])
+ with viewer_gui_folders[folder_path]:
+ nested_folder_install(folder_labels[1:], prev_labels + [folder_labels[0]], element)
with control_tab:
self.viewer_elements = []
self.viewer_elements.extend(parse_object(pipeline, ViewerElement, "Custom Elements"))
for param_path, element in self.viewer_elements:
folder_labels = param_path.split("/")[:-1]
- nested_folder_install(folder_labels, element)
+ nested_folder_install(folder_labels, [], element)
# scrape the trainer/pipeline for any ViewerControl objects to initialize them
self.viewer_controls: List[ViewerControl] = [
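The refactor caches folder handles by their full path so that repeated installs under e.g. `Custom Elements/a/b` reuse the same GUI folders instead of creating duplicates. A self-contained sketch of the same caching pattern, with a stand-in handle replacing viser's `add_gui_folder`:

```python
from typing import Callable, Dict, List


class FolderHandle:
    """Stand-in for a viser GUI-folder handle that can be re-entered as a context manager."""

    def __init__(self, label: str) -> None:
        self.label = label

    def __enter__(self) -> "FolderHandle":
        return self

    def __exit__(self, *exc) -> None:
        pass


folders: Dict[str, FolderHandle] = {}  # cache keyed by the full folder path


def nested_folder_install(folder_labels: List[str], prev_labels: List[str], install: Callable[[], None]) -> None:
    if not folder_labels:
        install()
        return
    # e.g. "Custom Elements", then "Custom Elements/a", then "Custom Elements/a/b"
    folder_path = "/".join(prev_labels + [folder_labels[0]])
    if folder_path not in folders:
        folders[folder_path] = FolderHandle(folder_labels[0])  # created only once
    with folders[folder_path]:
        nested_folder_install(folder_labels[1:], prev_labels + [folder_labels[0]], install)


# Two elements under the same path reuse the cached folders:
nested_folder_install(["Custom Elements", "a"], [], lambda: print("install slider"))
nested_folder_install(["Custom Elements", "a"], [], lambda: print("install button"))
```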
From 593950efec18bdfb28297f1c9d17248f9a5e2ac0 Mon Sep 17 00:00:00 2001
From: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com>
Date: Wed, 27 Sep 2023 10:45:58 -0700
Subject: [PATCH 020/101] Update colmap installation instructions with conda
(#2473)
---
docs/quickstart/custom_dataset.md | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md
index c826979360..723a97c91e 100644
--- a/docs/quickstart/custom_dataset.md
+++ b/docs/quickstart/custom_dataset.md
@@ -57,10 +57,16 @@ There are many ways to install COLMAP, unfortunately it can sometimes be a bit f
::::::{tab-set}
:::::{tab-item} Linux
-We recommend trying `apt`:
+We recommend trying `conda`:
```
-sudo apt install colmap
+conda install -c conda-forge colmap
+```
+
+Check that COLMAP 3.8 with CUDA is successfully installed:
+
+```
+colmap -h
```
If that doesn't work, you can try vcpkg:
From 169030199ee9e8c7e47a7f30bf542b749a4bdf06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ruilong=20Li=28=E6=9D=8E=E7=91=9E=E9=BE=99=29?=
Date: Thu, 28 Sep 2023 18:14:40 -0700
Subject: [PATCH 021/101] Update doc.yml (#2479)
---
.github/workflows/doc.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index 51d07554db..0878bdea99 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -30,4 +30,4 @@ jobs:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: _build/
force_orphan: true
- # cname: docs.nerf.studio
+ cname: docs.nerf.studio
From 88d5b03a802c7ec6b7dc21078ec8237e942a268e Mon Sep 17 00:00:00 2001
From: Matthew Tancik
Date: Thu, 28 Sep 2023 18:15:35 -0700
Subject: [PATCH 022/101] Remove rtd (#2478)
---
.readthedocs.yaml | 37 -------------------------------------
1 file changed, 37 deletions(-)
delete mode 100644 .readthedocs.yaml
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
deleted file mode 100644
index d66aefd16a..0000000000
--- a/.readthedocs.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# .readthedocs.yaml
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
-# Required
-version: 2
-
-# Set the version of Python and other tools you might need
-build:
- os: ubuntu-20.04
- tools:
- python: '3.9'
- # You can also specify other tool versions:
- # nodejs: "16"
- # rust: "1.55"
- # golang: "1.17"
-
-# Build documentation in the docs/ directory with Sphinx
-sphinx:
- fail_on_warning: true
- configuration: docs/conf.py
-
-# If using Sphinx, optionally build your docs in additional formats such as PDF
-# formats:
-# - pdf
-
-# Optionally declare the Python requirements required to build your docs
-python:
- install:
- # Equivalent to 'pip install .'
- - method: pip
- path: .
- # Equivalent to 'pip install .[docs]'
- - method: pip
- path: .
- extra_requirements:
- - docs
From bbe31736f31ae3246579df9e3c256c1e81bef9af Mon Sep 17 00:00:00 2001
From: Matthew Tancik
Date: Fri, 29 Sep 2023 10:48:51 -0700
Subject: [PATCH 023/101] Update README.md
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index b6f05f9be0..7e1a0ad210 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
-
+
From c93b374d08b2c1e4f95032f61d55b5744e384dfd Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Fri, 29 Sep 2023 12:33:19 -0700
Subject: [PATCH 024/101] Don't deploy docs from pull requests (#2482)
---
.github/workflows/doc.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index 0878bdea99..7b46b3429e 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -31,3 +31,4 @@ jobs:
publish_dir: _build/
force_orphan: true
cname: docs.nerf.studio
+ if: github.event_name != 'pull_request'
From 7856aa006011404e0f0b4b77a6252f1ad5e8cc28 Mon Sep 17 00:00:00 2001
From: pierremerriaux-leddartech
<42007976+pierremerriaux-leddartech@users.noreply.github.com>
Date: Fri, 29 Sep 2023 19:49:20 -0400
Subject: [PATCH 025/101] fix:correction of PairPixelSampler in case of using
masks with depth-nerfacto (#2477)
fix:correction of PairPixelSampler in case of using masks
---
nerfstudio/data/pixel_samplers.py | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
index 463d08294c..d7e76806d8 100644
--- a/nerfstudio/data/pixel_samplers.py
+++ b/nerfstudio/data/pixel_samplers.py
@@ -399,10 +399,11 @@ def sample_method( # pylint: disable=no-self-use
mask: Optional[Tensor] = None,
device: Union[torch.device, str] = "cpu",
) -> Int[Tensor, "batch_size 3"]:
+ rays_to_sample = self.rays_to_sample
if isinstance(mask, Tensor):
m = erode_mask(mask.permute(0, 3, 1, 2).float(), pixel_radius=self.radius)
nonzero_indices = torch.nonzero(m[:, 0], as_tuple=False).to(device)
- chosen_indices = random.sample(range(len(nonzero_indices)), k=self.rays_to_sample)
+ chosen_indices = random.sample(range(len(nonzero_indices)), k=rays_to_sample)
indices = nonzero_indices[chosen_indices]
else:
rays_to_sample = self.rays_to_sample
@@ -418,12 +419,12 @@ def sample_method( # pylint: disable=no-self-use
ws = torch.randint(self.radius, image_width - self.radius, s, dtype=torch.long, device=device)
indices = torch.concat((ns, hs, ws), dim=1)
- pair_indices = torch.hstack(
- (
- torch.zeros(rays_to_sample, 1, device=device, dtype=torch.long),
- torch.randint(-self.radius, self.radius, (rays_to_sample, 2), device=device, dtype=torch.long),
- )
+ pair_indices = torch.hstack(
+ (
+ torch.zeros(rays_to_sample, 1, device=device, dtype=torch.long),
+ torch.randint(-self.radius, self.radius, (rays_to_sample, 2), device=device, dtype=torch.long),
)
- pair_indices += indices
- indices = torch.hstack((indices, pair_indices)).view(rays_to_sample * 2, 3)
+ )
+ pair_indices += indices
+ indices = torch.hstack((indices, pair_indices)).view(rays_to_sample * 2, 3)
return indices
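The fix hoists `rays_to_sample` above the branch and re-indents the pair construction so it runs for both the masked and unmasked cases. The underlying idea, sketched with plain tensors (shapes and values are illustrative):

```python
import torch

rays_to_sample, radius = 4, 2
# Base indices: (image_idx, row, col) for each sampled pixel.
indices = torch.tensor([[0, 10, 10], [0, 20, 5], [0, 7, 31], [0, 15, 15]], dtype=torch.long)

# Each base pixel gets a partner offset by up to `radius` pixels in the same image.
pair_indices = torch.hstack(
    (
        torch.zeros(rays_to_sample, 1, dtype=torch.long),
        torch.randint(-radius, radius, (rays_to_sample, 2), dtype=torch.long),
    )
)
pair_indices += indices
# Interleave base and partner pixels: (rays_to_sample * 2, 3).
indices = torch.hstack((indices, pair_indices)).view(rays_to_sample * 2, 3)
print(indices.shape)  # torch.Size([8, 3])
```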
From b78d0faa1289c234f644ff296ae1f5f64ad3a05d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Sun, 1 Oct 2023 22:12:48 +0200
Subject: [PATCH 026/101] ns-render - render all images (#2459)
* ns-render - render all images
* Fix commonprefix->commonpath
* Fix raw rendering
---
nerfstudio/scripts/render.py | 208 ++++++++++++++++++++++++++++++++-
nerfstudio/utils/eval_utils.py | 7 +-
2 files changed, 211 insertions(+), 4 deletions(-)
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 99a7cae01d..ecb7961cc4 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -23,7 +23,8 @@
import struct
import shutil
import sys
-from contextlib import ExitStack
+import gzip
+from contextlib import ExitStack, contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union
@@ -52,8 +53,11 @@
get_path_from_json,
get_spiral_path,
)
-from nerfstudio.cameras.cameras import Cameras, CameraType
-from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager
+from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle
+from nerfstudio.data.datasets.base_dataset import Dataset
+from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig
+from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader
+from nerfstudio.engine.trainer import TrainerConfig
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.model_components import renderers
from nerfstudio.pipelines.base_pipeline import Pipeline
@@ -585,11 +589,209 @@ def main(self) -> None:
)
+@contextmanager
+def _disable_datamanager_setup(cls):
+ """
+ Disables setup_train or setup_eval for faster initialization.
+ """
+ old_setup_train = getattr(cls, "setup_train")
+ old_setup_eval = getattr(cls, "setup_eval")
+ setattr(cls, "setup_train", lambda *args, **kwargs: None)
+ setattr(cls, "setup_eval", lambda *args, **kwargs: None)
+ yield cls
+ setattr(cls, "setup_train", old_setup_train)
+ setattr(cls, "setup_eval", old_setup_eval)
+
+
+@dataclass
+class DatasetRender(BaseRender):
+ """Render all images in the dataset."""
+
+ output_path: Path = Path("renders")
+ """Path to output video file."""
+ data: Optional[Path] = None
+ """Override path to the dataset."""
+ downscale_factor: Optional[float] = None
+ """Scaling factor to apply to the camera image resolution."""
+ split: Literal["train", "val", "test", "train+test"] = "test"
+ """Split to render."""
+ rendered_output_names: Optional[List[str]] = field(default_factory=lambda: None)
+ """Name of the renderer outputs to use. rgb, depth, raw-depth, gt-rgb etc. By default all outputs are rendered."""
+
+ def main(self):
+ config: TrainerConfig
+
+ def update_config(config: TrainerConfig) -> TrainerConfig:
+ data_manager_config = config.pipeline.datamanager
+ assert isinstance(data_manager_config, VanillaDataManagerConfig)
+ data_manager_config.eval_image_indices = None
+ data_manager_config.eval_num_images_to_sample_from = -1
+ data_manager_config.eval_num_times_to_repeat_images = -1
+ data_manager_config.train_num_images_to_sample_from = -1
+ data_manager_config.train_num_times_to_repeat_images = -1
+ data_manager_config.data = self.data
+ if self.downscale_factor is not None:
+ assert hasattr(data_manager_config.dataparser, "downscale_factor")
+ setattr(data_manager_config.dataparser, "downscale_factor", self.downscale_factor)
+ return config
+
+ config, pipeline, _, _ = eval_setup(
+ self.load_config,
+ eval_num_rays_per_chunk=self.eval_num_rays_per_chunk,
+ test_mode="inference",
+ update_config_callback=update_config,
+ )
+ data_manager_config = config.pipeline.datamanager
+ assert isinstance(data_manager_config, VanillaDataManagerConfig)
+
+ for split in self.split.split("+"):
+ datamanager: VanillaDataManager
+ dataset: Dataset
+ if split == "train":
+ with _disable_datamanager_setup(data_manager_config._target): # pylint: disable=protected-access
+ datamanager = data_manager_config.setup(test_mode="test", device=pipeline.device)
+
+ dataset = datamanager.train_dataset
+ dataparser_outputs = getattr(dataset, "_dataparser_outputs", datamanager.train_dataparser_outputs)
+ else:
+ with _disable_datamanager_setup(data_manager_config._target): # pylint: disable=protected-access
+ datamanager = data_manager_config.setup(test_mode=split, device=pipeline.device)
+
+ dataset = datamanager.eval_dataset
+ dataparser_outputs = getattr(dataset, "_dataparser_outputs", None)
+ if dataparser_outputs is None:
+ dataparser_outputs = datamanager.dataparser.get_dataparser_outputs(split=datamanager.test_split)
+ dataloader = FixedIndicesEvalDataloader(
+ input_dataset=dataset,
+ device=datamanager.device,
+ num_workers=datamanager.world_size * 4,
+ )
+ images_root = Path(os.path.commonpath(dataparser_outputs.image_filenames))
+ with Progress(
+ TextColumn(f":movie_camera: Rendering split {split} :movie_camera:"),
+ BarColumn(),
+ TaskProgressColumn(
+ text_format="[progress.percentage]{task.completed}/{task.total:>.0f}({task.percentage:>3.1f}%)",
+ show_speed=True,
+ ),
+ ItersPerSecColumn(suffix="fps"),
+ TimeRemainingColumn(elapsed_when_finished=False, compact=False),
+ TimeElapsedColumn(),
+ ) as progress:
+ for camera_idx, (ray_bundle, batch) in enumerate(progress.track(dataloader, total=len(dataset))):
+ ray_bundle: RayBundle
+ with torch.no_grad():
+ outputs = pipeline.model.get_outputs_for_camera_ray_bundle(ray_bundle)
+
+ gt_batch = batch.copy()
+ gt_batch["rgb"] = gt_batch.pop("image")
+ all_outputs = (
+ list(outputs.keys())
+ + [f"raw-{x}" for x in outputs.keys()]
+ + [f"gt-{x}" for x in gt_batch.keys()]
+ + [f"raw-gt-{x}" for x in gt_batch.keys()]
+ )
+ rendered_output_names = self.rendered_output_names
+ if rendered_output_names is None:
+ rendered_output_names = ["gt-rgb"] + list(outputs.keys())
+ for rendered_output_name in rendered_output_names:
+ if rendered_output_name not in all_outputs:
+ CONSOLE.rule("Error", style="red")
+ CONSOLE.print(
+ f"Could not find {rendered_output_name} in the model outputs", justify="center"
+ )
+ CONSOLE.print(
+ f"Please set --rendered-output-name to one of: {all_outputs}", justify="center"
+ )
+ sys.exit(1)
+
+ is_raw = False
+ is_depth = rendered_output_name.find("depth") != -1
+ image_name = f"{camera_idx:05d}"
+
+ # Try to get the original filename
+ image_name = (
+ dataparser_outputs.image_filenames[camera_idx].with_suffix("").relative_to(images_root)
+ )
+
+ output_path = self.output_path / split / rendered_output_name / image_name
+ output_path.parent.mkdir(exist_ok=True, parents=True)
+
+ output_name = rendered_output_name
+ if output_name.startswith("raw-"):
+ output_name = output_name[4:]
+ is_raw = True
+ if output_name.startswith("gt-"):
+ output_name = output_name[3:]
+ output_image = gt_batch[output_name]
+ else:
+ output_image = outputs[output_name]
+ if is_depth:
+ # Divide by the dataparser scale factor
+ output_image.div_(dataparser_outputs.dataparser_scale)
+ else:
+ if output_name.startswith("gt-"):
+ output_name = output_name[3:]
+ output_image = gt_batch[output_name]
+ else:
+ output_image = outputs[output_name]
+ del output_name
+
+ # Map to color spaces / numpy
+ if is_raw:
+ output_image = output_image.cpu().numpy()
+ elif is_depth:
+ output_image = (
+ colormaps.apply_depth_colormap(
+ output_image,
+ accumulation=outputs["accumulation"],
+ near_plane=self.depth_near_plane,
+ far_plane=self.depth_far_plane,
+ colormap_options=self.colormap_options,
+ )
+ .cpu()
+ .numpy()
+ )
+ else:
+ output_image = (
+ colormaps.apply_colormap(
+ image=output_image,
+ colormap_options=self.colormap_options,
+ )
+ .cpu()
+ .numpy()
+ )
+
+ # Save to file
+ if is_raw:
+ with gzip.open(output_path.with_suffix(".npy.gz"), "wb") as f:
+ np.save(f, output_image)
+ elif self.image_format == "png":
+ media.write_image(output_path.with_suffix(".png"), output_image, fmt="png")
+ elif self.image_format == "jpeg":
+ media.write_image(
+ output_path.with_suffix(".jpg"), output_image, fmt="jpeg", quality=self.jpeg_quality
+ )
+ else:
+ raise ValueError(f"Unknown image format {self.image_format}")
+
+ table = Table(
+ title=None,
+ show_header=False,
+ box=box.MINIMAL,
+ title_style=style.Style(bold=True),
+ )
+ for split in self.split.split("+"):
+ table.add_row(f"Outputs {split}", str(self.output_path / split))
+ CONSOLE.print(Panel(table, title="[bold][green]:tada: Render on split {} Complete :tada:[/bold]", expand=False))
+
+
Commands = tyro.conf.FlagConversionOff[
Union[
Annotated[RenderCameraPath, tyro.conf.subcommand(name="camera-path")],
Annotated[RenderInterpolated, tyro.conf.subcommand(name="interpolate")],
Annotated[SpiralRender, tyro.conf.subcommand(name="spiral")],
+ Annotated[DatasetRender, tyro.conf.subcommand(name="dataset")],
]
]
diff --git a/nerfstudio/utils/eval_utils.py b/nerfstudio/utils/eval_utils.py
index 13948678bb..7e04005368 100644
--- a/nerfstudio/utils/eval_utils.py
+++ b/nerfstudio/utils/eval_utils.py
@@ -20,7 +20,7 @@
import os
import sys
from pathlib import Path
-from typing import Literal, Optional, Tuple
+from typing import Literal, Optional, Tuple, Callable
import torch
import yaml
@@ -69,6 +69,7 @@ def eval_setup(
config_path: Path,
eval_num_rays_per_chunk: Optional[int] = None,
test_mode: Literal["test", "val", "inference"] = "test",
+ update_config_callback: Optional[Callable[[TrainerConfig], TrainerConfig]] = None,
) -> Tuple[TrainerConfig, Pipeline, Path, int]:
"""Shared setup for loading a saved pipeline for evaluation.
@@ -79,6 +80,7 @@ def eval_setup(
'val': loads train/val datasets into memory
'test': loads train/test dataset into memory
'inference': does not load any dataset into memory
+ update_config_callback: Callback to update the config before loading the pipeline
Returns:
@@ -92,6 +94,9 @@ def eval_setup(
if eval_num_rays_per_chunk:
config.pipeline.model.eval_num_rays_per_chunk = eval_num_rays_per_chunk
+ if update_config_callback is not None:
+ config = update_config_callback(config)
+
# load checkpoints from wherever they were saved
# TODO: expose the ability to choose an arbitrary checkpoint
config.load_dir = config.get_checkpoint_dir()
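The new `update_config_callback` hook is what `DatasetRender` uses above to rewrite the data manager settings before the pipeline is built. A minimal hedged sketch of calling it directly (both paths are placeholders):

```python
from pathlib import Path

from nerfstudio.engine.trainer import TrainerConfig
from nerfstudio.utils.eval_utils import eval_setup


def update_config(config: TrainerConfig) -> TrainerConfig:
    # Tweak anything on the loaded config before the pipeline is instantiated,
    # e.g. point the data manager at a different dataset location.
    config.pipeline.datamanager.data = Path("data/my_scene")  # placeholder
    return config


config, pipeline, checkpoint_path, step = eval_setup(
    Path("outputs/my_scene/nerfacto/2023-10-01_000000/config.yml"),  # placeholder
    test_mode="inference",
    update_config_callback=update_config,
)
```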
From c2f5e68d548b66114a3dab0ad7707906a47126c9 Mon Sep 17 00:00:00 2001
From: Reza Kermani
Date: Sun, 1 Oct 2023 23:31:02 -0400
Subject: [PATCH 027/101] fixing broken links in README (#2483)
fixing broken links
---
README.md | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 7e1a0ad210..a853192ad4 100644
--- a/README.md
+++ b/README.md
@@ -242,15 +242,15 @@ Using an existing dataset is great, but likely you want to use your own data! We
| Data | Capture Device | Requirements | `ns-process-data` Speed |
| ---------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- |
-| 📷 [Images](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#images-and-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
-| 📹 [Video](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#images-and-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
-| 🌐 [360 Data](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#360_data) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
-| 📱 [Polycam](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | 🐇 |
-| 📱 [KIRI Engine](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#kiri-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | 🐇 |
-| 📱 [Record3D](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | 🐇 |
-| 🖥 [Metashape](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | 🐇 |
-| 🖥 [RealityCapture](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | 🐇 |
-| 🛠 [Custom](https://docs.nerf.studio/en/latest/quickstart/data_conventions.html) | Any | Camera Poses | 🐇 |
+| 📷 [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
+| 📹 [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
+| 🌐 [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | 🐢 |
+| 📱 [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | 🐇 |
+| 📱 [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | 🐇 |
+| 📱 [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | 🐇 |
+| 🖥 [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | 🐇 |
+| 🖥 [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | 🐇 |
+| 🛠 [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | 🐇 |
## 5. Advanced Options
From 0d746b136347ed7b2eae6b9bd44df66f9b8edf34 Mon Sep 17 00:00:00 2001
From: Maxim Bonnaerens
Date: Mon, 2 Oct 2023 23:07:34 +0200
Subject: [PATCH 028/101] Fourier Feature encodings and polyhedron encodings
(#2463)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Fourier Feature encodings and polyhedron encodings
Rework RFFEncoding to be a subclass of the more general Fourier Feature
Encodings and introduce Polyhedron encodings as introduced in
mipnerf360.
* b_matrix -> basis
* Add typing
* fighting pyright
* use scale argument
* continue the fight
* ignore
* ignore em all
* add docstring and rename generate_basis to generate_polyhedron_basis
* Try to please pyright with assert
* Immediately allocate tensor on correct device
* private functions and docstrings update
* doc fix continued
---------
Co-authored-by: Jonáš Kulhánek
---
nerfstudio/field_components/encodings.py | 98 +++++++++++---
nerfstudio/utils/math.py | 161 ++++++++++++++++++++++-
2 files changed, 237 insertions(+), 22 deletions(-)
diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py
index 437ee805fe..4ba28f0c4e 100644
--- a/nerfstudio/field_components/encodings.py
+++ b/nerfstudio/field_components/encodings.py
@@ -27,9 +27,13 @@
from torch import Tensor, nn
from nerfstudio.field_components.base_field_component import FieldComponent
-from nerfstudio.utils.math import components_from_spherical_harmonics, expected_sin
+from nerfstudio.utils.external import TCNN_EXISTS, tcnn
+from nerfstudio.utils.math import (
+ components_from_spherical_harmonics,
+ expected_sin,
+ generate_polyhedron_basis,
+)
from nerfstudio.utils.printing import print_tcnn_speed_warning
-from nerfstudio.utils.external import tcnn, TCNN_EXISTS
class Encoding(FieldComponent):
@@ -153,7 +157,7 @@ def pytorch_fwd(
Output values will be between -1 and 1
"""
scaled_in_tensor = 2 * torch.pi * in_tensor # scale to [0, 2pi]
- freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies).to(in_tensor.device)
+ freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies, device=in_tensor.device)
scaled_inputs = scaled_in_tensor[..., None] * freqs # [..., "input_dim", "num_scales"]
scaled_inputs = scaled_inputs.view(*scaled_inputs.shape[:-2], -1) # [..., "input_dim" * "num_scales"]
@@ -178,34 +182,40 @@ def forward(
return self.pytorch_fwd(in_tensor, covs)
-class RFFEncoding(Encoding):
- """Random Fourier Feature encoding. Supports integrated encodings.
+class FFEncoding(Encoding):
+ """Fourier Feature encoding. Supports integrated encodings.
Args:
in_dim: Input dimension of tensor
- num_frequencies: Number of encoding frequencies
- scale: Std of Gaussian to sample frequencies. Must be greater than zero
+ basis: Basis matrix from which to construct the Fourier features.
+ num_frequencies: Number of encoded frequencies per axis
+ min_freq_exp: Minimum frequency exponent
+ max_freq_exp: Maximum frequency exponent
include_input: Append the input coordinate to the encoding
"""
- def __init__(self, in_dim: int, num_frequencies: int, scale: float, include_input: bool = False) -> None:
+ def __init__(
+ self,
+ in_dim: int,
+ basis: Float[Tensor, "M N"],
+ num_frequencies: int,
+ min_freq_exp: float,
+ max_freq_exp: float,
+ include_input: bool = False,
+ ) -> None:
super().__init__(in_dim)
-
self.num_frequencies = num_frequencies
- if not scale > 0:
- raise ValueError("RFF encoding scale should be greater than zero")
- self.scale = scale
- if self.in_dim is None:
- raise ValueError("Input dimension has not been set")
- b_matrix = torch.normal(mean=0, std=self.scale, size=(self.in_dim, self.num_frequencies))
- self.register_buffer(name="b_matrix", tensor=b_matrix)
+ self.min_freq = min_freq_exp
+ self.max_freq = max_freq_exp
+ self.register_buffer(name="b_matrix", tensor=basis)
self.include_input = include_input
def get_out_dim(self) -> int:
- out_dim = self.num_frequencies * 2
+ if self.in_dim is None:
+ raise ValueError("Input dimension has not been set")
+ assert isinstance(self.b_matrix, Tensor)
+ out_dim = self.b_matrix.shape[1] * self.num_frequencies * 2
if self.include_input:
- if self.in_dim is None:
- raise ValueError("Input dimension has not been set")
out_dim += self.in_dim
return out_dim
@@ -214,7 +224,7 @@ def forward(
in_tensor: Float[Tensor, "*bs input_dim"],
covs: Optional[Float[Tensor, "*bs input_dim input_dim"]] = None,
) -> Float[Tensor, "*bs output_dim"]:
- """Calculates RFF encoding. If covariances are provided the encodings will be integrated as proposed
+ """Calculates FF encoding. If covariances are provided the encodings will be integrated as proposed
in mip-NeRF.
Args:
@@ -226,11 +236,16 @@ def forward(
"""
scaled_in_tensor = 2 * torch.pi * in_tensor # scale to [0, 2pi]
scaled_inputs = scaled_in_tensor @ self.b_matrix # [..., "num_frequencies"]
+ freqs = 2 ** torch.linspace(self.min_freq, self.max_freq, self.num_frequencies, device=in_tensor.device)
+ scaled_inputs = scaled_inputs[..., None] * freqs # [..., "input_dim", "num_scales"]
+ scaled_inputs = scaled_inputs.view(*scaled_inputs.shape[:-2], -1) # [..., "input_dim" * "num_scales"]
if covs is None:
encoded_inputs = torch.sin(torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1))
else:
input_var = torch.sum((covs @ self.b_matrix) * self.b_matrix, -2)
+ input_var = input_var[..., :, None] * freqs[None, :] ** 2
+ input_var = input_var.reshape((*input_var.shape[:-2], -1))
encoded_inputs = expected_sin(
torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1), torch.cat(2 * [input_var], dim=-1)
)
@@ -241,6 +256,49 @@ def forward(
return encoded_inputs
+class RFFEncoding(FFEncoding):
+ """Random Fourier Feature encoding. Supports integrated encodings.
+
+ Args:
+ in_dim: Input dimension of tensor
+ num_frequencies: Number of encoding frequencies
+ scale: Std of Gaussian to sample frequencies. Must be greater than zero
+ include_input: Append the input coordinate to the encoding
+ """
+
+ def __init__(self, in_dim: int, num_frequencies: int, scale: float, include_input: bool = False) -> None:
+ if not scale > 0:
+ raise ValueError("RFF encoding scale should be greater than zero")
+
+ b_matrix = torch.normal(mean=0, std=scale, size=(in_dim, num_frequencies))
+ super().__init__(in_dim, b_matrix, 1, 0.0, 0.0, include_input)
+
+
+class PolyhedronFFEncoding(FFEncoding):
+ """Fourier Feature encoding using polyhedron basis as proposed by mip-NeRF360. Supports integrated encodings.
+
+ Args:
+ num_frequencies: Number of encoded frequencies per axis
+ min_freq_exp: Minimum frequency exponent
+ max_freq_exp: Maximum frequency exponent
+ basis_shape: Shape of polyhedron basis. Either "octahedron" or "icosahedron"
+ basis_subdivisions: Number of times to tesselate the polyhedron.
+ include_input: Append the input coordinate to the encoding
+ """
+
+ def __init__(
+ self,
+ num_frequencies: int,
+ min_freq_exp: float,
+ max_freq_exp: float,
+ basis_shape: Literal["octahedron", "icosahedron"] = "octahedron",
+ basis_subdivisions: int = 1,
+ include_input: bool = False,
+ ) -> None:
+ basis_t = generate_polyhedron_basis(basis_shape, basis_subdivisions).T
+ super().__init__(3, basis_t, num_frequencies, min_freq_exp, max_freq_exp, include_input)
+
+
class HashEncoding(Encoding):
"""Hash encoding
diff --git a/nerfstudio/utils/math.py b/nerfstudio/utils/math.py
index 8567a8b289..0ba9e6a51c 100644
--- a/nerfstudio/utils/math.py
+++ b/nerfstudio/utils/math.py
@@ -14,6 +14,8 @@
""" Math Helper Functions """
+import itertools
+import math
from dataclasses import dataclass
from typing import Literal, Tuple
@@ -195,7 +197,6 @@ def expected_sin(x_means: torch.Tensor, x_vars: torch.Tensor) -> torch.Tensor:
Returns:
torch.Tensor: The expected value of sin.
"""
-
return torch.exp(-0.5 * x_vars) * torch.sin(x_means)
@@ -360,4 +361,160 @@ def normalized_depth_scale_and_shift(
shift[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid]
return scale, shift
- return scale, shift
+
+
+def columnwise_squared_l2_distance(
+ x: Float[Tensor, "*M N"],
+ y: Float[Tensor, "*M N"],
+) -> Float[Tensor, "N N"]:
+ """Compute the squared Euclidean distance between all pairs of columns.
+ Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py
+
+ Args:
+ x: tensor of floats, with shape [M, N].
+ y: tensor of floats, with shape [M, N].
+ Returns:
+ sq_dist: tensor of floats, with shape [N, N].
+ """
+ # Use the fact that ||x - y||^2 == ||x||^2 + ||y||^2 - 2 x^T y.
+ sq_norm_x = torch.sum(x**2, 0)
+ sq_norm_y = torch.sum(y**2, 0)
+ sq_dist = sq_norm_x[:, None] + sq_norm_y[None, :] - 2 * x.T @ y
+ return sq_dist
+
+
+def _compute_tesselation_weights(v: int) -> Tensor:
+ """Tesselate the vertices of a triangle by a factor of `v`.
+ Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py
+
+ Args:
+ v: int, the factor of the tesselation (v==1 is a no-op to the triangle).
+
+ Returns:
+ weights: tesselated weights.
+ """
+ if v < 1:
+ raise ValueError(f"v {v} must be >= 1")
+ int_weights = []
+ for i in range(v + 1):
+ for j in range(v + 1 - i):
+ int_weights.append((i, j, v - (i + j)))
+ int_weights = torch.FloatTensor(int_weights)
+ weights = int_weights / v # Barycentric weights.
+ return weights
+
+
+def _tesselate_geodesic(
+ vertices: Float[Tensor, "N 3"], faces: Float[Tensor, "M 3"], v: int, eps: float = 1e-4
+) -> Tensor:
+ """Tesselate the vertices of a geodesic polyhedron.
+
+ Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py
+
+ Args:
+ vertices: tensor of floats, the vertex coordinates of the geodesic.
+ faces: tensor of ints, the indices of the vertices of base_verts that
+ constitute each face of the polyhedra.
+ v: int, the factor of the tesselation (v==1 is a no-op).
+ eps: float, a small value used to determine if two vertices are the same.
+
+ Returns:
+ verts: a tensor of floats, the coordinates of the tesselated vertices.
+ """
+ tri_weights = _compute_tesselation_weights(v)
+
+ verts = []
+ for face in faces:
+ new_verts = torch.matmul(tri_weights, vertices[face, :])
+ new_verts /= torch.sqrt(torch.sum(new_verts**2, 1, keepdim=True))
+ verts.append(new_verts)
+ verts = torch.concatenate(verts, 0)
+
+ sq_dist = columnwise_squared_l2_distance(verts.T, verts.T)
+ assignment = torch.tensor([torch.min(torch.argwhere(d <= eps)) for d in sq_dist])
+ unique = torch.unique(assignment)
+ verts = verts[unique, :]
+ return verts
+
+
+def generate_polyhedron_basis(
+ basis_shape: Literal["icosahedron", "octahedron"],
+ angular_tesselation: int,
+ remove_symmetries: bool = True,
+ eps: float = 1e-4,
+) -> Tensor:
+ """Generates a 3D basis by tesselating a geometric polyhedron.
+ Basis is used to construct Fourier features for positional encoding.
+ See Mip-Nerf360 paper: https://arxiv.org/abs/2111.12077
+ Adapted from https://github.com/google-research/multinerf/blob/5b4d4f64608ec8077222c52fdf814d40acc10bc1/internal/geopoly.py
+
+ Args:
+ basis_shape: string, the name of the starting polyhedron, must be either
+ 'icosahedron' or 'octahedron'.
+ angular_tesselation: int, the number of times to tesselate the polyhedron,
+ must be >= 1 (a value of 1 is a no-op to the polyhedron).
+ remove_symmetries: bool, if True then remove the symmetric basis columns,
+ which is usually a good idea because otherwise projections onto the basis
+ will have redundant negative copies of each other.
+ eps: float, a small number used to determine symmetries.
+
+ Returns:
+ basis: a matrix with shape [3, n].
+ """
+ if basis_shape == "icosahedron":
+ a = (math.sqrt(5) + 1) / 2
+ verts = torch.FloatTensor(
+ [
+ (-1, 0, a),
+ (1, 0, a),
+ (-1, 0, -a),
+ (1, 0, -a),
+ (0, a, 1),
+ (0, a, -1),
+ (0, -a, 1),
+ (0, -a, -1),
+ (a, 1, 0),
+ (-a, 1, 0),
+ (a, -1, 0),
+ (-a, -1, 0),
+ ]
+ ) / math.sqrt(a + 2)
+ faces = torch.tensor(
+ [
+ (0, 4, 1),
+ (0, 9, 4),
+ (9, 5, 4),
+ (4, 5, 8),
+ (4, 8, 1),
+ (8, 10, 1),
+ (8, 3, 10),
+ (5, 3, 8),
+ (5, 2, 3),
+ (2, 7, 3),
+ (7, 10, 3),
+ (7, 6, 10),
+ (7, 11, 6),
+ (11, 0, 6),
+ (0, 1, 6),
+ (6, 1, 10),
+ (9, 0, 11),
+ (9, 11, 2),
+ (9, 2, 5),
+ (7, 2, 11),
+ ]
+ )
+ verts = _tesselate_geodesic(verts, faces, angular_tesselation)
+ elif basis_shape == "octahedron":
+ verts = torch.FloatTensor([(0, 0, -1), (0, 0, 1), (0, -1, 0), (0, 1, 0), (-1, 0, 0), (1, 0, 0)])
+ corners = torch.FloatTensor(list(itertools.product([-1, 1], repeat=3)))
+ pairs = torch.argwhere(columnwise_squared_l2_distance(corners.T, verts.T) == 2)
+ faces, _ = torch.sort(torch.reshape(pairs[:, 1], [3, -1]).T, 1)
+ verts = _tesselate_geodesic(verts, faces, angular_tesselation)
+
+ if remove_symmetries:
+ # Remove elements of `verts` that are reflections of each other.
+ match = columnwise_squared_l2_distance(verts.T, -verts.T) < eps
+ verts = verts[torch.any(torch.triu(match), 1), :]
+
+ basis = verts.flip(-1)
+ return basis
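A short sketch of using the reworked encodings (parameter values are illustrative, not recommendations):

```python
import torch

from nerfstudio.field_components.encodings import PolyhedronFFEncoding, RFFEncoding

# Random Fourier features: Gaussian-sampled basis, a single frequency band.
rff = RFFEncoding(in_dim=3, num_frequencies=64, scale=10.0, include_input=True)

# mip-NeRF 360-style features: basis from a tesselated icosahedron, several octaves.
poly = PolyhedronFFEncoding(
    num_frequencies=6, min_freq_exp=0.0, max_freq_exp=5.0, basis_shape="icosahedron"
)

x = torch.rand(1024, 3)
print(rff.get_out_dim(), rff(x).shape)
print(poly.get_out_dim(), poly(x).shape)
```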
From afcc8434d031d85e609c5a6491998cc333d55138 Mon Sep 17 00:00:00 2001
From: Rohan Mathur
Date: Tue, 3 Oct 2023 00:20:58 -0700
Subject: [PATCH 029/101] fixed link to viser to be functional link cause
previous 404ed (#2486)
---
docs/developer_guides/viewer/index.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/developer_guides/viewer/index.md b/docs/developer_guides/viewer/index.md
index 93204a2035..9df5c60d5a 100644
--- a/docs/developer_guides/viewer/index.md
+++ b/docs/developer_guides/viewer/index.md
@@ -16,7 +16,7 @@ local_viewer
We thank the authors and contributors to the following repos, which we've started, used, and modified for our use-cases.
-- [Viser](https://github.com/brentyi/viser/tree/main/viser) - made by [Brent Yi](https://github.com/brentyi)
+- [Viser](https://github.com/brentyi/viser/) - made by [Brent Yi](https://github.com/brentyi)
- [meshcat-python](https://github.com/rdeits/meshcat-python) - made by [Robin Deits](https://github.com/rdeits)
- [meshcat](https://github.com/rdeits/meshcat) - made by [Robin Deits](https://github.com/rdeits)
- [ThreeJS](https://threejs.org/)
From a484d255b4f71c55915afcfc52d90ec88963779f Mon Sep 17 00:00:00 2001
From: Boris Feld
Date: Wed, 4 Oct 2023 21:02:46 +0200
Subject: [PATCH 030/101] Add support of using both Comet and Viewer at the
same time (#2488)
---
nerfstudio/configs/experiment_config.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py
index 42cc4399ec..d4b3d4de66 100644
--- a/nerfstudio/configs/experiment_config.py
+++ b/nerfstudio/configs/experiment_config.py
@@ -83,7 +83,7 @@ class ExperimentConfig(InstantiateConfig):
def is_viewer_enabled(self) -> bool:
"""Checks if a viewer is enabled."""
- return ("viewer" == self.vis) | ("viewer+wandb" == self.vis) | ("viewer+tensorboard" == self.vis)
+ return self.vis in ("viewer", "viewer+wandb", "viewer+tensorboard", "viewer+comet")
def is_viewer_beta_enabled(self) -> bool:
"""Checks if a viewer beta is enabled."""
From 242a1cf4fe460df4d0ed9a47b2376134eaa43a00 Mon Sep 17 00:00:00 2001
From: Ji Shi
Date: Mon, 9 Oct 2023 03:59:58 +0800
Subject: [PATCH 031/101] Fix DatasetRender to override dataset path only when
provided (#2494)
---
nerfstudio/scripts/render.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index ecb7961cc4..7f6102428f 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -629,7 +629,8 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
data_manager_config.eval_num_times_to_repeat_images = -1
data_manager_config.train_num_images_to_sample_from = -1
data_manager_config.train_num_times_to_repeat_images = -1
- data_manager_config.data = self.data
+ if self.data is not None:
+ data_manager_config.data = self.data
if self.downscale_factor is not None:
assert hasattr(data_manager_config.dataparser, "downscale_factor")
setattr(data_manager_config.dataparser, "downscale_factor", self.downscale_factor)
From d325593e0229ae741920416ebe1812fc0ac4af50 Mon Sep 17 00:00:00 2001
From: Matthew Tancik
Date: Mon, 9 Oct 2023 04:35:08 +0200
Subject: [PATCH 032/101] Remove unused config param (#2495)
Remove unused config.
---
docs/developer_guides/pipelines/datamanagers.md | 2 --
nerfstudio/data/datamanagers/base_datamanager.py | 2 --
nerfstudio/scripts/render.py | 1 -
nerfstudio/utils/eval_utils.py | 3 ---
tests/data/configs/test_config1.yml | 2 --
tests/data/configs/test_config2.yml | 2 --
6 files changed, 12 deletions(-)
diff --git a/docs/developer_guides/pipelines/datamanagers.md b/docs/developer_guides/pipelines/datamanagers.md
index 32c55b63e9..78aec242da 100644
--- a/docs/developer_guides/pipelines/datamanagers.md
+++ b/docs/developer_guides/pipelines/datamanagers.md
@@ -62,8 +62,6 @@ class VanillaDataManagerConfig(InstantiateConfig):
"""number of rays per batch to use per eval iteration"""
eval_num_images_to_sample_from: int = -1
"""number of images to sample during eval iteration"""
- eval_image_indices: Optional[Tuple[int, ...]] = (0,)
- """specifies the image indices to use during eval; if None, uses all"""
camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig()
"""specifies the camera pose optimizer used during training"""
```
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 52fd0af0ff..cb63202dff 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -335,8 +335,6 @@ class VanillaDataManagerConfig(DataManagerConfig):
eval_num_times_to_repeat_images: int = -1
"""When not evaluating on all images, number of iterations before picking
new images. If -1, never pick new images."""
- eval_image_indices: Optional[Tuple[int, ...]] = (0,)
- """Specifies the image indices to use during eval; if None, uses all."""
camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig()
"""Specifies the camera pose optimizer used during training. Helpful if poses are noisy, such as for data from
Record3D."""
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 7f6102428f..307bb5adba 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -624,7 +624,6 @@ def main(self):
def update_config(config: TrainerConfig) -> TrainerConfig:
data_manager_config = config.pipeline.datamanager
assert isinstance(data_manager_config, VanillaDataManagerConfig)
- data_manager_config.eval_image_indices = None
data_manager_config.eval_num_images_to_sample_from = -1
data_manager_config.eval_num_times_to_repeat_images = -1
data_manager_config.train_num_images_to_sample_from = -1
diff --git a/nerfstudio/utils/eval_utils.py b/nerfstudio/utils/eval_utils.py
index 7e04005368..0b7306791c 100644
--- a/nerfstudio/utils/eval_utils.py
+++ b/nerfstudio/utils/eval_utils.py
@@ -26,7 +26,6 @@
import yaml
from nerfstudio.configs.method_configs import all_methods
-from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManagerConfig
from nerfstudio.engine.trainer import TrainerConfig
from nerfstudio.pipelines.base_pipeline import Pipeline
from nerfstudio.utils.rich_utils import CONSOLE
@@ -100,8 +99,6 @@ def eval_setup(
# load checkpoints from wherever they were saved
# TODO: expose the ability to choose an arbitrary checkpoint
config.load_dir = config.get_checkpoint_dir()
- if isinstance(config.pipeline.datamanager, VanillaDataManagerConfig):
- config.pipeline.datamanager.eval_image_indices = None
# setup pipeline (which includes the DataManager)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
diff --git a/tests/data/configs/test_config1.yml b/tests/data/configs/test_config1.yml
index 3cf0147603..67ce18cda1 100644
--- a/tests/data/configs/test_config1.yml
+++ b/tests/data/configs/test_config1.yml
@@ -111,8 +111,6 @@ pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConf
- data
- varun
- cheezit-scaled
- eval_image_indices: !!python/tuple
- - 0
eval_num_images_to_sample_from: -1
eval_num_rays_per_batch: 4096
eval_num_times_to_repeat_images: -1
diff --git a/tests/data/configs/test_config2.yml b/tests/data/configs/test_config2.yml
index c1ccda3791..90737789fd 100644
--- a/tests/data/configs/test_config2.yml
+++ b/tests/data/configs/test_config2.yml
@@ -105,8 +105,6 @@ pipeline: !!python/object:nerfstudio.pipelines.base_pipeline.VanillaPipelineConf
- data
- varun
- cheezit-scaled
- eval_image_indices: !!python/tuple
- - 0
eval_num_images_to_sample_from: -1
eval_num_rays_per_batch: 4096
eval_num_times_to_repeat_images: -1
From 79297c7c61749e1a8e27966f90106bab4aa53efc Mon Sep 17 00:00:00 2001
From: Ruoyu Wang
Date: Mon, 9 Oct 2023 12:22:53 +0800
Subject: [PATCH 033/101] (minor) Align upsampling in TensorCPEncoding with
TensorVMEncoding (#2493)
* (minor) Align upsampling in TensorCPEncoding with TensorVMEncoding
* Fix black
---------
Co-authored-by: Matthew Tancik
Co-authored-by: Matthew Tancik
---
nerfstudio/field_components/encodings.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py
index 4ba28f0c4e..ee2aac93c8 100644
--- a/nerfstudio/field_components/encodings.py
+++ b/nerfstudio/field_components/encodings.py
@@ -482,9 +482,8 @@ def upsample_grid(self, resolution: int) -> None:
resolution: Target resolution.
"""
- self.line_coef.data = F.interpolate(
- self.line_coef.data, size=(resolution, 1), mode="bilinear", align_corners=True
- )
+ line_coef = F.interpolate(self.line_coef.data, size=(resolution, 1), mode="bilinear", align_corners=True)
+ self.line_coef = torch.nn.Parameter(line_coef)
self.resolution = resolution
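This brings `TensorCPEncoding` in line with how `TensorVMEncoding` already upsamples: interpolate the coefficients to the new resolution and re-register the result as a fresh `nn.Parameter` rather than mutating `.data` in place. A minimal, self-contained sketch of that pattern (the toy module below is illustrative, not nerfstudio code):

```python
import torch
import torch.nn.functional as F
from torch import nn


class LineEncoding(nn.Module):
    """Toy stand-in for a CP-style line encoding with an upsamplable grid."""

    def __init__(self, num_components: int = 4, resolution: int = 32) -> None:
        super().__init__()
        self.resolution = resolution
        self.line_coef = nn.Parameter(torch.randn(3, num_components, resolution, 1))

    @torch.no_grad()
    def upsample_grid(self, resolution: int) -> None:
        # Interpolate to the target resolution, then register the result as a
        # new Parameter so the module exposes a tensor of the new shape.
        line_coef = F.interpolate(
            self.line_coef.data, size=(resolution, 1), mode="bilinear", align_corners=True
        )
        self.line_coef = nn.Parameter(line_coef)
        self.resolution = resolution


enc = LineEncoding()
enc.upsample_grid(128)
print(enc.line_coef.shape)  # torch.Size([3, 4, 128, 1])
```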
From 81db1808a72b798d0e314d5bdb6abb25bd9c71c8 Mon Sep 17 00:00:00 2001
From: Jami Pekkanen
Date: Mon, 9 Oct 2023 07:26:10 +0300
Subject: [PATCH 034/101] Add documentation about unknown depth pixel
convention (#2490)
Updated the documentation to make explicit that a zero value in depth images is treated as unknown depth. This behavior is defined in https://github.com/nerfstudio-project/nerfstudio/blob/a484d255b4f71c55915afcfc52d90ec88963779f/nerfstudio/model_components/losses.py#L242
Co-authored-by: Matthew Tancik
---
docs/quickstart/data_conventions.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/quickstart/data_conventions.md b/docs/quickstart/data_conventions.md
index 34324e30e0..2e9e76effa 100644
--- a/docs/quickstart/data_conventions.md
+++ b/docs/quickstart/data_conventions.md
@@ -82,7 +82,7 @@ For a transform matrix, the first 3 columns are the +X, +Y, and +Z defining the
### Depth images
-To train with depth supervision, you can also provide a `depth_file_path` for each frame in your `transforms.json` and use one of the methods that support additional depth losses (e.g., depth-nerfacto). The depths are assumed to be 16-bit or 32-bit and to be in millimeters to remain consistent with [Polyform](https://github.com/PolyCam/polyform). You can adjust this scaling factor using the `depth_unit_scale_factor` parameter in `NerfstudioDataParserConfig`. Note that by default, we resize the depth images to match the shape of the RGB images.
+To train with depth supervision, you can also provide a `depth_file_path` for each frame in your `transforms.json` and use one of the methods that support additional depth losses (e.g., depth-nerfacto). The depths are assumed to be 16-bit or 32-bit and to be in millimeters to remain consistent with [Polyform](https://github.com/PolyCam/polyform). You can adjust this scaling factor using the `depth_unit_scale_factor` parameter in `NerfstudioDataParserConfig`. A zero value in a depth image is treated as unknown depth. Note that by default, we resize the depth images to match the shape of the RGB images.
```json
{
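The convention documented above usually surfaces in the loss as a validity mask over the depth image. A hypothetical sketch of that masking step (not the actual implementation in `losses.py`):

```python
import torch


def masked_depth_l1(pred_depth: torch.Tensor, gt_depth: torch.Tensor) -> torch.Tensor:
    """Example only: pixels whose ground-truth depth is zero are treated as
    unknown and excluded from the loss."""
    valid = gt_depth > 0
    if not valid.any():
        return pred_depth.new_zeros(())
    return (pred_depth[valid] - gt_depth[valid]).abs().mean()
```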
From 1b85fb528b087f6ef9939addc4693f6a6e507219 Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Tue, 10 Oct 2023 13:09:09 -0700
Subject: [PATCH 035/101] patch for mkdirs on camera path json filepath (#2502)
fix camera path json mkdir in viewer beta
---
nerfstudio/viewer_beta/render_panel.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index 7342b0f388..690027f8d0 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -625,6 +625,7 @@ def _(event: viser.GuiEvent) -> None:
# now write the json file
json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json"
+ json_outfile.parent.mkdir(parents=True, exist_ok=True)
with open(json_outfile.absolute(), "w") as outfile:
json.dump(json_data, outfile)
# now show the command
From e694f341a7ad49e2f6842b69cbaa95c370dd7a8a Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Tue, 10 Oct 2023 14:52:25 -0700
Subject: [PATCH 036/101] Move camera optimization out of datamanager and
parallelize dataloading (#2092)
---
nerfstudio/cameras/camera_optimizers.py | 99 ++++--
nerfstudio/configs/method_configs.py | 105 +++---
.../data/datamanagers/base_datamanager.py | 46 +--
.../data/datamanagers/parallel_datamanager.py | 321 ++++++++++++++++++
nerfstudio/data/pixel_samplers.py | 4 +-
nerfstudio/engine/optimizers.py | 19 ++
nerfstudio/engine/trainer.py | 7 -
nerfstudio/fields/nerfacto_field.py | 1 +
nerfstudio/model_components/ray_generators.py | 7 +-
nerfstudio/models/nerfacto.py | 18 +-
nerfstudio/models/tensorf.py | 15 +
nerfstudio/pipelines/base_pipeline.py | 15 +-
nerfstudio/utils/tensor_dataclass.py | 8 +
13 files changed, 531 insertions(+), 134 deletions(-)
create mode 100644 nerfstudio/data/datamanagers/parallel_datamanager.py
diff --git a/nerfstudio/cameras/camera_optimizers.py b/nerfstudio/cameras/camera_optimizers.py
index df8296ea1a..7b76488842 100644
--- a/nerfstudio/cameras/camera_optimizers.py
+++ b/nerfstudio/cameras/camera_optimizers.py
@@ -23,19 +23,16 @@
from typing import Literal, Optional, Type, Union
import torch
-import tyro
from jaxtyping import Float, Int
from torch import Tensor, nn
from typing_extensions import assert_never
from nerfstudio.cameras.lie_groups import exp_map_SE3, exp_map_SO3xR3
+from nerfstudio.cameras.rays import RayBundle
from nerfstudio.configs.base_config import InstantiateConfig
-from nerfstudio.engine.optimizers import AdamOptimizerConfig, OptimizerConfig
-from nerfstudio.engine.schedulers import (
- ExponentialDecaySchedulerConfig,
- SchedulerConfig,
-)
from nerfstudio.utils import poses as pose_utils
+from nerfstudio.engine.optimizers import OptimizerConfig
+from nerfstudio.engine.schedulers import SchedulerConfig
@dataclass
@@ -47,21 +44,38 @@ class CameraOptimizerConfig(InstantiateConfig):
mode: Literal["off", "SO3xR3", "SE3"] = "off"
"""Pose optimization strategy to use. If enabled, we recommend SO3xR3."""
- position_noise_std: float = 0.0
- """Noise to add to initial positions. Useful for debugging."""
+ trans_l2_penalty: float = 1e-2
+ """L2 penalty on translation parameters."""
- orientation_noise_std: float = 0.0
- """Noise to add to initial orientations. Useful for debugging."""
+ rot_l2_penalty: float = 1e-3
+ """L2 penalty on rotation parameters."""
- optimizer: OptimizerConfig = field(default_factory=lambda: AdamOptimizerConfig(lr=6e-4, eps=1e-15))
- """ADAM parameters for camera optimization."""
+ optimizer: Optional[OptimizerConfig] = field(default=None)
+ """Deprecated, now specified inside the optimizers dict"""
- scheduler: SchedulerConfig = field(default_factory=lambda: ExponentialDecaySchedulerConfig(max_steps=10000))
- """Learning rate scheduler for camera optimizer.."""
+ scheduler: Optional[SchedulerConfig] = field(default=None)
+ """Deprecated, now specified inside the optimizers dict"""
- param_group: tyro.conf.Suppress[str] = "camera_opt"
- """Name of the parameter group used for pose optimization. Can be any string that doesn't conflict with other
- groups."""
+ def __post_init__(self):
+ if self.optimizer is not None:
+ import warnings
+ from nerfstudio.utils.rich_utils import CONSOLE
+
+ CONSOLE.print(
+ "\noptimizer is no longer specified in the CameraOptimizerConfig, it is now defined with the rest of the param groups inside the config file under the name 'camera_opt'\n",
+ style="bold yellow",
+ )
+ warnings.warn("above message coming from", FutureWarning, stacklevel=3)
+
+ if self.scheduler is not None:
+ import warnings
+ from nerfstudio.utils.rich_utils import CONSOLE
+
+ CONSOLE.print(
+ "\nscheduler is no longer specified in the CameraOptimizerConfig, it is now defined with the rest of the param groups inside the config file under the name 'camera_opt'\n",
+ style="bold yellow",
+ )
+ warnings.warn("above message coming from", FutureWarning, stacklevel=3)
class CameraOptimizer(nn.Module):
@@ -91,16 +105,6 @@ def __init__(
else:
assert_never(self.config.mode)
- # Initialize pose noise; useful for debugging.
- if config.position_noise_std != 0.0 or config.orientation_noise_std != 0.0:
- assert config.position_noise_std >= 0.0 and config.orientation_noise_std >= 0.0
- std_vector = torch.tensor(
- [config.position_noise_std] * 3 + [config.orientation_noise_std] * 3, device=device
- )
- self.pose_noise = exp_map_SE3(torch.normal(torch.zeros((num_cameras, 6), device=device), std_vector))
- else:
- self.pose_noise = None
-
def forward(
self,
indices: Int[Tensor, "camera_indices"],
@@ -125,13 +129,46 @@ def forward(
assert_never(self.config.mode)
# Detach non-trainable indices by setting to identity transform
if self.non_trainable_camera_indices is not None:
- outputs[0][self.non_trainable_camera_indices] = torch.eye(4, device=self.device)[:3, :4]
+ if self.non_trainable_camera_indices.device != self.pose_adjustment.device:
+ self.non_trainable_camera_indices = self.non_trainable_camera_indices.to(self.pose_adjustment.device)
+ outputs[0][self.non_trainable_camera_indices] = torch.eye(4, device=self.pose_adjustment.device)[:3, :4]
- # Apply initial pose noise.
- if self.pose_noise is not None:
- outputs.append(self.pose_noise[indices, :, :])
# Return: identity if no transforms are needed, otherwise multiply transforms together.
if len(outputs) == 0:
# Note that using repeat() instead of tile() here would result in unnecessary copies.
return torch.eye(4, device=self.device)[None, :3, :4].tile(indices.shape[0], 1, 1)
return functools.reduce(pose_utils.multiply, outputs)
+
+ def apply_to_raybundle(self, raybundle: RayBundle) -> None:
+ """Apply the pose correction to the raybundle"""
+ if self.config.mode != "off":
+ correction_matrices = self(raybundle.camera_indices.squeeze()) # type: ignore
+ raybundle.origins = raybundle.origins + correction_matrices[:, :3, 3]
+ raybundle.directions = torch.bmm(correction_matrices[:, :3, :3], raybundle.directions[..., None]).squeeze()
+
+ def get_loss_dict(self, loss_dict: dict) -> None:
+ """Add regularization"""
+ if self.config.mode != "off":
+ loss_dict["camera_opt_regularizer"] = (
+ self.pose_adjustment[:, :3].norm(dim=-1).mean() * self.config.trans_l2_penalty
+ + self.pose_adjustment[:, 3:].norm(dim=-1).mean() * self.config.rot_l2_penalty
+ )
+
+ def get_correction_matrices(self):
+ """Get optimized pose correction matrices"""
+ return self(torch.arange(0, self.num_cameras).long())
+
+ def get_metrics_dict(self, metrics_dict: dict) -> None:
+ """Get camera optimizer metrics"""
+ if self.config.mode != "off":
+ metrics_dict["camera_opt_translation"] = self.pose_adjustment[:, :3].norm()
+ metrics_dict["camera_opt_rotation"] = self.pose_adjustment[:, 3:].norm()
+
+ def get_param_groups(self, param_groups: dict) -> None:
+ """Get camera optimizer parameters"""
+ camera_opt_params = list(self.parameters())
+ if self.config.mode != "off":
+ assert len(camera_opt_params) > 0
+ param_groups["camera_opt"] = camera_opt_params
+ else:
+ assert len(camera_opt_params) == 0
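With this refactor the camera optimizer is owned by the model and exposes small helpers that the model calls directly, as the later hunks in this patch show. A short usage sketch under the new API, assuming nerfstudio with this patch applied; the camera count and printed shape are illustrative:

```python
from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig

# Build the optimizer the way a model's populate_modules() now does.
camera_optimizer = CameraOptimizerConfig(mode="SO3xR3").setup(num_cameras=100, device="cpu")

# Parameters are reported through the model's param groups under "camera_opt".
param_groups = {}
camera_optimizer.get_param_groups(param_groups=param_groups)

# Regularization and monitoring are appended to the model's dicts.
loss_dict, metrics_dict = {}, {}
camera_optimizer.get_loss_dict(loss_dict)         # adds "camera_opt_regularizer"
camera_optimizer.get_metrics_dict(metrics_dict)   # adds translation/rotation norms

# Pose corrections for all cameras as [num_cameras, 3, 4] matrices.
corrections = camera_optimizer.get_correction_matrices()
print(corrections.shape)
```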
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index a92c042bf9..74cabbafcb 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -27,15 +27,23 @@
from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
from nerfstudio.configs.base_config import ViewerConfig
from nerfstudio.configs.external_methods import get_external_methods
-
-from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig
-from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig
-
+from nerfstudio.data.datamanagers.base_datamanager import (
+ VanillaDataManager,
+ VanillaDataManagerConfig,
+)
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig
+from nerfstudio.data.datamanagers.random_cameras_datamanager import (
+ RandomCamerasDataManagerConfig,
+)
from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig
-from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig
+from nerfstudio.data.dataparsers.instant_ngp_dataparser import (
+ InstantNGPDataParserConfig,
+)
from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
-from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig
+from nerfstudio.data.dataparsers.phototourism_dataparser import (
+ PhototourismDataParserConfig,
+)
from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig
from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig
from nerfstudio.data.datasets.depth_dataset import DepthDataset
@@ -88,17 +96,15 @@
max_num_iterations=30000,
mixed_precision=True,
pipeline=VanillaPipelineConfig(
- datamanager=VanillaDataManagerConfig(
+ datamanager=ParallelDataManagerConfig(
dataparser=NerfstudioDataParserConfig(),
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3",
- optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2),
- scheduler=ExponentialDecaySchedulerConfig(lr_final=6e-6, max_steps=200000),
- ),
),
- model=NerfactoModelConfig(eval_num_rays_per_chunk=1 << 15),
+ model=NerfactoModelConfig(
+ eval_num_rays_per_chunk=1 << 15,
+ camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"),
+ ),
),
optimizers={
"proposal_networks": {
@@ -109,10 +115,15 @@
"optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
"scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000),
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
)
+
method_configs["nerfacto-big"] = TrainerConfig(
method_name="nerfacto",
steps_per_eval_batch=500,
@@ -120,14 +131,10 @@
max_num_iterations=100000,
mixed_precision=True,
pipeline=VanillaPipelineConfig(
- datamanager=VanillaDataManagerConfig(
+ datamanager=ParallelDataManagerConfig(
dataparser=NerfstudioDataParserConfig(),
train_num_rays_per_batch=8192,
eval_num_rays_per_batch=4096,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3",
- optimizer=RAdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-3),
- ),
),
model=NerfactoModelConfig(
eval_num_rays_per_chunk=1 << 15,
@@ -139,6 +146,7 @@
max_res=4096,
proposal_weights_anneal_max_num_iters=5000,
log2_hashmap_size=21,
+ camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"),
),
),
optimizers={
@@ -150,10 +158,15 @@
"optimizer": RAdamOptimizerConfig(lr=1e-2, eps=1e-15),
"scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=50000),
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
)
+
method_configs["nerfacto-huge"] = TrainerConfig(
method_name="nerfacto",
steps_per_eval_batch=500,
@@ -161,15 +174,10 @@
max_num_iterations=100000,
mixed_precision=True,
pipeline=VanillaPipelineConfig(
- datamanager=VanillaDataManagerConfig(
+ datamanager=ParallelDataManagerConfig(
dataparser=NerfstudioDataParserConfig(),
train_num_rays_per_batch=16384,
eval_num_rays_per_batch=4096,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3",
- optimizer=RAdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-3),
- scheduler=ExponentialDecaySchedulerConfig(lr_final=6e-5, max_steps=50000),
- ),
),
model=NerfactoModelConfig(
eval_num_rays_per_chunk=1 << 15,
@@ -185,6 +193,7 @@
max_res=8192,
proposal_weights_anneal_max_num_iters=5000,
log2_hashmap_size=21,
+ camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"),
),
),
optimizers={
@@ -196,6 +205,10 @@
"optimizer": RAdamOptimizerConfig(lr=1e-2, eps=1e-15),
"scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=50000),
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
@@ -214,11 +227,11 @@
dataparser=NerfstudioDataParserConfig(),
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
- ),
),
- model=DepthNerfactoModelConfig(eval_num_rays_per_chunk=1 << 15),
+ model=DepthNerfactoModelConfig(
+ eval_num_rays_per_chunk=1 << 15,
+ camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"),
+ ),
),
optimizers={
"proposal_networks": {
@@ -229,6 +242,10 @@
"optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
"scheduler": None,
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
@@ -258,7 +275,6 @@
vis="viewer",
)
-
method_configs["instant-ngp-bounded"] = TrainerConfig(
method_name="instant-ngp-bounded",
steps_per_eval_batch=500,
@@ -286,12 +302,12 @@
viewer=ViewerConfig(num_rays_per_chunk=1 << 12),
vis="viewer",
)
-
-
+#
+#
method_configs["mipnerf"] = TrainerConfig(
method_name="mipnerf",
pipeline=VanillaPipelineConfig(
- datamanager=VanillaDataManagerConfig(dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=1024),
+ datamanager=ParallelDataManagerConfig(dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=1024),
model=VanillaModelConfig(
_target=MipNerfModel,
loss_coefficients={"rgb_loss_coarse": 0.1, "rgb_loss_fine": 1.0},
@@ -364,13 +380,14 @@
max_num_iterations=30000,
mixed_precision=False,
pipeline=VanillaPipelineConfig(
- datamanager=VanillaDataManagerConfig(
+ datamanager=ParallelDataManagerConfig(
dataparser=BlenderDataParserConfig(),
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
),
model=TensoRFModelConfig(
regularization="tv",
+ camera_optimizer=CameraOptimizerConfig(mode="off"),
),
),
optimizers={
@@ -382,6 +399,10 @@
"optimizer": AdamOptimizerConfig(lr=0.02),
"scheduler": ExponentialDecaySchedulerConfig(lr_final=0.002, max_steps=30000),
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-4, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-5, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
@@ -420,16 +441,16 @@
dataparser=PhototourismDataParserConfig(), # NOTE: one of the only differences with nerfacto
train_num_rays_per_batch=4096,
eval_num_rays_per_batch=4096,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
- ),
# Large dataset, so using prior values from VariableResDataManager.
train_num_images_to_sample_from=40,
train_num_times_to_repeat_images=100,
eval_num_images_to_sample_from=40,
eval_num_times_to_repeat_images=100,
),
- model=NerfactoModelConfig(eval_num_rays_per_chunk=1 << 15),
+ model=NerfactoModelConfig(
+ eval_num_rays_per_chunk=1 << 15,
+ camera_optimizer=CameraOptimizerConfig(mode="SO3xR3"),
+ ),
),
optimizers={
"proposal_networks": {
@@ -440,6 +461,10 @@
"optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15),
"scheduler": None,
},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000),
+ },
},
viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
vis="viewer",
@@ -504,9 +529,6 @@
dataparser=SDFStudioDataParserConfig(),
train_num_rays_per_batch=1024,
eval_num_rays_per_batch=1024,
- camera_optimizer=CameraOptimizerConfig(
- mode="off", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
- ),
),
model=NeuSModelConfig(eval_num_rays_per_chunk=1024),
),
@@ -538,9 +560,6 @@
dataparser=SDFStudioDataParserConfig(),
train_num_rays_per_batch=2048,
eval_num_rays_per_batch=2048,
- camera_optimizer=CameraOptimizerConfig(
- mode="SO3xR3", optimizer=AdamOptimizerConfig(lr=6e-4, eps=1e-8, weight_decay=1e-2)
- ),
),
model=NeuSFactoModelConfig(
# proposal network allows for significantly smaller sdf/color network
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index cb63202dff..0b937f64c6 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -113,8 +113,6 @@ class DataManagerConfig(InstantiateConfig):
"""Target class to instantiate."""
data: Optional[Path] = None
"""Source of data, may not be used by all models."""
- camera_optimizer: Optional[CameraOptimizerConfig] = None
- """Specifies the camera pose optimizer used during training. Helpful if poses are noisy."""
masks_on_gpu: bool = False
"""Process masks on GPU for speed at the expense of memory, if True."""
images_on_gpu: bool = False
@@ -335,9 +333,8 @@ class VanillaDataManagerConfig(DataManagerConfig):
eval_num_times_to_repeat_images: int = -1
"""When not evaluating on all images, number of iterations before picking
new images. If -1, never pick new images."""
- camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig()
- """Specifies the camera pose optimizer used during training. Helpful if poses are noisy, such as for data from
- Record3D."""
+ eval_image_indices: Optional[Tuple[int, ...]] = (0,)
+ """Specifies the image indices to use during eval; if None, uses all."""
collate_fn: Callable[[Any], Any] = cast(Any, staticmethod(nerfstudio_collate))
"""Specifies the collate function to use for the train and eval dataloaders."""
camera_res_scale_factor: float = 1.0
@@ -346,9 +343,21 @@ class VanillaDataManagerConfig(DataManagerConfig):
"""
patch_size: int = 1
"""Size of patch to sample from. If >1, patch-based sampling will be used."""
+ camera_optimizer: Optional[CameraOptimizerConfig] = field(default=None)
+ """Deprecated, has been moved to the model config."""
pixel_sampler: PixelSamplerConfig = PixelSamplerConfig()
"""Specifies the pixel sampler used to sample pixels from images."""
+ def __post_init__(self):
+ """Warn user of camera optimizer change."""
+ if self.camera_optimizer is not None:
+ import warnings
+
+ CONSOLE.print(
+ "\nCameraOptimizerConfig has been moved from the DataManager to the Model.\n", style="bold yellow"
+ )
+ warnings.warn("above message coming from", FutureWarning, stacklevel=3)
+
TDataset = TypeVar("TDataset", bound=InputDataset, default=InputDataset)
@@ -486,13 +495,7 @@ def setup_train(self):
)
self.iter_train_image_dataloader = iter(self.train_image_dataloader)
self.train_pixel_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch)
- self.train_camera_optimizer = self.config.camera_optimizer.setup(
- num_cameras=self.train_dataset.cameras.size, device=self.device
- )
- self.train_ray_generator = RayGenerator(
- self.train_dataset.cameras.to(self.device),
- self.train_camera_optimizer,
- )
+ self.train_ray_generator = RayGenerator(self.train_dataset.cameras.to(self.device))
def setup_eval(self):
"""Sets up the data loader for evaluation"""
@@ -510,13 +513,7 @@ def setup_eval(self):
)
self.iter_eval_image_dataloader = iter(self.eval_image_dataloader)
self.eval_pixel_sampler = self._get_pixel_sampler(self.eval_dataset, self.config.eval_num_rays_per_batch)
- self.eval_camera_optimizer = self.config.camera_optimizer.setup(
- num_cameras=self.eval_dataset.cameras.size, device=self.device
- )
- self.eval_ray_generator = RayGenerator(
- self.eval_dataset.cameras.to(self.device),
- self.eval_camera_optimizer,
- )
+ self.eval_ray_generator = RayGenerator(self.eval_dataset.cameras.to(self.device))
# for loading full images
self.fixed_indices_eval_dataloader = FixedIndicesEvalDataloader(
input_dataset=self.eval_dataset,
@@ -572,13 +569,4 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
Returns:
A list of dictionaries containing the data manager's param groups.
"""
- param_groups = {}
-
- camera_opt_params = list(self.train_camera_optimizer.parameters())
- if self.config.camera_optimizer.mode != "off":
- assert len(camera_opt_params) > 0
- param_groups[self.config.camera_optimizer.param_group] = camera_opt_params
- else:
- assert len(camera_opt_params) == 0
-
- return param_groups
+ return {}
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
new file mode 100644
index 0000000000..8d541d79ef
--- /dev/null
+++ b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -0,0 +1,321 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Parallel data manager that generates training data in multiple python processes.
+"""
+from __future__ import annotations
+
+import concurrent.futures
+import queue
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import (
+ Dict,
+ Generic,
+ List,
+ Literal,
+ Optional,
+ Tuple,
+ Type,
+ Union,
+)
+
+import torch
+import torch.multiprocessing as mp
+from rich.progress import track
+from torch.nn import Parameter
+
+from nerfstudio.cameras.cameras import CameraType
+from nerfstudio.cameras.rays import RayBundle
+from nerfstudio.data.datamanagers.base_datamanager import (
+ DataManager,
+ VanillaDataManagerConfig,
+ TDataset,
+ variable_res_collate,
+)
+from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
+from nerfstudio.data.pixel_samplers import (
+ PixelSampler,
+ PixelSamplerConfig,
+ PatchPixelSamplerConfig,
+)
+from nerfstudio.data.utils.dataloaders import (
+ CacheDataloader,
+ FixedIndicesEvalDataloader,
+ RandIndicesEvalDataloader,
+)
+from nerfstudio.model_components.ray_generators import RayGenerator
+from nerfstudio.utils.rich_utils import CONSOLE
+
+
+@dataclass
+class ParallelDataManagerConfig(VanillaDataManagerConfig):
+ """Config for a `ParallelDataManager` which reads data in multiple processes"""
+
+ _target: Type = field(default_factory=lambda: ParallelDataManager)
+ """Target class to instantiate."""
+ num_processes: int = 1
+ """Number of processes to use for train data loading. More than 1 doesn't result in that much better performance"""
+ queue_size: int = 2
+ """Size of shared data queue containing generated ray bundles and batches.
+ If queue_size <= 0, the queue size is infinite."""
+ max_thread_workers: Optional[int] = None
+ """Maximum number of threads to use in thread pool executor. If None, use ThreadPool default."""
+
+
+class DataProcessor(mp.Process):
+ """Parallel dataset batch processor.
+
+ This class is responsible for generating ray bundles from an input dataset
+ in parallel python processes.
+
+ Args:
+ out_queue: the output queue for storing the processed data
+ config: configuration object for the parallel data manager
+ dataparser_outputs: outputs from the dataparser
+ dataset: input dataset
+ pixel_sampler: The pixel sampler for sampling rays
+ """
+
+ def __init__(
+ self,
+ out_queue: mp.Queue,
+ config: ParallelDataManagerConfig,
+ dataparser_outputs: DataparserOutputs,
+ dataset: TDataset,
+ pixel_sampler: PixelSampler,
+ ):
+ super().__init__()
+ self.daemon = True
+ self.out_queue = out_queue
+ self.config = config
+ self.dataparser_outputs = dataparser_outputs
+ self.dataset = dataset
+ self.exclude_batch_keys_from_device = self.dataset.exclude_batch_keys_from_device
+ self.pixel_sampler = pixel_sampler
+ self.ray_generator = RayGenerator(self.dataset.cameras)
+ self.cache_images()
+
+ def run(self):
+ """Append out queue in parallel with ray bundles and batches."""
+ while True:
+ batch = self.pixel_sampler.sample(self.img_data)
+ ray_indices = batch["indices"]
+ ray_bundle: RayBundle = self.ray_generator(ray_indices)
+ # check that GPUs are available
+ if torch.cuda.is_available():
+ ray_bundle = ray_bundle.pin_memory()
+ while True:
+ try:
+ self.out_queue.put_nowait((ray_bundle, batch))
+ break
+ except queue.Full:
+ time.sleep(0.0001)
+ except Exception:
+ CONSOLE.print_exception()
+ CONSOLE.print("[bold red]Error: Error occured in parallel datamanager queue.")
+
+ def cache_images(self):
+ """Caches all input images into a NxHxWx3 tensor."""
+ indices = range(len(self.dataset))
+ batch_list = []
+ results = []
+ with concurrent.futures.ThreadPoolExecutor(max_workers=self.config.max_thread_workers) as executor:
+ for idx in indices:
+ res = executor.submit(self.dataset.__getitem__, idx)
+ results.append(res)
+ for res in track(results, description="Loading data batch", transient=False):
+ batch_list.append(res.result())
+ self.img_data = self.config.collate_fn(batch_list)
+
+
+class ParallelDataManager(DataManager, Generic[TDataset]):
+ """Data manager implementation for parallel dataloading.
+
+ Args:
+ config: the DataManagerConfig used to instantiate class
+ """
+
+ def __init__(
+ self,
+ config: ParallelDataManagerConfig,
+ device: Union[torch.device, str] = "cpu",
+ test_mode: Literal["test", "val", "inference"] = "val",
+ world_size: int = 1,
+ local_rank: int = 0,
+ **kwargs,
+ ):
+ self.dataset_type: Type[TDataset] = kwargs.get("_dataset_type", getattr(TDataset, "__default__"))
+ self.config = config
+ self.device = device
+ self.world_size = world_size
+ self.local_rank = local_rank
+ self.test_mode = test_mode
+ self.test_split = "test" if test_mode in ["test", "inference"] else "val"
+ self.dataparser_config = self.config.dataparser
+ if self.config.data is not None:
+ self.config.dataparser.data = Path(self.config.data)
+ else:
+ self.config.data = self.config.dataparser.data
+ self.dataparser = self.dataparser_config.setup()
+ if test_mode == "inference":
+ self.dataparser.downscale_factor = 1 # Avoid opening images
+ self.includes_time = self.dataparser.includes_time
+ self.train_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split="train")
+ self.eval_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split=self.test_split)
+ cameras = self.train_dataparser_outputs.cameras
+ if len(cameras) > 1:
+ for i in range(1, len(cameras)):
+ if cameras[0].width != cameras[i].width or cameras[0].height != cameras[i].height:
+ CONSOLE.print("Variable resolution, using variable_res_collate")
+ self.config.collate_fn = variable_res_collate
+ break
+ self.train_dataset = self.create_train_dataset()
+ self.eval_dataset = self.create_eval_dataset()
+ self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device
+        # Spawn is critical for not freezing the program (PyTorch compatibility issue)
+ # check if spawn is already set
+ if mp.get_start_method(allow_none=True) is None:
+ mp.set_start_method("spawn")
+ super().__init__()
+
+ def create_train_dataset(self) -> TDataset:
+ """Sets up the data loaders for training."""
+ return self.dataset_type(
+ dataparser_outputs=self.train_dataparser_outputs,
+ scale_factor=self.config.camera_res_scale_factor,
+ )
+
+ def create_eval_dataset(self) -> TDataset:
+ """Sets up the data loaders for evaluation."""
+ return self.dataset_type(
+ dataparser_outputs=self.dataparser.get_dataparser_outputs(split=self.test_split),
+ scale_factor=self.config.camera_res_scale_factor,
+ )
+
+ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> PixelSampler:
+ """Infer pixel sampler to use."""
+ if self.config.patch_size > 1 and type(self.config.pixel_sampler) is PixelSamplerConfig:
+ return PatchPixelSamplerConfig().setup(
+ patch_size=self.config.patch_size, num_rays_per_batch=num_rays_per_batch
+ )
+ is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all()
+ if is_equirectangular.any():
+ CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.")
+ return self.config.pixel_sampler.setup(
+ is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch
+ )
+
+ def setup_train(self):
+ """Sets up parallel python data processes for training."""
+ assert self.train_dataset is not None
+ self.train_pix_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore
+ self.data_queue = mp.Manager().Queue(maxsize=self.config.queue_size)
+ self.data_procs = [
+ DataProcessor(
+ out_queue=self.data_queue, # type: ignore
+ config=self.config,
+ dataparser_outputs=self.train_dataparser_outputs,
+ dataset=self.train_dataset,
+ pixel_sampler=self.train_pix_sampler,
+ )
+ for i in range(self.config.num_processes)
+ ]
+ for proc in self.data_procs:
+ proc.start()
+ print("Started threads")
+
+ # Prime the executor with the first batch
+ self.train_executor = concurrent.futures.ThreadPoolExecutor(max_workers=self.config.max_thread_workers)
+ self.train_batch_fut = self.train_executor.submit(self.data_queue.get)
+
+ def setup_eval(self):
+ """Sets up the data loader for evaluation."""
+ assert self.eval_dataset is not None
+ CONSOLE.print("Setting up evaluation dataset...")
+ self.eval_image_dataloader = CacheDataloader(
+ self.eval_dataset,
+ num_images_to_sample_from=self.config.eval_num_images_to_sample_from,
+ num_times_to_repeat_images=self.config.eval_num_times_to_repeat_images,
+ device=self.device,
+ num_workers=self.world_size * 4,
+ pin_memory=True,
+ collate_fn=self.config.collate_fn,
+ exclude_batch_keys_from_device=self.exclude_batch_keys_from_device,
+ )
+ self.iter_eval_image_dataloader = iter(self.eval_image_dataloader)
+ self.eval_pixel_sampler = self._get_pixel_sampler(self.eval_dataset, self.config.eval_num_rays_per_batch) # type: ignore
+ self.eval_ray_generator = RayGenerator(self.eval_dataset.cameras.to(self.device))
+ # for loading full images
+ self.fixed_indices_eval_dataloader = FixedIndicesEvalDataloader(
+ input_dataset=self.eval_dataset,
+ device=self.device,
+ num_workers=self.world_size * 4,
+ )
+ self.eval_dataloader = RandIndicesEvalDataloader(
+ input_dataset=self.eval_dataset,
+ device=self.device,
+ num_workers=self.world_size * 4,
+ )
+
+ def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
+ """Returns the next batch of data from the parallel training processes."""
+ self.train_count += 1
+
+ # Fetch the next batch in an executor to parallelize the queue get() operation
+ # with the train step
+ bundle, batch = self.train_batch_fut.result()
+ self.train_batch_fut = self.train_executor.submit(self.data_queue.get)
+ ray_bundle = bundle.to(self.device)
+ return ray_bundle, batch
+
+ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
+ """Returns the next batch of data from the eval dataloader."""
+ self.eval_count += 1
+ image_batch = next(self.iter_eval_image_dataloader)
+ assert self.eval_pixel_sampler is not None
+ assert isinstance(image_batch, dict)
+ batch = self.eval_pixel_sampler.sample(image_batch)
+ ray_indices = batch["indices"]
+ ray_bundle = self.eval_ray_generator(ray_indices)
+ return ray_bundle, batch
+
+ def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
+ """Retrieve the next eval image."""
+ for camera_ray_bundle, batch in self.eval_dataloader:
+ assert camera_ray_bundle.camera_indices is not None
+ image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0])
+ return image_idx, camera_ray_bundle, batch
+ raise ValueError("No more eval images")
+
+ def get_train_rays_per_batch(self) -> int:
+ """Returns the number of rays per batch for training."""
+ return self.config.train_num_rays_per_batch
+
+ def get_eval_rays_per_batch(self) -> int:
+ """Returns the number of rays per batch for evaluation."""
+ return self.config.eval_num_rays_per_batch
+
+ def get_datapath(self) -> Path:
+ """Returns the path to the data. This is used to determine where to save camera paths."""
+ return self.config.dataparser.data
+
+ def get_param_groups(self) -> Dict[str, List[Parameter]]:
+ """Get the param groups for the data manager.
+ Returns:
+ A list of dictionaries containing the data manager's param groups.
+ """
+ return {}
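The new `ParallelDataManager` is a producer/consumer setup: worker processes pre-sample batches and push them into a shared queue, while the trainer overlaps `queue.get()` with the training step through a thread-pool future. A stripped-down sketch of that pattern using plain tensors (none of the names below are nerfstudio API; it only mirrors the structure of the file above):

```python
import concurrent.futures

import torch
import torch.multiprocessing as mp


def producer(out_queue) -> None:
    """Generate batches forever and push them into the shared queue."""
    while True:
        batch = torch.rand(4096, 3)  # stands in for a sampled ray batch
        out_queue.put(batch)


if __name__ == "__main__":
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")  # as in the diff: spawn avoids fork-related hangs
    manager = mp.Manager()
    data_queue = manager.Queue(maxsize=2)
    proc = mp.Process(target=producer, args=(data_queue,), daemon=True)
    proc.start()

    # Overlap the queue.get() with the "training step" using a thread-pool future.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    next_batch = executor.submit(data_queue.get)
    for step in range(3):
        batch = next_batch.result()
        next_batch = executor.submit(data_queue.get)  # prefetch while we "train"
        print(step, batch.shape)

    executor.shutdown(wait=True)
    proc.terminate()
    proc.join()
```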
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
index d7e76806d8..9234a5d420 100644
--- a/nerfstudio/data/pixel_samplers.py
+++ b/nerfstudio/data/pixel_samplers.py
@@ -98,7 +98,7 @@ def sample_method(
chosen_indices = random.sample(range(len(nonzero_indices)), k=batch_size)
indices = nonzero_indices[chosen_indices]
else:
- indices = torch.floor(
+ indices = (
torch.rand((batch_size, 3), device=device)
* torch.tensor([num_images, image_height, image_width], device=device)
).long()
@@ -172,13 +172,11 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep
collated_batch = {
key: value[c, y, x] for key, value in batch.items() if key != "image_idx" and value is not None
}
-
assert collated_batch["image"].shape[0] == num_rays_per_batch
# Needed to correct the random indices to their actual camera idx locations.
indices[:, 0] = batch["image_idx"][c]
collated_batch["indices"] = indices # with the abs camera indices
-
if keep_full_image:
collated_batch["full_image"] = batch["image"]
diff --git a/nerfstudio/engine/optimizers.py b/nerfstudio/engine/optimizers.py
index b80b77bcb5..9b8d48dba3 100644
--- a/nerfstudio/engine/optimizers.py
+++ b/nerfstudio/engine/optimizers.py
@@ -84,6 +84,25 @@ def __init__(self, config: Dict[str, Any], param_groups: Dict[str, List[Paramete
self.schedulers = {}
self.parameters = {}
for param_group_name, params in param_groups.items():
+ # For deprecation, catch the camera_opt param group and fix it nicely
+ if param_group_name == "camera_opt" and "camera_opt" not in config:
+ from nerfstudio.engine.schedulers import ExponentialDecaySchedulerConfig
+ from nerfstudio.utils.rich_utils import CONSOLE
+
+ CONSOLE.print(
+ "\nThe 'camera_opt' param group should be assigned an optimizer in the config. Assigning default optimizers for now. This will be removed in a future release.\n",
+ style="bold yellow",
+ )
+
+ config["camera_opt"] = {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=30000),
+ }
+ # Print some nice warning messages if the user forgot to specify an optimizer
+ if param_group_name not in config:
+ raise RuntimeError(
+ f"""Optimizer config for '{param_group_name}' not found in config file. Make sure you specify an optimizer for each parameter group. Provided configs were: {config.keys()}"""
+ )
lr_init = config[param_group_name]["optimizer"].lr
self.optimizers[param_group_name] = config[param_group_name]["optimizer"].setup(params=params)
self.parameters[param_group_name] = params
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index b4ccba9eea..fe4aa85cea 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -219,13 +219,6 @@ def setup_optimizers(self) -> Optimizers:
"""
optimizer_config = self.config.optimizers.copy()
param_groups = self.pipeline.get_param_groups()
- camera_optimizer_config = self.config.pipeline.datamanager.camera_optimizer
- if camera_optimizer_config is not None and camera_optimizer_config.mode != "off":
- assert camera_optimizer_config.param_group not in optimizer_config
- optimizer_config[camera_optimizer_config.param_group] = {
- "optimizer": camera_optimizer_config.optimizer,
- "scheduler": camera_optimizer_config.scheduler,
- }
return Optimizers(optimizer_config, param_groups)
def train(self) -> None:
diff --git a/nerfstudio/fields/nerfacto_field.py b/nerfstudio/fields/nerfacto_field.py
index 34c6346fcf..f215a00458 100644
--- a/nerfstudio/fields/nerfacto_field.py
+++ b/nerfstudio/fields/nerfacto_field.py
@@ -116,6 +116,7 @@ def __init__(
self.use_pred_normals = use_pred_normals
self.pass_semantic_gradients = pass_semantic_gradients
self.base_res = base_res
+ self.step = 0
self.direction_encoding = SHEncoding(
levels=4,
diff --git a/nerfstudio/model_components/ray_generators.py b/nerfstudio/model_components/ray_generators.py
index 49754548dc..bad32258d0 100644
--- a/nerfstudio/model_components/ray_generators.py
+++ b/nerfstudio/model_components/ray_generators.py
@@ -18,7 +18,6 @@
from jaxtyping import Int
from torch import Tensor, nn
-from nerfstudio.cameras.camera_optimizers import CameraOptimizer
from nerfstudio.cameras.cameras import Cameras
from nerfstudio.cameras.rays import RayBundle
@@ -34,10 +33,9 @@ class RayGenerator(nn.Module):
image_coords: Tensor
- def __init__(self, cameras: Cameras, pose_optimizer: CameraOptimizer) -> None:
+ def __init__(self, cameras: Cameras) -> None:
super().__init__()
self.cameras = cameras
- self.pose_optimizer = pose_optimizer
self.register_buffer("image_coords", cameras.get_image_coords(), persistent=False)
def forward(self, ray_indices: Int[Tensor, "num_rays 3"]) -> RayBundle:
@@ -51,11 +49,8 @@ def forward(self, ray_indices: Int[Tensor, "num_rays 3"]) -> RayBundle:
x = ray_indices[:, 2] # col indices
coords = self.image_coords[y, x]
- camera_opt_to_camera = self.pose_optimizer(c)
-
ray_bundle = self.cameras.generate_rays(
camera_indices=c.unsqueeze(-1),
coords=coords,
- camera_opt_to_camera=camera_opt_to_camera,
)
return ray_bundle
diff --git a/nerfstudio/models/nerfacto.py b/nerfstudio/models/nerfacto.py
index 1122d5c203..df8eed9b65 100644
--- a/nerfstudio/models/nerfacto.py
+++ b/nerfstudio/models/nerfacto.py
@@ -28,6 +28,7 @@
from torchmetrics.image import PeakSignalNoiseRatio
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
+from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig
from nerfstudio.cameras.rays import RayBundle, RaySamples
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
from nerfstudio.field_components.field_heads import FieldHeadNames
@@ -126,6 +127,8 @@ class NerfactoModelConfig(ModelConfig):
"""Which implementation to use for the model."""
appearance_embed_dim: int = 32
"""Dimension of the appearance embedding."""
+ camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3")
+ """Config of the camera optimizer to use"""
class NerfactoModel(Model):
@@ -165,6 +168,9 @@ def populate_modules(self):
implementation=self.config.implementation,
)
+ self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup(
+ num_cameras=self.num_train_data, device="cpu"
+ )
self.density_fns = []
num_prop_nets = self.config.num_proposal_iterations
# Build the proposal network(s)
@@ -229,7 +235,7 @@ def update_schedule(step):
# losses
self.rgb_loss = MSELoss()
-
+ self.step = 0
# metrics
self.psnr = PeakSignalNoiseRatio(data_range=1.0)
self.ssim = structural_similarity_index_measure
@@ -240,6 +246,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
param_groups = {}
param_groups["proposal_networks"] = list(self.proposal_networks.parameters())
param_groups["fields"] = list(self.field.parameters())
+ self.camera_optimizer.get_param_groups(param_groups=param_groups)
return param_groups
def get_training_callbacks(
@@ -252,6 +259,7 @@ def get_training_callbacks(
def set_anneal(step):
# https://arxiv.org/pdf/2111.12077.pdf eq. 18
+ self.step = step
train_frac = np.clip(step / N, 0, 1)
self.step = step
@@ -278,6 +286,9 @@ def bias(x, b):
return callbacks
def get_outputs(self, ray_bundle: RayBundle):
+ # apply the camera optimizer pose tweaks
+ if self.training:
+ self.camera_optimizer.apply_to_raybundle(ray_bundle)
ray_samples: RaySamples
ray_samples, weights_list, ray_samples_list = self.proposal_sampler(ray_bundle, density_fns=self.density_fns)
field_outputs = self.field.forward(ray_samples, compute_normals=self.config.predict_normals)
@@ -324,7 +335,6 @@ def get_outputs(self, ray_bundle: RayBundle):
for i in range(self.config.num_proposal_iterations):
outputs[f"prop_depth_{i}"] = self.renderer_depth(weights=weights_list[i], ray_samples=ray_samples_list[i])
-
return outputs
def get_metrics_dict(self, outputs, batch):
@@ -336,6 +346,8 @@ def get_metrics_dict(self, outputs, batch):
if self.training:
metrics_dict["distortion"] = distortion_loss(outputs["weights_list"], outputs["ray_samples_list"])
+
+ self.camera_optimizer.get_metrics_dict(metrics_dict)
return metrics_dict
def get_loss_dict(self, outputs, batch, metrics_dict=None):
@@ -364,6 +376,8 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None):
loss_dict["pred_normal_loss"] = self.config.pred_normal_loss_mult * torch.mean(
outputs["rendered_pred_normal_loss"]
)
+ # Add loss from camera optimizer
+ self.camera_optimizer.get_loss_dict(loss_dict)
return loss_dict
def get_image_metrics_and_images(
diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py
index a0380d42e4..9cc9c7ccf4 100644
--- a/nerfstudio/models/tensorf.py
+++ b/nerfstudio/models/tensorf.py
@@ -53,6 +53,7 @@
from nerfstudio.model_components.scene_colliders import AABBBoxCollider
from nerfstudio.models.base_model import Model, ModelConfig
from nerfstudio.utils import colormaps, colors, misc
+from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig
@dataclass
@@ -89,6 +90,8 @@ class TensoRFModelConfig(ModelConfig):
tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm"
regularization: Literal["none", "l1", "tv"] = "l1"
"""Regularization method used in tensorf paper"""
+ camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3")
+ """Config of the camera optimizer to use"""
background_color: Literal["random", "last_sample", "black", "white"] = "white"
"""Whether to randomize the background color."""
@@ -256,6 +259,11 @@ def populate_modules(self):
if self.config.tensorf_encoding == "cp" and self.config.regularization == "tv":
raise RuntimeError("TV reg not supported for CP decomposition")
+ # (optional) camera optimizer
+ self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup(
+ num_cameras=self.num_train_data, device="cpu"
+ )
+
def get_param_groups(self) -> Dict[str, List[Parameter]]:
param_groups = {}
@@ -267,11 +275,14 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
param_groups["encodings"] = list(self.field.color_encoding.parameters()) + list(
self.field.density_encoding.parameters()
)
+ self.camera_optimizer.get_param_groups(param_groups=param_groups)
return param_groups
def get_outputs(self, ray_bundle: RayBundle):
# uniform sampling
+ if self.training:
+ self.camera_optimizer.apply_to_raybundle(ray_bundle)
ray_samples_uniform = self.sampler_uniform(ray_bundle)
dens = self.field.get_density(ray_samples_uniform)
weights = ray_samples_uniform.get_weights(dens)
@@ -334,6 +345,8 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
else:
raise ValueError(f"Regularization {self.config.regularization} not supported")
+ self.camera_optimizer.get_loss_dict(loss_dict)
+
loss_dict = misc.scale_dict(loss_dict, self.config.loss_coefficients)
return loss_dict
@@ -367,5 +380,7 @@ def get_image_metrics_and_images(
"ssim": float(ssim.item()),
"lpips": float(lpips.item()),
}
+ self.camera_optimizer.get_metrics_dict(metrics_dict)
+
images_dict = {"img": combined_rgb, "accumulation": acc, "depth": depth}
return metrics_dict, images_dict
diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py
index 5e202b06c2..10aca0e70a 100644
--- a/nerfstudio/pipelines/base_pipeline.py
+++ b/nerfstudio/pipelines/base_pipeline.py
@@ -45,6 +45,7 @@
DataManagerConfig,
VanillaDataManager,
)
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
from nerfstudio.models.base_model import Model, ModelConfig
from nerfstudio.utils import profiler
@@ -298,18 +299,6 @@ def get_train_loss_dict(self, step: int):
ray_bundle, batch = self.datamanager.next_train(step)
model_outputs = self._model(ray_bundle) # train distributed data parallel model if world_size > 1
metrics_dict = self.model.get_metrics_dict(model_outputs, batch)
-
- if self.config.datamanager.camera_optimizer is not None:
- camera_opt_param_group = self.config.datamanager.camera_optimizer.param_group
- if camera_opt_param_group in self.datamanager.get_param_groups():
- # Report the camera optimization metrics
- metrics_dict["camera_opt_translation"] = (
- self.datamanager.get_param_groups()[camera_opt_param_group][0].data[:, :3].norm()
- )
- metrics_dict["camera_opt_rotation"] = (
- self.datamanager.get_param_groups()[camera_opt_param_group][0].data[:, 3:].norm()
- )
-
loss_dict = self.model.get_loss_dict(model_outputs, batch, metrics_dict)
return model_outputs, loss_dict, metrics_dict
@@ -372,7 +361,7 @@ def get_average_eval_image_metrics(
"""
self.eval()
metrics_dict_list = []
- assert isinstance(self.datamanager, VanillaDataManager)
+ assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager))
num_images = len(self.datamanager.fixed_indices_eval_dataloader)
with Progress(
TextColumn("[progress.description]{task.description}"),
diff --git a/nerfstudio/utils/tensor_dataclass.py b/nerfstudio/utils/tensor_dataclass.py
index 0ebe7b5ced..a2b8d1dadb 100644
--- a/nerfstudio/utils/tensor_dataclass.py
+++ b/nerfstudio/utils/tensor_dataclass.py
@@ -261,6 +261,14 @@ def to(self: TensorDataclassT, device) -> TensorDataclassT:
"""
return self._apply_fn_to_fields(lambda x: x.to(device))
+ def pin_memory(self: TensorDataclassT) -> TensorDataclassT:
+ """Pins the tensor dataclass memory
+
+ Returns:
+ TensorDataclass: A new TensorDataclass with the same data but pinned.
+ """
+ return self._apply_fn_to_fields(lambda x: x.pin_memory())
+
def _apply_fn_to_fields(
self: TensorDataclassT,
fn: Callable,
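The `pin_memory` helper mirrors `torch.Tensor.pin_memory`: page-locked host memory allows the later host-to-device copy to run asynchronously. A generic sketch of why the parallel data path pins CPU-side bundles before moving them to the GPU (plain tensors, not a `TensorDataclass`):

```python
import torch

batch = torch.rand(4096, 3)  # CPU batch produced by a worker process
if torch.cuda.is_available():
    batch = batch.pin_memory()                    # page-locked host memory
    batch = batch.to("cuda", non_blocking=True)   # async copy can overlap compute
```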
From dba20fcb26d9256c4267b578f39b3614a6ffe669 Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Tue, 10 Oct 2023 17:18:16 -0700
Subject: [PATCH 037/101] minor patch for exporting pcs and meshes with
parallel datamanager (#2503)
* minor patch for exporting pcs and meshes with parallel datamanager
* format
---
nerfstudio/data/datamanagers/parallel_datamanager.py | 11 +++++++++--
nerfstudio/scripts/exporter.py | 5 +++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
index 8d541d79ef..7a2e5ac559 100644
--- a/nerfstudio/data/datamanagers/parallel_datamanager.py
+++ b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -222,7 +222,7 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe
def setup_train(self):
"""Sets up parallel python data processes for training."""
assert self.train_dataset is not None
- self.train_pix_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore
+ self.train_pixel_sampler = self._get_pixel_sampler(self.train_dataset, self.config.train_num_rays_per_batch) # type: ignore
self.data_queue = mp.Manager().Queue(maxsize=self.config.queue_size)
self.data_procs = [
DataProcessor(
@@ -230,7 +230,7 @@ def setup_train(self):
config=self.config,
dataparser_outputs=self.train_dataparser_outputs,
dataset=self.train_dataset,
- pixel_sampler=self.train_pix_sampler,
+ pixel_sampler=self.train_pixel_sampler,
)
for i in range(self.config.num_processes)
]
@@ -319,3 +319,10 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
A list of dictionaries containing the data manager's param groups.
"""
return {}
+
+ def __del__(self):
+ """Clean up the parallel data processes."""
+ if hasattr(self, "data_procs"):
+ for proc in self.data_procs:
+ proc.terminate()
+ proc.join()
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index 2e4fc68cac..b104597abd 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -34,6 +34,7 @@
from nerfstudio.cameras.rays import RayBundle
from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.exporter import texture_utils, tsdf_utils
from nerfstudio.exporter.exporter_utils import (
@@ -137,7 +138,7 @@ def main(self) -> None:
validate_pipeline(self.normal_method, self.normal_output_name, pipeline)
# Increase the batchsize to speed up the evaluation.
- assert isinstance(pipeline.datamanager, VanillaDataManager)
+ assert isinstance(pipeline.datamanager, (VanillaDataManager, ParallelDataManager))
assert pipeline.datamanager.train_pixel_sampler is not None
pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch
@@ -307,7 +308,7 @@ def main(self) -> None:
validate_pipeline(self.normal_method, self.normal_output_name, pipeline)
# Increase the batchsize to speed up the evaluation.
- assert isinstance(pipeline.datamanager, VanillaDataManager)
+ assert isinstance(pipeline.datamanager, (VanillaDataManager, ParallelDataManager))
assert pipeline.datamanager.train_pixel_sampler is not None
pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch
From 8e2a0651d0dc9bb9d6de5d4a934d0a195ad5032d Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Tue, 10 Oct 2023 17:49:52 -0700
Subject: [PATCH 038/101] Fix jitter viewer (#2505)
---
nerfstudio/viewer_beta/control_panel.py | 6 +-----
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py
index 07f556dd46..7161816941 100644
--- a/nerfstudio/viewer_beta/control_panel.py
+++ b/nerfstudio/viewer_beta/control_panel.py
@@ -269,11 +269,7 @@ def update_step(self, step):
Args:
step: the train step to set the model to
"""
- with self.viser_server.atomic(), self.stat_folder:
- # TODO change to a .value call instead of remove() and add, this makes it jittery
- with self.viser_server.atomic():
- self.markdown.remove()
- self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}")
+ self.markdown.content = f"Step: {step}"
def update_output_options(self, new_options: List[str]):
"""
diff --git a/pyproject.toml b/pyproject.toml
index c041858ba3..fc2b0be03a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.3",
+ "viser==0.1.6",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
From 42cabedc600571c6b0095bd967b801c21a2f48d8 Mon Sep 17 00:00:00 2001
From: Piero Toffanin
Date: Thu, 12 Oct 2023 23:08:45 -0400
Subject: [PATCH 039/101] Add ODM data processor (#2517)
* Add ODM data processor
* Fix ruff warnings
* Re-format with black
* Add annotations
* Add explicit dtype
* Add explicit dtype
* Revert "Add explicit dtype"
This reverts commit 6f3cde1671e920f4f38b390c808dcae69ab86a10.
---
README.md | 1 +
docs/quickstart/custom_dataset.md | 29 ++++++
nerfstudio/process_data/odm_utils.py | 148 +++++++++++++++++++++++++++
nerfstudio/scripts/process_data.py | 89 ++++++++++++++++
4 files changed, 267 insertions(+)
create mode 100644 nerfstudio/process_data/odm_utils.py
diff --git a/README.md b/README.md
index a853192ad4..3c7e827b17 100644
--- a/README.md
+++ b/README.md
@@ -250,6 +250,7 @@ Using an existing dataset is great, but likely you want to use your own data! We
| π± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
| π₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
+| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
| π [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | π |
## 5. Advanced Options
diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md
index 723a97c91e..14a89e4a2d 100644
--- a/docs/quickstart/custom_dataset.md
+++ b/docs/quickstart/custom_dataset.md
@@ -21,6 +21,7 @@ We Currently support the following custom data types:
| π± [Record3D](record3d) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
| π₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
+| π₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
(images_and_video)=
@@ -319,6 +320,34 @@ ns-process-data realitycapture --data {data directory} --csv {csv file} --output
ns-train nerfacto --data {output directory}
```
+(odm)=
+
+## ODM
+
+All images/videos must be captured with the same camera.
+
+1. Process a dataset using [ODM](https://github.com/OpenDroneMap/ODM#quickstart)
+
+```bash
+$ ls /path/to/dataset
+images
+odm_report
+odm_orthophoto
+...
+```
+
+2. Convert to nerfstudio format.
+
+```bash
+ns-process-data odm --data /path/to/dataset --output-dir {output directory}
+```
+
+3. Train!
+
+```bash
+ns-train nerfacto --data {output directory}
+```
+
(360_data)=
## 360 Data (Equirectangular)
diff --git a/nerfstudio/process_data/odm_utils.py b/nerfstudio/process_data/odm_utils.py
new file mode 100644
index 0000000000..aa9b695410
--- /dev/null
+++ b/nerfstudio/process_data/odm_utils.py
@@ -0,0 +1,148 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Helper utils for processing ODM data into the nerfstudio format."""
+
+import json
+from pathlib import Path
+from typing import Dict, List
+import os
+import sys
+import math
+
+import numpy as np
+
+from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
+
+
+def rodrigues_vec_to_rotation_mat(rodrigues_vec: np.ndarray) -> np.ndarray:
+ theta = np.linalg.norm(rodrigues_vec)
+ if theta < sys.float_info.epsilon:
+ rotation_mat = np.eye(3, dtype=float)
+ else:
+ r = rodrigues_vec / theta
+ ident = np.eye(3, dtype=float)
+ r_rT = np.array(
+ [
+ [r[0] * r[0], r[0] * r[1], r[0] * r[2]],
+ [r[1] * r[0], r[1] * r[1], r[1] * r[2]],
+ [r[2] * r[0], r[2] * r[1], r[2] * r[2]],
+ ]
+ )
+ r_cross = np.array([[0, -r[2], r[1]], [r[2], 0, -r[0]], [-r[1], r[0], 0]], dtype=float)
+ rotation_mat = math.cos(theta) * ident + (1 - math.cos(theta)) * r_rT + math.sin(theta) * r_cross
+ return rotation_mat
+
+
+def cameras2nerfds(
+ image_filename_map: Dict[str, Path],
+ cameras_file: Path,
+ shots_file: Path,
+ output_dir: Path,
+ verbose: bool = False,
+) -> List[str]:
+ """Convert ODM cameras into a nerfstudio dataset.
+
+ Args:
+ image_filename_map: Mapping of original image filenames to their saved locations.
+ shots_file: Path to ODM's shots.geojson
+ output_dir: Path to the output directory.
+ verbose: Whether to print verbose output.
+
+ Returns:
+ Summary of the conversion.
+ """
+
+ with open(cameras_file, "r", encoding="utf-8") as f:
+ cameras = json.loads(f.read())
+ with open(shots_file, "r", encoding="utf-8") as f:
+ shots = json.loads(f.read())
+
+ camera_ids = list(cameras.keys())
+ if len(camera_ids) > 1:
+ raise ValueError("Only one camera is supported")
+ camera_id = camera_ids[0]
+ camera = cameras[camera_id]
+ data = {}
+ if camera["projection_type"] in ["brown", "perspective"]:
+ data["camera_model"] = CAMERA_MODELS["perspective"].value
+ elif camera["projection_type"] in ["fisheye", "fisheye_opencv"]:
+ data["camera_model"] = CAMERA_MODELS["fisheye"].value
+ elif camera["projection_type"] in ["spherical", "equirectangular"]:
+ data["camera_model"] = CAMERA_MODELS["equirectangular"].value
+ else:
+ raise ValueError("Unsupported ODM camera model: " + data["camera_model"])
+
+ sensor_dict = {}
+ s = {"w": int(camera["width"]), "h": int(camera["height"])}
+
+ s["fl_x"] = camera.get("focal_x", camera.get("focal")) * max(s["w"], s["h"])
+ s["fl_y"] = camera.get("focal_y", camera.get("focal")) * max(s["w"], s["h"])
+
+ s["cx"] = camera["c_x"] + (s["w"] - 1.0) / 2.0
+ s["cy"] = camera["c_y"] + (s["h"] - 1.0) / 2.0
+
+ for p in ["k1", "k2", "p1", "p2", "k3"]:
+ if p in camera:
+ s[p] = camera[p]
+
+ sensor_dict[camera_id] = s
+
+ shots = shots["features"]
+ shots_dict = {}
+ for shot in shots:
+ props = shot["properties"]
+ filename = props["filename"]
+ rotation = rodrigues_vec_to_rotation_mat(np.array(props["rotation"]) * -1)
+ translation = np.array(props["translation"])
+
+ m = np.eye(4)
+ m[:3, :3] = rotation
+ m[:3, 3] = translation
+
+ name, ext = os.path.splitext(filename)
+ shots_dict[name] = m
+
+ frames = []
+ num_skipped = 0
+
+ for fname in shots_dict:
+ transform = shots_dict[fname]
+ if fname not in image_filename_map:
+ num_skipped += 1
+ continue
+
+ frame = {}
+ frame["file_path"] = image_filename_map[fname].as_posix()
+ frame.update(sensor_dict[camera_id])
+
+ transform = transform[[2, 0, 1, 3], :]
+ transform[:, 1:3] *= -1
+ frame["transform_matrix"] = transform.tolist()
+ frames.append(frame)
+
+ data["frames"] = frames
+
+ with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
+ json.dump(data, f, indent=4)
+
+ summary = []
+ if num_skipped == 1:
+ summary.append(f"{num_skipped} image skipped because it was missing its camera pose.")
+ if num_skipped > 1:
+ summary.append(f"{num_skipped} images were skipped because they were missing camera poses.")
+
+ summary.append(f"Final dataset is {len(data['frames'])} frames.")
+
+ return summary
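`rodrigues_vec_to_rotation_mat` above implements the Rodrigues formula R = cos(theta) * I + (1 - cos(theta)) * r r^T + sin(theta) * [r]_x, where theta is the norm of the axis-angle vector and r its unit axis. A quick sanity check of the conversion, runnable once this patch is applied:

```python
import numpy as np

from nerfstudio.process_data.odm_utils import rodrigues_vec_to_rotation_mat

# A rotation of 90 degrees about the z-axis, written as an axis-angle (Rodrigues) vector.
rvec = np.array([0.0, 0.0, np.pi / 2.0])
R = rodrigues_vec_to_rotation_mat(rvec)

# Rotating the x-axis by 90 degrees about z should give the y-axis.
print(np.allclose(R @ np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 0.0])))  # True
# The result should be a proper rotation: orthonormal with determinant +1.
print(np.allclose(R.T @ R, np.eye(3)), np.isclose(np.linalg.det(R), 1.0))  # True True
```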
diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py
index 8b4151f757..83cdcca473 100644
--- a/nerfstudio/scripts/process_data.py
+++ b/nerfstudio/scripts/process_data.py
@@ -32,6 +32,7 @@
process_data_utils,
realitycapture_utils,
record3d_utils,
+ odm_utils,
)
from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset
from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset
@@ -387,6 +388,93 @@ def main(self) -> None:
CONSOLE.rule()
+@dataclass
+class ProcessODM(BaseConverterToNerfstudioDataset):
+ """Process ODM data into a nerfstudio dataset.
+
+ This script does the following:
+
+ 1. Scales images to a specified size.
+ 2. Converts ODM poses into the nerfstudio format.
+ """
+
+ num_downscales: int = 3
+ """Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
+ will downscale the images by 2x, 4x, and 8x."""
+ max_dataset_size: int = 600
+ """Max number of images to train on. If the dataset has more, images will be sampled approximately evenly. If -1,
+ use all images."""
+
+ def main(self) -> None:
+ """Process images into a nerfstudio dataset."""
+
+ orig_images_dir = self.data / "images"
+ cameras_file = self.data / "cameras.json"
+ shots_file = self.data / "odm_report" / "shots.geojson"
+
+        if not shots_file.exists():
+            raise ValueError(f"shots file {shots_file} doesn't exist")
+        if not cameras_file.exists():
+            raise ValueError(f"cameras file {cameras_file} doesn't exist")
+
+        if not orig_images_dir.exists():
+            raise ValueError(f"Images dir {orig_images_dir} doesn't exist")
+
+ if self.eval_data is not None:
+ raise ValueError("Cannot use eval_data since cameras were already aligned with ODM.")
+
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+ image_dir = self.output_dir / "images"
+ image_dir.mkdir(parents=True, exist_ok=True)
+
+ summary_log = []
+
+ # Copy images to output directory
+ image_filenames, num_orig_images = process_data_utils.get_image_filenames(
+ orig_images_dir, self.max_dataset_size
+ )
+ copied_image_paths = process_data_utils.copy_images_list(
+ image_filenames,
+ image_dir=image_dir,
+ verbose=self.verbose,
+ num_downscales=self.num_downscales,
+ )
+ num_frames = len(copied_image_paths)
+
+ copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]
+ original_names = [image_path.stem for image_path in image_filenames]
+ image_filename_map = dict(zip(original_names, copied_image_paths))
+
+ if self.max_dataset_size > 0 and num_frames != num_orig_images:
+ summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
+ summary_log.append(
+ "To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
+ f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
+ )
+ else:
+ summary_log.append(f"Started with {num_frames} images")
+
+ # Save json
+ if num_frames == 0:
+ CONSOLE.print("[bold red]No images found, exiting")
+ sys.exit(1)
+ summary_log.extend(
+ odm_utils.cameras2nerfds(
+ image_filename_map=image_filename_map,
+ cameras_file=cameras_file,
+ shots_file=shots_file,
+ output_dir=self.output_dir,
+ verbose=self.verbose,
+ )
+ )
+
+ CONSOLE.rule("[bold green]:tada: :tada: :tada: All DONE :tada: :tada: :tada:")
+
+ for summary in summary_log:
+ CONSOLE.print(summary, justify="center")
+ CONSOLE.rule()
+
+
Commands = Union[
Annotated[ImagesToNerfstudioDataset, tyro.conf.subcommand(name="images")],
Annotated[VideoToNerfstudioDataset, tyro.conf.subcommand(name="video")],
@@ -394,6 +482,7 @@ def main(self) -> None:
Annotated[ProcessMetashape, tyro.conf.subcommand(name="metashape")],
Annotated[ProcessRealityCapture, tyro.conf.subcommand(name="realitycapture")],
Annotated[ProcessRecord3D, tyro.conf.subcommand(name="record3d")],
+ Annotated[ProcessODM, tyro.conf.subcommand(name="odm")],
]
From d3934737a121296af8d236c2fa59e2c931a73444 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Sat, 14 Oct 2023 13:00:17 +0800
Subject: [PATCH 040/101] Upgrade pyright (#2526)
* Upgrade pyright
* Resolve pyright errors
* Suppress trimesh error
* Run ruff
---
nerfstudio/exporter/marching_cubes.py | 2 +-
nerfstudio/process_data/hloc_utils.py | 29 ++++++++++++-----------
nerfstudio/scripts/render.py | 14 +++++++----
nerfstudio/utils/plotly_utils.py | 1 +
nerfstudio/viewer/server/control_panel.py | 2 +-
nerfstudio/viewer/server/viewer_state.py | 8 +++----
nerfstudio/viewer/viser/message_api.py | 4 ++--
nerfstudio/viewer_beta/control_panel.py | 9 +++----
nerfstudio/viewer_beta/viewer.py | 2 +-
pyproject.toml | 4 ++--
10 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/nerfstudio/exporter/marching_cubes.py b/nerfstudio/exporter/marching_cubes.py
index a833075010..ee13422b1f 100644
--- a/nerfstudio/exporter/marching_cubes.py
+++ b/nerfstudio/exporter/marching_cubes.py
@@ -249,7 +249,7 @@ def evaluate(points: torch.Tensor) -> torch.Tensor:
)
verts = verts + np.array([x_min, y_min, z_min])
- meshcrop = trimesh.Trimesh(verts, faces, normals)
+ meshcrop = trimesh.Trimesh(verts, faces, normals) # type: ignore
meshes.append(meshcrop)
combined_mesh: trimesh.Trimesh = trimesh.util.concatenate(meshes) # type: ignore
diff --git a/nerfstudio/process_data/hloc_utils.py b/nerfstudio/process_data/hloc_utils.py
index 85049b5bf8..a833f51edd 100644
--- a/nerfstudio/process_data/hloc_utils.py
+++ b/nerfstudio/process_data/hloc_utils.py
@@ -29,7 +29,7 @@
try:
# TODO(1480) un-hide pycolmap import
import pycolmap
- from hloc import (
+ from hloc import ( # type: ignore
extract_features,
match_features,
pairs_from_exhaustive,
@@ -38,6 +38,7 @@
)
except ImportError:
_HAS_HLOC = False
+
else:
_HAS_HLOC = True
@@ -95,24 +96,24 @@ def run_hloc(
features = outputs / "features.h5"
matches = outputs / "matches.h5"
- retrieval_conf = extract_features.confs["netvlad"]
- feature_conf = extract_features.confs[feature_type]
- matcher_conf = match_features.confs[matcher_type]
+ retrieval_conf = extract_features.confs["netvlad"] # type: ignore
+ feature_conf = extract_features.confs[feature_type] # type: ignore
+ matcher_conf = match_features.confs[matcher_type] # type: ignore
references = [p.relative_to(image_dir).as_posix() for p in image_dir.iterdir()]
- extract_features.main(feature_conf, image_dir, image_list=references, feature_path=features)
+ extract_features.main(feature_conf, image_dir, image_list=references, feature_path=features) # type: ignore
if matching_method == "exhaustive":
- pairs_from_exhaustive.main(sfm_pairs, image_list=references)
+ pairs_from_exhaustive.main(sfm_pairs, image_list=references) # type: ignore
else:
- retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs)
+ retrieval_path = extract_features.main(retrieval_conf, image_dir, outputs) # type: ignore
if num_matched >= len(references):
num_matched = len(references)
- pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=num_matched)
- match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches)
+ pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=num_matched) # type: ignore
+ match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches) # type: ignore
- image_options = pycolmap.ImageReaderOptions(camera_model=camera_model.value)
+ image_options = pycolmap.ImageReaderOptions(camera_model=camera_model.value) # type: ignore
if refine_pixsfm:
- sfm = PixSfM(
+ sfm = PixSfM( # type: ignore
conf={
"dense_features": {"use_cache": True},
"KA": {"dense_features": {"use_cache": True}, "max_kps_per_problem": 1000},
@@ -126,20 +127,20 @@ def run_hloc(
features,
matches,
image_list=references,
- camera_mode=pycolmap.CameraMode.SINGLE,
+ camera_mode=pycolmap.CameraMode.SINGLE, # type: ignore
image_options=image_options,
verbose=verbose,
)
print("Refined", refined.summary())
else:
- reconstruction.main(
+ reconstruction.main( # type: ignore
sfm_dir,
image_dir,
sfm_pairs,
features,
matches,
- camera_mode=pycolmap.CameraMode.SINGLE,
+ camera_mode=pycolmap.CameraMode.SINGLE, # type: ignore
image_options=image_options,
verbose=verbose,
)
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 307bb5adba..5e7a52e015 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -18,12 +18,12 @@
"""
from __future__ import annotations
+import gzip
import json
import os
-import struct
import shutil
+import struct
import sys
-import gzip
from contextlib import ExitStack, contextmanager
from dataclasses import dataclass, field
from pathlib import Path
@@ -54,11 +54,14 @@
get_spiral_path,
)
from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle
+from nerfstudio.data.datamanagers.base_datamanager import (
+ VanillaDataManager,
+ VanillaDataManagerConfig,
+)
from nerfstudio.data.datasets.base_dataset import Dataset
-from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig
+from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader
from nerfstudio.engine.trainer import TrainerConfig
-from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.model_components import renderers
from nerfstudio.pipelines.base_pipeline import Pipeline
from nerfstudio.utils import colormaps, install_checks
@@ -318,6 +321,9 @@ def get_crop_from_json(camera_json: Dict[str, Any]) -> Optional[CropData]:
center = camera_json["crop"]["crop_center"]
scale = camera_json["crop"]["crop_scale"]
rot = (0.0, 0.0, 0.0) if "crop_rot" not in camera_json["crop"] else tuple(camera_json["crop"]["crop_rot"])
+ assert len(center) == 3
+ assert len(scale) == 3
+ assert len(rot) == 3
return CropData(
background_color=torch.Tensor([bg_color["r"] / 255.0, bg_color["g"] / 255.0, bg_color["b"] / 255.0]),
obb=OrientedBox.from_params(center, rot, scale),
diff --git a/nerfstudio/utils/plotly_utils.py b/nerfstudio/utils/plotly_utils.py
index 00cab47605..1ab7da1fb1 100644
--- a/nerfstudio/utils/plotly_utils.py
+++ b/nerfstudio/utils/plotly_utils.py
@@ -239,6 +239,7 @@ def get_cube(
if isinstance(side_length, float):
pts *= side_length / 2.0
else:
+ assert isinstance(side_length, torch.Tensor)
pts[0] *= side_length[0].item() / 2.0
pts[1] *= side_length[1].item() / 2.0
pts[2] *= side_length[2].item() / 2.0
diff --git a/nerfstudio/viewer/server/control_panel.py b/nerfstudio/viewer/server/control_panel.py
index 7039b61c3d..0f789a8ae3 100644
--- a/nerfstudio/viewer/server/control_panel.py
+++ b/nerfstudio/viewer/server/control_panel.py
@@ -196,7 +196,7 @@ def update_output_options(self, new_options: List[str]):
self._split_output_render.set_options(new_options)
self._split_output_render.value = new_options[-1]
- def add_element(self, e: ViewerElement, additional_tags: Tuple[str] = tuple()) -> None:
+ def add_element(self, e: ViewerElement, additional_tags: Tuple[str, ...] = tuple()) -> None:
"""Adds an element to the control panel
Args:
diff --git a/nerfstudio/viewer/server/viewer_state.py b/nerfstudio/viewer/server/viewer_state.py
index ad80822ce8..3bbce5e2d5 100644
--- a/nerfstudio/viewer/server/viewer_state.py
+++ b/nerfstudio/viewer/server/viewer_state.py
@@ -198,8 +198,8 @@ def _crop_params_update(self, _) -> None:
self.viser_server.send_crop_params(
crop_enabled=self.control_panel.crop_viewport,
crop_bg_color=self.control_panel.background_color,
- crop_scale=tuple(crop_scale.tolist()),
- crop_center=tuple(crop_center.tolist()),
+ crop_scale=tuple(crop_scale.tolist()), # type: ignore
+ crop_center=tuple(crop_center.tolist()), # type: ignore
)
if self.camera_message is not None:
self.render_statemachine.action(RenderAction("rerender", self.camera_message))
@@ -258,8 +258,8 @@ def _handle_crop_params_message(self, message: NerfstudioMessage) -> None:
scale = np.array(message.crop_scale)
crop_min = center - scale / 2.0
crop_max = center + scale / 2.0
- self.control_panel.crop_min = tuple(crop_min.tolist())
- self.control_panel.crop_max = tuple(crop_max.tolist())
+ self.control_panel.crop_min = tuple(crop_min.tolist()) # type: ignore
+ self.control_panel.crop_max = tuple(crop_max.tolist()) # type: ignore
def _handle_click_message(self, message: NerfstudioMessage) -> None:
"""Handle click message from viewer."""
diff --git a/nerfstudio/viewer/viser/message_api.py b/nerfstudio/viewer/viser/message_api.py
index 3c8ce752c9..bb6449cf8e 100644
--- a/nerfstudio/viewer/viser/message_api.py
+++ b/nerfstudio/viewer/viser/message_api.py
@@ -501,8 +501,8 @@ def update_scene_box(self, scene_box: SceneBox) -> None:
"""
self._queue(
messages.SceneBoxMessage(
- min=tuple(scene_box.aabb[0].tolist()),
- max=tuple(scene_box.aabb[1].tolist()),
+ min=tuple(scene_box.aabb[0].tolist()), # type: ignore
+ max=tuple(scene_box.aabb[1].tolist()), # type: ignore
)
)
diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py
index 7161816941..a6fdef970d 100644
--- a/nerfstudio/viewer_beta/control_panel.py
+++ b/nerfstudio/viewer_beta/control_panel.py
@@ -19,6 +19,8 @@
import numpy as np
import torch
import viser.transforms as vtf
+from viser import ViserServer
+
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.utils.colormaps import ColormapOptions, Colormaps
from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup,
@@ -32,7 +34,6 @@
ViewerSlider,
ViewerVec3,
)
-from viser import ViserServer
class ControlPanel:
@@ -147,7 +148,7 @@ def __init__(
self._crop_handle = self.viser_server.add_transform_controls("Crop", depth_test=False, line_width=4.0)
def update_center(han):
- self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value)
+ self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value) # type: ignore
self._crop_center = ViewerVec3(
"Crop Center",
@@ -175,7 +176,7 @@ def update_rot(han):
@self._crop_handle.on_update
def _update_crop_handle(han):
pos = self._crop_handle.position
- self._crop_center.value = tuple(p / self.viser_scale_ratio for p in pos)
+ self._crop_center.value = tuple(p / self.viser_scale_ratio for p in pos) # type: ignore
rpy = vtf.SO3(self._crop_handle.wxyz).as_rpy_radians()
self._crop_rot.value = (float(rpy.roll), float(rpy.pitch), float(rpy.yaw))
@@ -280,7 +281,7 @@ def update_output_options(self, new_options: List[str]):
self._split_output_render.set_options(new_options)
self._split_output_render.value = new_options[-1]
- def add_element(self, e: ViewerElement, additional_tags: Tuple[str] = tuple()) -> None:
+ def add_element(self, e: ViewerElement, additional_tags: Tuple[str, ...] = tuple()) -> None:
"""Adds an element to the control panel
Args:
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index d4fbf973b3..d122cf0946 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -326,7 +326,7 @@ def init_scene(
)
@camera_handle.on_click
- def _(event: viser.ClickEvent[viser.CameraFrustumHandle]) -> None:
+ def _(event: viser.SceneNodePointerEvent[viser.CameraFrustumHandle]) -> None:
assert self.client is not None
with self.client.atomic():
self.client.camera.position = event.target.position
diff --git a/pyproject.toml b/pyproject.toml
index fc2b0be03a..835a569daf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.6",
+ "viser==0.1.7",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
@@ -90,7 +90,7 @@ dev = [
"diffusers==0.16.1",
"opencv-stubs==0.0.7",
"transformers==4.29.2",
- "pyright==1.1.308",
+ "pyright==1.1.331",
]
# Documentation related packages
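Among the pyright fixes above, the `add_element` signatures change `Tuple[str]` to `Tuple[str, ...]`: the former describes a tuple of exactly one string, the latter a tuple of any length. A small illustration (the function below is hypothetical, shown only to make the distinction concrete):

```python
from typing import Tuple


def add_element_tags(additional_tags: Tuple[str, ...] = tuple()) -> None:
    # Tuple[str, ...] accepts zero or more strings; the old annotation Tuple[str]
    # only type-checks for a 1-tuple such as ("nerf",).
    for tag in additional_tags:
        print(tag)


add_element_tags(())                  # fine under Tuple[str, ...]
add_element_tags(("nerf", "custom"))  # fine under Tuple[str, ...], a type error under Tuple[str]
```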
From e9a3bae859d7e9e715f255b9854144f0dc9a5c85 Mon Sep 17 00:00:00 2001
From: Cyrus Vachha
Date: Fri, 13 Oct 2023 22:23:20 -0700
Subject: [PATCH 041/101] Update URLs for readme and image references (#2527)
* Update URLs for readme and image references
Update the image URLs and documentation links due to a change in the docs URL structure. Files edited include the readme, documentation, and legacy viewer files.
* Updating url in custom dataset
---
README.md | 30 +++++++++----------
docs/nerfology/methods/instant_ngp.md | 2 +-
docs/quickstart/custom_dataset.md | 4 +--
docs/reference/contributing.md | 2 +-
nerfstudio/configs/external_methods.py | 12 ++++----
.../viewer/app/src/modules/Banner/Banner.jsx | 2 +-
.../src/modules/LandingModal/LandingModal.jsx | 2 +-
.../ViewportControlsModal.jsx | 2 +-
8 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/README.md b/README.md
index 3c7e827b17..20f041410b 100644
--- a/README.md
+++ b/README.md
@@ -23,10 +23,10 @@
-
-
+
+
-
+
@@ -34,7 +34,7 @@
A collaboration friendly studio for NeRFs
@@ -70,7 +70,7 @@ Nerfstudio initially launched as an opensource project by Berkeley students in [
We are committed to providing learning resources to help you understand the basics of (if you're just getting started), and keep up-to-date with (if you're a seasoned veteran) all things NeRF. As researchers, we know just how hard it is to get onboarded with this next-gen technology. So we're here to help with tutorials, documentation, and more!
-Have feature requests? Want to add your brand-spankin'-new NeRF model? Have a new dataset? **We welcome [contributions](https://docs.nerf.studio/en/latest/reference/contributing.html)!** Please do not hesitate to reach out to the nerfstudio team with any questions via [Discord](https://discord.gg/uMbNqcraFc).
+Have feature requests? Want to add your brand-spankin'-new NeRF model? Have a new dataset? **We welcome [contributions](https://docs.nerf.studio/reference/contributing.html)!** Please do not hesitate to reach out to the nerfstudio team with any questions via [Discord](https://discord.gg/uMbNqcraFc).
Have feedback? We'd love for you to fill out our [Nerfstudio Feedback Form](https://forms.gle/sqN5phJN7LfQVwnP9) if you want to let us know who you are, why you are interested in Nerfstudio, or provide any feedback!
@@ -118,7 +118,7 @@ You must have an NVIDIA video card with CUDA installed on the system. This libra
### Create environment
-Nerfstudio requires `python >= 3.8`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/en/latest/miniconda.html) before proceeding.
+Nerfstudio requires `python >= 3.8`. We recommend using conda to manage dependencies. Make sure to install [Conda](https://docs.conda.io/miniconda.html) before proceeding.
```bash
conda create --name nerfstudio -y python=3.8
@@ -281,23 +281,23 @@ We support four different methods to track training progress, using the viewer[t
And that's it for getting started with the basics of nerfstudio.
-If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/en/latest/) directly.
+If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/) directly.
| Section | Description |
| -------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
-| [Documentation](https://docs.nerf.studio/en/latest/) | Full API documentation and tutorials |
+| [Documentation](https://docs.nerf.studio/) | Full API documentation and tutorials |
| [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer |
| π **Educational** |
-| [Model Descriptions](https://docs.nerf.studio/en/latest/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. |
-| [Component Descriptions](https://docs.nerf.studio/en/latest/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. |
+| [Model Descriptions](https://docs.nerf.studio/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. |
+| [Component Descriptions](https://docs.nerf.studio/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. |
| π **Tutorials** |
-| [Getting Started](https://docs.nerf.studio/en/latest/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. |
-| [Using the Viewer](https://docs.nerf.studio/en/latest/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. |
+| [Getting Started](https://docs.nerf.studio/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. |
+| [Using the Viewer](https://docs.nerf.studio/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. |
| [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. |
| π» **For Developers** |
-| [Creating pipelines](https://docs.nerf.studio/en/latest/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. |
-| [Creating datasets](https://docs.nerf.studio/en/latest/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. |
-| [Contributing](https://docs.nerf.studio/en/latest/reference/contributing.html) | Walk-through for how you can start contributing now. |
+| [Creating pipelines](https://docs.nerf.studio/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. |
+| [Creating datasets](https://docs.nerf.studio/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. |
+| [Contributing](https://docs.nerf.studio/reference/contributing.html) | Walk-through for how you can start contributing now. |
| π **Community** |
| [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! |
| [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements |
diff --git a/docs/nerfology/methods/instant_ngp.md b/docs/nerfology/methods/instant_ngp.md
index c7589e295c..5f5f7e07f1 100644
--- a/docs/nerfology/methods/instant_ngp.md
+++ b/docs/nerfology/methods/instant_ngp.md
@@ -34,7 +34,7 @@ Instant-NGP breaks NeRF training into 3 pillars and proposes improvements to eac
The core idea behind the improved sampling technique is that sampling over empty space should be skipped and sampling behind high density areas should also be skipped. This is achieved by maintaining a set of multiscale occupancy grids which coarsely mark empty and non-empty space. Occupancy is stored as a single bit, and a sample on a ray is skipped if its occupancy is too low. These occupancy grids are stored independently of the trainable encoding and are updated throughout training based on the updated density predictions. The authors find they can increase sampling speed by 10-100x compared to naive approaches.
-Nerfstudio uses [NerfAcc](https://www.nerfacc.com/en/latest/index.html) as the sampling algorithm implementation. The details on NerfAcc's sampling and occupancy grid is discussed [here](https://www.nerfacc.com/en/stable/methodology/sampling.html#occupancy-grid-estimator).
+Nerfstudio uses [NerfAcc](https://www.nerfacc.com/index.html) as the sampling algorithm implementation. The details on NerfAcc's sampling and occupancy grid are discussed [here](https://www.nerfacc.com/en/stable/methodology/sampling.html#occupancy-grid-estimator).
Another major bottleneck for NeRF's training speed has been querying the neural network. The authors of this work implement the network such that it runs entirely on a single CUDA kernel. The network is also shrunk down to be just 4 layers with 64 neurons in each layer. They show that their fully-fused neural network is 5-10x faster than a Tensorflow implementation.
diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md
index 14a89e4a2d..46aaa907d6 100644
--- a/docs/quickstart/custom_dataset.md
+++ b/docs/quickstart/custom_dataset.md
@@ -406,7 +406,7 @@ This outputs two 180 deg equirectangular renders horizontally stacked, one for e
### Setup instructions
-To render for VR video it is essential to adjust the NeRF to have an approximately true-to-life real world scale (adjustable in the camera path) to ensure that the scene depth and IPD (distance between the eyes) is appropriate for the render to be viewable in VR. You can adjust the scene scale with the [Nerfstudio Blender Add-on](https://docs.nerf.studio/en/latest/extensions/blender_addon.html) by appropriately scaling a point cloud representation of the NeRF.
+To render for VR video it is essential to adjust the NeRF to have an approximately true-to-life real world scale (adjustable in the camera path) to ensure that the scene depth and IPD (distance between the eyes) is appropriate for the render to be viewable in VR. You can adjust the scene scale with the [Nerfstudio Blender Add-on](https://docs.nerf.studio/extensions/blender_addon.html) by appropriately scaling a point cloud representation of the NeRF.
Results may be unviewable if the scale is not set appropriately. The IPD is set at 64mm by default but only is accurate when the NeRF scene is true to scale.
For good quality renders, it is recommended to render at high resolutions (For ODS: 4096x2048 per eye, or 2048x1024 per eye. For VR180: 4096x4096 per eye or 2048x2048 per eye). Render resolutions for a single eye are specified in the camera path. For VR180, resolutions must be in a 1:1 aspect ratio. For ODS, resolutions must be in a 2:1 aspect ratio. The final stacked render output will automatically be constructed (with aspect ratios for VR180 as 2:1 and ODS as 1:1).
@@ -417,7 +417,7 @@ If you are rendering an image sequence, it is recommended to render as png inste
:::
To render with the VR videos camera:
-1. Use the [Nerfstudio Blender Add-on](https://docs.nerf.studio/en/latest/extensions/blender_addon.html) to set the scale of the NeRF scene and create the camera path
+1. Use the [Nerfstudio Blender Add-on](https://docs.nerf.studio/extensions/blender_addon.html) to set the scale of the NeRF scene and create the camera path
- Export a point cloud representation of the NeRF
- Import the point cloud representation in Blender and enable the Nerfstudio Blender Add-on
- Create a reference object such as a cube which may be 1x1x1 meter. You could also create a cylinder and scale it to an appropriate height of a viewer.
diff --git a/docs/reference/contributing.md b/docs/reference/contributing.md
index 529d154eb4..bf0af33db3 100644
--- a/docs/reference/contributing.md
+++ b/docs/reference/contributing.md
@@ -94,7 +94,7 @@ python nerfstudio/scripts/docs/build_docs.py
### Auto build
-As you change or add models/components, the auto-generated [Reference API](https://docs.nerf.studio/en/latest/reference/api/index.html) may change.
+As you change or add models/components, the auto-generated [Reference API](https://docs.nerf.studio/reference/api/index.html) may change.
If you want the code to build on save you can use [sphinx autobuild](https://github.com/executablebooks/sphinx-autobuild).
:::{admonition} Tip
diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py
index 6a0c78b099..ce4e7bbc2b 100644
--- a/nerfstudio/configs/external_methods.py
+++ b/nerfstudio/configs/external_methods.py
@@ -45,7 +45,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]Instruct-NeRF2NeRF[/bold yellow]
-For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/in2n.html
+For more information visit: https://docs.nerf.studio/nerfology/methods/in2n.html
To enable Instruct-NeRF2NeRF, you must install it first by running:
[grey]pip install git+https://github.com/ayaanzhaque/instruct-nerf2nerf[/grey]""",
@@ -62,7 +62,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]K-Planes[/bold yellow]
-For more information visit https://docs.nerf.studio/en/latest/nerfology/methods/kplanes.html
+For more information visit https://docs.nerf.studio/nerfology/methods/kplanes.html
To enable K-Planes, you must install it first by running:
[grey]pip install kplanes-nerfstudio[/grey]""",
@@ -78,7 +78,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]LERF[/bold yellow]
-For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/lerf.html
+For more information visit: https://docs.nerf.studio/nerfology/methods/lerf.html
To enable LERF, you must install it first by running:
[grey]pip install git+https://github.com/kerrj/lerf[/grey]""",
@@ -95,7 +95,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]Tetra-NeRF[/bold yellow]
-For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/tetranerf.html
+For more information visit: https://docs.nerf.studio/nerfology/methods/tetranerf.html
To enable Tetra-NeRF, you must install it first. Please follow the instructions here:
https://github.com/jkulhanek/tetra-nerf/blob/master/README.md#installation""",
@@ -110,7 +110,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]NeRFPlayer[/bold yellow]
-For more information visit: https://docs.nerf.studio/en/latest/nerfology/methods/nerfplayer.html
+For more information visit: https://docs.nerf.studio/nerfology/methods/nerfplayer.html
To enable NeRFPlayer, you must install it first by running:
[grey]pip install git+https://github.com/lsongx/nerfplayer-nerfstudio[/grey]""",
@@ -125,7 +125,7 @@ class ExternalMethod:
external_methods.append(
ExternalMethod(
"""[bold yellow]Volinga[/bold yellow]
-For more information visit: https://docs.nerf.studio/en/latest/extensions/unreal_engine.html
+For more information visit: https://docs.nerf.studio/extensions/unreal_engine.html
To enable Volinga, you must install it first by running:
[grey]pip install git+https://github.com/Volinga/volinga-model[/grey]""",
diff --git a/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx b/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx
index baedf714d6..e2156606a6 100644
--- a/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx
+++ b/nerfstudio/viewer/app/src/modules/Banner/Banner.jsx
@@ -62,7 +62,7 @@ export default function Banner() {
From 5f5562b191997e1c09216e1980a788bfdb1bd956 Mon Sep 17 00:00:00 2001
From: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com>
Date: Mon, 13 Nov 2023 10:23:05 -0800
Subject: [PATCH 064/101] Render nearest training view (#2384)
* render nearest camera to render view
* added lpips
* added the rotation distance and the depth check for the cameras
* render nearest training img
* formatting
* add flag for occlusions
* remove commented import
* remove comments
* fix pyright test
* remove None
* add assert
* fix pyright tests
* format
* made it a flag render-nearest-camera
* added default cams and dataset for tests
* maintain resolution of training images
---
nerfstudio/scripts/render.py | 93 ++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 5e7a52e015..d9d815ed71 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -48,11 +48,14 @@
from torch import Tensor
from typing_extensions import Annotated
+import viser.transforms as tf
+
from nerfstudio.cameras.camera_paths import (
get_interpolated_camera_path,
get_path_from_json,
get_spiral_path,
)
+
from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle
from nerfstudio.data.datamanagers.base_datamanager import (
VanillaDataManager,
@@ -84,6 +87,8 @@ def _render_trajectory_video(
depth_near_plane: Optional[float] = None,
depth_far_plane: Optional[float] = None,
colormap_options: colormaps.ColormapOptions = colormaps.ColormapOptions(),
+    render_nearest_camera: bool = False,
+ check_occlusions: bool = False,
) -> None:
"""Helper function to create a video of the spiral trajectory.
@@ -99,6 +104,8 @@ def _render_trajectory_video(
depth_near_plane: Closest depth to consider when using the colormap for depth. If None, use min value.
depth_far_plane: Furthest depth to consider when using the colormap for depth. If None, use max value.
colormap_options: Options for colormap.
+ render_nearest_camera: Whether to render the nearest training camera to the rendered camera.
+ check_occlusions: If true, checks line-of-sight occlusions when computing camera distance and rejects cameras not visible to each other
"""
CONSOLE.print("[bold green]Creating trajectory " + output_format)
cameras.rescale_output_resolution(rendered_resolution_scaling_factor)
@@ -132,6 +139,14 @@ def _render_trajectory_video(
with ExitStack() as stack:
writer = None
+ if render_nearest_camera:
+ assert pipeline.datamanager.train_dataset is not None
+ train_dataset = pipeline.datamanager.train_dataset
+ train_cameras = train_dataset.cameras.to(pipeline.device)
+ else:
+ train_dataset = None
+ train_cameras = None
+
with progress:
for camera_idx in progress.track(range(cameras.size), description=""):
obb_box = None
@@ -139,6 +154,50 @@ def _render_trajectory_video(
obb_box = crop_data.obb
camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box)
+ max_dist, max_idx = -1, -1
+ true_max_dist, true_max_idx = -1, -1
+
+ if render_nearest_camera:
+ assert pipeline.datamanager.train_dataset is not None
+ assert train_dataset is not None
+ assert train_cameras is not None
+ cam_pos = cameras[camera_idx].camera_to_worlds[:, 3].cpu()
+ cam_quat = tf.SO3.from_matrix(cameras[camera_idx].camera_to_worlds[:3, :3].numpy(force=True)).wxyz
+
+ for i in range(len(train_cameras)):
+ train_cam_pos = train_cameras[i].camera_to_worlds[:, 3].cpu()
+ # Make sure the line of sight from rendered cam to training cam is not blocked by any object
+ bundle = RayBundle(
+ origins=cam_pos.view(1, 3),
+ directions=((cam_pos - train_cam_pos) / (cam_pos - train_cam_pos).norm()).view(1, 3),
+ pixel_area=torch.tensor(1).view(1, 1),
+ nears=torch.tensor(0.05).view(1, 1),
+ fars=torch.tensor(100).view(1, 1),
+ camera_indices=torch.tensor(0).view(1, 1),
+ metadata={},
+ ).to(pipeline.device)
+ outputs = pipeline.model.get_outputs(bundle)
+
+ q = tf.SO3.from_matrix(train_cameras[i].camera_to_worlds[:3, :3].numpy(force=True)).wxyz
+ # calculate distance between two quaternions
+ rot_dist = 1 - np.dot(q, cam_quat) ** 2
+ pos_dist = torch.norm(train_cam_pos - cam_pos)
+ dist = 0.3 * rot_dist + 0.7 * pos_dist
+
+ if true_max_dist == -1 or dist < true_max_dist:
+ true_max_dist = dist
+ true_max_idx = i
+
+ if outputs["depth"][0] < torch.norm(cam_pos - train_cam_pos).item():
+ continue
+
+ if check_occlusions and (max_dist == -1 or dist < max_dist):
+ max_dist = dist
+ max_idx = i
+
+ if max_idx == -1:
+ max_idx = true_max_idx
+
if crop_data is not None:
with renderers.background_color_override_context(
crop_data.background_color.to(pipeline.device)
@@ -181,6 +240,28 @@ def _render_trajectory_video(
.numpy()
)
render_image.append(output_image)
+
+ # Add closest training image to the right of the rendered image
+ if render_nearest_camera:
+ assert train_dataset is not None
+ assert train_cameras is not None
+ img = train_dataset.get_image(max_idx)
+ height = cameras.image_height[0]
+ # maintain the resolution of the img to calculate the width from the height
+ width = int(img.shape[1] * (height / img.shape[0]))
+ resized_image = torch.nn.functional.interpolate(
+ img.permute(2, 0, 1)[None], size=(int(height), int(width))
+ )[0].permute(1, 2, 0)
+ resized_image = (
+ colormaps.apply_colormap(
+ image=resized_image,
+ colormap_options=colormap_options,
+ )
+ .cpu()
+ .numpy()
+ )
+ render_image.append(resized_image)
+
render_image = np.concatenate(render_image, axis=1)
if output_format == "images":
if image_format == "png":
@@ -354,6 +435,10 @@ class BaseRender:
"""Furthest depth to consider when using the colormap for depth. If None, use max value."""
colormap_options: colormaps.ColormapOptions = colormaps.ColormapOptions()
"""Colormap options."""
+ render_nearest_camera: bool = False
+ """Whether to render the nearest training camera to the rendered camera."""
+ check_occlusions: bool = False
+ """If true, checks line-of-sight occlusions when computing camera distance and rejects cameras not visible to each other"""
@dataclass
@@ -418,6 +503,8 @@ def main(self) -> None:
depth_near_plane=self.depth_near_plane,
depth_far_plane=self.depth_far_plane,
colormap_options=self.colormap_options,
+ render_nearest_camera=self.render_nearest_camera,
+ check_occlusions=self.check_occlusions,
)
if (
@@ -451,6 +538,8 @@ def main(self) -> None:
depth_near_plane=self.depth_near_plane,
depth_far_plane=self.depth_far_plane,
colormap_options=self.colormap_options,
+ render_nearest_camera=self.render_nearest_camera,
+ check_occlusions=self.check_occlusions,
)
self.output_path = Path(str(left_eye_path.parent)[:-5] + ".mp4")
@@ -549,6 +638,8 @@ def main(self) -> None:
depth_near_plane=self.depth_near_plane,
depth_far_plane=self.depth_far_plane,
colormap_options=self.colormap_options,
+ render_nearest_camera=self.render_nearest_camera,
+ check_occlusions=self.check_occlusions,
)
@@ -592,6 +683,8 @@ def main(self) -> None:
depth_near_plane=self.depth_near_plane,
depth_far_plane=self.depth_far_plane,
colormap_options=self.colormap_options,
+ render_nearest_camera=self.render_nearest_camera,
+ check_occlusions=self.check_occlusions,
)
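The nearest-camera search above mixes a positional distance with a rotational one, `rot_dist = 1 - dot(q, cam_quat) ** 2`, computed on unit quaternions in wxyz order. Squaring the dot product makes the metric insensitive to the q / -q sign ambiguity: it is 0 for identical rotations and grows toward 1 as the rotations diverge. A small standalone check of that behaviour:

```python
import numpy as np


def rotation_distance(q1: np.ndarray, q2: np.ndarray) -> float:
    """1 - <q1, q2>^2 for unit quaternions; invariant to the sign of either quaternion."""
    return 1.0 - float(np.dot(q1, q2)) ** 2


identity = np.array([1.0, 0.0, 0.0, 0.0])  # no rotation, wxyz order
quarter_turn = np.array([np.cos(np.pi / 4.0), 0.0, 0.0, np.sin(np.pi / 4.0)])  # 90 deg about z

print(rotation_distance(identity, identity))      # 0.0
print(rotation_distance(identity, -identity))     # 0.0, the sign flip is ignored
print(rotation_distance(identity, quarter_turn))  # 0.5
```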
From 1f5f0c792b5d9bb2753fe3c0ad9e9c7e25337c66 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Tue, 14 Nov 2023 00:00:21 -0800
Subject: [PATCH 065/101] Make spherical harmonics test more forgiving (#2607)
---
tests/utils/test_math.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/utils/test_math.py b/tests/utils/test_math.py
index c0e1edc433..952c6a1be5 100644
--- a/tests/utils/test_math.py
+++ b/tests/utils/test_math.py
@@ -13,4 +13,4 @@ def test_spherical_harmonics(components):
dx = dx / torch.linalg.norm(dx, dim=-1, keepdim=True)
sh = components_from_spherical_harmonics(components, dx)
matrix = (sh.T @ sh) / N * 4 * torch.pi
- torch.testing.assert_close(matrix, torch.eye(components**2), rtol=0, atol=1e-2)
+ torch.testing.assert_close(matrix, torch.eye(components**2), rtol=0, atol=1.5e-2)
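The test above is a Monte-Carlo check of spherical-harmonic orthonormality: for N directions drawn uniformly on the sphere, `(sh.T @ sh) / N * 4 * pi` estimates the Gram matrix of the basis functions, which should approach the identity, and the looser `atol` simply absorbs the sampling noise. Written out on its own (mirroring the test; the import path is an assumption about where the function lives in nerfstudio):

```python
import torch

from nerfstudio.utils.math import components_from_spherical_harmonics  # assumed import path

N = 1_000_000
levels = 4  # 4 SH levels -> 16 basis functions

dx = torch.randn((N, 3))
dx = dx / torch.linalg.norm(dx, dim=-1, keepdim=True)  # uniform directions on the unit sphere

sh = components_from_spherical_harmonics(levels, dx)  # shape (N, levels**2)
gram = (sh.T @ sh) / N * 4 * torch.pi  # Monte-Carlo estimate of the Gram matrix
print(torch.allclose(gram, torch.eye(levels**2), rtol=0, atol=1.5e-2))  # expected: True
```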
From b8736f704fcfdc7c1169c9a7f9956cdf6c817017 Mon Sep 17 00:00:00 2001
From: Chris Heinrich
Date: Tue, 14 Nov 2023 00:14:17 -0800
Subject: [PATCH 066/101] Update LICENSE (#2597)
Filling in the license to what I think it should be. We're adding this to our list of third-party libraries.
---
LICENSE | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/LICENSE b/LICENSE
index 261eeb9e9f..ee561dfbe4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.
- Copyright [yyyy] [name of copyright owner]
+ Copyright 2023 The Nerfstudio Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
From be37bcddea3df1206c054a30441dad811ff41b0d Mon Sep 17 00:00:00 2001
From: emma <57429431+emmaguo13@users.noreply.github.com>
Date: Tue, 14 Nov 2023 00:21:03 -0800
Subject: [PATCH 067/101] Remove an outdated comment in RayGenerator class
(#2606)
clean up comment
---
nerfstudio/model_components/ray_generators.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/nerfstudio/model_components/ray_generators.py b/nerfstudio/model_components/ray_generators.py
index bad32258d0..fab9e39bba 100644
--- a/nerfstudio/model_components/ray_generators.py
+++ b/nerfstudio/model_components/ray_generators.py
@@ -28,7 +28,6 @@ class RayGenerator(nn.Module):
Args:
cameras: Camera objects containing camera info.
- pose_optimizer: pose optimization module, for optimizing noisy camera intrinsics/extrinsics.
"""
image_coords: Tensor
From 9528a3f717fe28a6c6500199b5b37d4f2c6e10ca Mon Sep 17 00:00:00 2001
From: Rohan Mathur
Date: Wed, 15 Nov 2023 20:09:48 -0800
Subject: [PATCH 068/101] Added UI to load camera path from existing path for
the dataset (#2530)
* added load camera path
* bugfixes
* fixed fov value in load camera path
---------
Co-authored-by: Justin Kerr
---
nerfstudio/viewer_beta/render_panel.py | 72 ++++++++++++++++++++++++--
1 file changed, 68 insertions(+), 4 deletions(-)
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index 53966fe74e..36e53ccbde 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -13,21 +13,24 @@
# limitations under the License.
from __future__ import annotations
-from pathlib import Path
+
import colorsys
import dataclasses
+import datetime
+import json
import threading
import time
+from pathlib import Path
from typing import Dict, List, Optional, Tuple
-import datetime
-from nerfstudio.viewer_beta.control_panel import ControlPanel
+
import numpy as onp
import splines
import splines.quaternion
import viser
-import json
import viser.transforms as tf
+from nerfstudio.viewer_beta.control_panel import ControlPanel
+
@dataclasses.dataclass
class Keyframe:
@@ -541,6 +544,67 @@ def _(_) -> None:
play_button.visible = True
pause_button.visible = False
+ # add button for loading existing path
+ load_camera_path_button = server.add_gui_button(
+ "Load Path", icon=viser.Icon.FOLDER_OPEN, hint="Load an existing camera path."
+ )
+
+ @load_camera_path_button.on_click
+ def _(event: viser.GuiEvent) -> None:
+ assert event.client is not None
+ camera_path_dir = datapath / "camera_paths"
+ camera_path_dir.mkdir(parents=True, exist_ok=True)
+ preexisting_camera_paths = list(camera_path_dir.glob("*.json"))
+ preexisting_camera_filenames = [p.name for p in preexisting_camera_paths]
+
+ with event.client.add_gui_modal("Load Path") as modal:
+ if len(preexisting_camera_filenames) == 0:
+ event.client.add_gui_markdown("No existing paths found")
+ else:
+ event.client.add_gui_markdown("Select existing camera path:")
+ camera_path_dropdown = event.client.add_gui_dropdown(
+ label="Camera Path",
+ options=[str(p) for p in preexisting_camera_filenames],
+ initial_value=str(preexisting_camera_filenames[0]),
+ )
+ load_button = event.client.add_gui_button("Load")
+
+ @load_button.on_click
+ def _(_) -> None:
+ # load the json file
+ json_path = datapath / "camera_paths" / camera_path_dropdown.value
+ with open(json_path, "r") as f:
+ json_data = json.load(f)
+
+ keyframes = json_data["keyframes"]
+ camera_path.reset()
+ for i in range(len(keyframes)):
+ frame = keyframes[i]
+ pose = tf.SE3.from_matrix(onp.array(frame["matrix"]).reshape(4, 4))
+ # apply the x rotation by 180 deg
+ pose = tf.SE3.from_rotation_and_translation(
+ pose.rotation() @ tf.SO3.from_x_radians(onp.pi), pose.translation()
+ )
+ camera_path.add_camera(
+ Keyframe(
+ position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO,
+ wxyz=pose.rotation().wxyz,
+ override_fov_enabled=True,
+ override_fov_value=frame["fov"] / 180.0 * onp.pi,
+ aspect=frame["aspect"],
+ ),
+ )
+ # update the render name
+ render_name_text.value = json_path.stem
+ camera_path.update_spline()
+ modal.close()
+
+ cancel_button = event.client.add_gui_button("Cancel")
+
+ @cancel_button.on_click
+ def _(_) -> None:
+ modal.close()
+
# set the initial value to the current date-time string
now = datetime.datetime.now()
render_name_text = server.add_gui_text(
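When a saved path is loaded above, each keyframe's 4x4 matrix is wrapped back into an SE(3) pose and then rotated 180 degrees about the camera's local x-axis, which negates the camera's y and z axes (the usual conversion between the "y-up, look down -z" and "y-down, look down +z" camera conventions). A stripped-down sketch of that step with `viser.transforms` (identity pose used purely for illustration):

```python
import numpy as onp
import viser.transforms as tf

# A camera-to-world pose as stored in the camera-path json (identity here for illustration).
pose = tf.SE3.from_matrix(onp.eye(4))

# Rotate the camera frame 180 degrees about its local x-axis, negating its y and z axes.
flipped = tf.SE3.from_rotation_and_translation(
    pose.rotation() @ tf.SO3.from_x_radians(onp.pi),
    pose.translation(),
)
# Quaternion is approximately (0, 1, 0, 0) in wxyz order; the translation stays (0, 0, 0).
print(flipped.rotation().wxyz, flipped.translation())
```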
From 43703fa05a306d765b5c6c57ccc763af3de66f0b Mon Sep 17 00:00:00 2001
From: Chung Min Kim
Date: Sun, 26 Nov 2023 21:56:49 -0800
Subject: [PATCH 069/101] Bugfix; scrape pipeline for viewercontrol, not
trainer (#2621)
Scrape pipeline for viewercontrol, not trainer
---
nerfstudio/viewer_beta/viewer.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index 3054cf5cba..e577ca5c37 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -213,7 +213,7 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem
# scrape the trainer/pipeline for any ViewerControl objects to initialize them
self.viewer_controls: List[ViewerControl] = [
- e for (_, e) in parse_object(self.trainer, ViewerControl, "Custom Elements")
+ e for (_, e) in parse_object(pipeline, ViewerControl, "Custom Elements")
]
for c in self.viewer_controls:
c._setup(self)
From c896ee4e0a26f98f2b06ff17ecb6c99013816c4e Mon Sep 17 00:00:00 2001
From: Chung Min Kim
Date: Sun, 26 Nov 2023 22:11:05 -0800
Subject: [PATCH 070/101] Remove PCA colormap bug for float output maps (#2620)
Previously, "PCA" would be offered as a colormap option for float outputs (and throws "PCA is not a known colormap" error).
---
nerfstudio/viewer_beta/control_panel.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py
index faa027622b..d118836849 100644
--- a/nerfstudio/viewer_beta/control_panel.py
+++ b/nerfstudio/viewer_beta/control_panel.py
@@ -445,7 +445,7 @@ def _get_colormap_options(dimensions: int, dtype: type) -> List[Colormaps]:
if dimensions == 3:
colormap_options = ["default"]
if dimensions == 1 and dtype in [torch.float64, torch.float32, torch.float16, torch.bfloat16]:
- colormap_options = [c for c in list(get_args(Colormaps)) if c != "default"]
+ colormap_options = [c for c in list(get_args(Colormaps)) if c not in ("default", "pca")]
if dimensions > 3:
colormap_options = ["pca"]
return colormap_options
From 38726da2190780c2a2205449dfac5d9ae98136e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Kulh=C3=A1nek?=
Date: Wed, 29 Nov 2023 20:48:41 +0100
Subject: [PATCH 071/101] Fix tangential camera distortion (p1,p2 in opencv
camera) (#2627)
---
nerfstudio/cameras/cameras.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
index 68f7b186fd..c1b988dbbd 100644
--- a/nerfstudio/cameras/cameras.py
+++ b/nerfstudio/cameras/cameras.py
@@ -615,9 +615,9 @@ def _generate_rays_from_coords(
# Get our image coordinates and image coordinates offset by 1 (offsets used for dx, dy calculations)
# Also make sure the shapes are correct
- coord = torch.stack([(x - cx) / fx, -(y - cy) / fy], -1) # (num_rays, 2)
- coord_x_offset = torch.stack([(x - cx + 1) / fx, -(y - cy) / fy], -1) # (num_rays, 2)
- coord_y_offset = torch.stack([(x - cx) / fx, -(y - cy + 1) / fy], -1) # (num_rays, 2)
+ coord = torch.stack([(x - cx) / fx, (y - cy) / fy], -1) # (num_rays, 2)
+ coord_x_offset = torch.stack([(x - cx + 1) / fx, (y - cy) / fy], -1) # (num_rays, 2)
+ coord_y_offset = torch.stack([(x - cx) / fx, (y - cy + 1) / fy], -1) # (num_rays, 2)
assert (
coord.shape == num_rays_shape + (2,)
and coord_x_offset.shape == num_rays_shape + (2,)
@@ -648,6 +648,9 @@ def _generate_rays_from_coords(
distortion_params[mask, :],
).reshape(-1, 2)
+ # Switch from OpenCV to OpenGL
+ coord_stack[..., 1] *= -1
+
# Make sure after we have undistorted our images, the shapes are still correct
assert coord_stack.shape == (3,) + num_rays_shape + (2,)
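The fix above keeps the pixel coordinates in the OpenCV convention while the distortion is removed, and only flips the y-axis to the OpenGL convention afterwards. The order matters because the tangential (p1, p2) terms are not symmetric under a y-flip, while the purely radial terms are, which is why only p1/p2 were affected. A small numeric illustration of that asymmetry using the standard OpenCV tangential terms (radial part omitted for brevity):

```python
def tangential_offset(x: float, y: float, p1: float, p2: float):
    # OpenCV tangential distortion offsets (radial terms left out):
    #   dx = 2*p1*x*y + p2*(r^2 + 2*x^2)
    #   dy = p1*(r^2 + 2*y^2) + 2*p2*x*y
    r2 = x * x + y * y
    dx = 2.0 * p1 * x * y + p2 * (r2 + 2.0 * x * x)
    dy = p1 * (r2 + 2.0 * y * y) + 2.0 * p2 * x * y
    return dx, dy


# Mirroring y does not simply mirror the offsets, so the distortion model has to be
# applied in OpenCV coordinates before any sign flip of the y-axis.
print(tangential_offset(0.3, 0.2, p1=0.01, p2=0.02))   # approximately (0.0074, 0.0045)
print(tangential_offset(0.3, -0.2, p1=0.01, p2=0.02))  # approximately (0.0050, -0.0003)
```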
From 5865ecf3236e47818974354a793b5d94dbc0c272 Mon Sep 17 00:00:00 2001
From: Chung Min Kim
Date: Wed, 29 Nov 2023 14:48:58 -0800
Subject: [PATCH 072/101] Integrate scene click capability from original viewer
(#2509)
* Integrate scene click capability from original viewer
* filter non-click ScenePointerEvents from ClickEvent
* lint
* Put click origin+direction in nerfstudio world scale
* Update docs
* Update doc heading
---------
Co-authored-by: Justin Kerr
---
.../developer_guides/viewer/viewer_control.md | 14 ++++++--
nerfstudio/viewer_beta/viewer_elements.py | 33 ++++++++++++++++---
2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/docs/developer_guides/viewer/viewer_control.md b/docs/developer_guides/viewer/viewer_control.md
index e6a84d3d76..0bb33294c6 100644
--- a/docs/developer_guides/viewer/viewer_control.md
+++ b/docs/developer_guides/viewer/viewer_control.md
@@ -62,8 +62,8 @@ class MyModel(nn.Module): # Must inherit from nn.Module
self.viewer_button = ViewerButton(name="Dummy Button",cb_hook=button_cb)
```
-## Double-click Callbacks
-We forward *double* clicks inside the viewer to the ViewerControl object, which you can use to interact with the scene. To do this, register a callback using `register_click_cb()`. The click is defined to be a ray that starts at the camera origin and passes through the click point on the screen, in world coordinates.
+## Scene Click Callbacks
+We forward *single* clicks inside the viewer to the ViewerControl object, which you can use to interact with the scene. To do this, register a callback using `register_click_cb()`. The click is defined to be a ray that starts at the camera origin and passes through the click point on the screen, in world coordinates.
```python
from nerfstudio.viewer.server.viewer_elements import ViewerControl,ViewerClick
@@ -77,6 +77,16 @@ class MyModel(nn.Module): # must inherit from nn.Module
self.viewer_control.register_click_cb(click_cb)
```
+You can also use `unregister_click_cb()` to remove callbacks that are no longer needed. A good example is a "Click on Scene" button that, when pressed, registers a callback which waits for the next click and then unregisters itself.
+```python
+ ...
+ def button_cb(button: ViewerButton):
+ def click_cb(click: ViewerClick):
+ print(f"Click at {click.origin} in direction {click.direction}")
+ self.viewer_control.unregister_click_cb(click_cb)
+ self.viewer_control.register_click_cb(click_cb)
+```
+
### Thread safety
Just like `ViewerElement` callbacks, click callbacks are asynchronous to training and can potentially interrupt a call to `get_outputs()`.
diff --git a/nerfstudio/viewer_beta/viewer_elements.py b/nerfstudio/viewer_beta/viewer_elements.py
index 82a8b3d447..e8de855e0d 100644
--- a/nerfstudio/viewer_beta/viewer_elements.py
+++ b/nerfstudio/viewer_beta/viewer_elements.py
@@ -31,6 +31,7 @@
GuiButtonHandle,
GuiDropdownHandle,
GuiInputHandle,
+ ScenePointerEvent,
ViserServer,
)
@@ -68,7 +69,7 @@ class for exposing non-gui controls of the viewer to the user
def __init__(self):
# this should be a user-facing constructor, since it will be used inside the model/pipeline class
- self.click_cbs = []
+ self._click_cbs = {}
def _setup(self, viewer: Viewer):
"""
@@ -151,13 +152,35 @@ def register_click_cb(self, cb: Callable):
cb: The callback to call when a click is detected.
The callback should take a ViewerClick object as an argument
"""
- self.click_cbs.append(cb)
+ from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO
+
+ def wrapped_cb(scene_pointer_msg: ScenePointerEvent):
+ # only call the callback if the event is a click
+ if scene_pointer_msg.event != "click":
+ return
+ origin = scene_pointer_msg.ray_origin
+ direction = scene_pointer_msg.ray_direction
+
+ origin = tuple([x / VISER_NERFSTUDIO_SCALE_RATIO for x in origin])
+ assert len(origin) == 3
+
+ click = ViewerClick(origin, direction)
+ cb(click)
- def on_click(self, msg):
+ self._click_cbs[cb] = wrapped_cb
+ self.viser_server.on_scene_click(wrapped_cb)
+
+ def unregister_click_cb(self, cb: Callable):
"""
- Internal use only, register a click in the viewer which propagates to all self.click_cbs
+ Remove a callback which will be called when a click is detected in the viewer.
+
+ Args:
+ cb: The callback to remove
"""
- raise NotImplementedError()
+ if cb not in self._click_cbs:
+ raise ValueError(f"Callback {cb} not registered, cannot remove")
+ self.viser_server.remove_scene_click_callback(self._click_cbs[cb])
+ self._click_cbs.pop(cb)
@property
def server(self):
From c8c55521e8f8e429e065d54ff45447181f8b6df3 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Wed, 29 Nov 2023 16:52:37 -0800
Subject: [PATCH 073/101] Keyframe timing for beta viewer (#2638)
* Add keyframe timing to beta viewer
* Nits
* New keyframe timing logic
* Bump viser version
---
nerfstudio/viewer_beta/render_panel.py | 600 ++++++++++++++++++-------
pyproject.toml | 2 +-
2 files changed, 431 insertions(+), 171 deletions(-)
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index 36e53ccbde..b5f4e1bfb2 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -21,9 +21,9 @@
import threading
import time
from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple, Union
-import numpy as onp
+import numpy as np
import splines
import splines.quaternion
import viser
@@ -34,11 +34,13 @@
@dataclasses.dataclass
class Keyframe:
- position: onp.ndarray
- wxyz: onp.ndarray
+ position: np.ndarray
+ wxyz: np.ndarray
override_fov_enabled: bool
- override_fov_value: float
+ override_fov_rad: float
aspect: float
+ override_transition_enabled: bool
+ override_transition_sec: Optional[float]
@staticmethod
def from_camera(camera: viser.CameraHandle, aspect: float) -> Keyframe:
@@ -46,17 +48,19 @@ def from_camera(camera: viser.CameraHandle, aspect: float) -> Keyframe:
camera.position,
camera.wxyz,
override_fov_enabled=False,
- override_fov_value=camera.fov,
+ override_fov_rad=camera.fov,
aspect=aspect,
+ override_transition_enabled=False,
+ override_transition_sec=None,
)
class CameraPath:
- def __init__(self, server: viser.ViserServer):
+ def __init__(self, server: viser.ViserServer, duration_element: viser.GuiInputHandle[float]):
self._server = server
self._keyframes: Dict[int, Tuple[Keyframe, viser.CameraFrustumHandle]] = {}
self._keyframe_counter: int = 0
- self._spline: Optional[viser.SceneNodeHandle] = None
+ self._spline_nodes: List[viser.SceneNodeHandle] = []
self._camera_edit_panel: Optional[viser.Gui3dContainerHandle] = None
self._orientation_spline: Optional[splines.quaternion.KochanekBartels] = None
@@ -64,10 +68,15 @@ def __init__(self, server: viser.ViserServer):
self._fov_spline: Optional[splines.KochanekBartels] = None
self._keyframes_visible: bool = True
+ self._duration_element = duration_element
+
# These parameters should be overridden externally.
self.loop: bool = False
- self.smoothness: float = 0.5 # Tension / alpha term.
+ self.framerate: float = 30.0
+ self.tension: float = 0.5 # Tension / alpha term.
self.default_fov: float = 0.0
+ self.default_transition_sec: float = 0.0
+ self.show_spline: bool = True
def set_keyframes_visible(self, visible: bool) -> None:
self._keyframes_visible = visible
@@ -85,20 +94,28 @@ def add_camera(self, keyframe: Keyframe, keyframe_index: Optional[int] = None) -
frustum_handle = server.add_camera_frustum(
f"/render_cameras/{keyframe_index}",
- fov=keyframe.override_fov_value if keyframe.override_fov_enabled else self.default_fov,
+ fov=keyframe.override_fov_rad if keyframe.override_fov_enabled else self.default_fov,
aspect=keyframe.aspect,
scale=0.1,
- color=(127, 127, 127),
+ color=(200, 10, 30),
wxyz=keyframe.wxyz,
position=keyframe.position,
visible=self._keyframes_visible,
)
+ self._server.add_icosphere(
+ f"/render_cameras/{keyframe_index}/sphere",
+ radius=0.03,
+ color=(200, 10, 30),
+ )
@frustum_handle.on_click
def _(_) -> None:
+ if self._camera_edit_panel is not None:
+ self._camera_edit_panel.remove()
+ self._camera_edit_panel = None
+
with server.add_3d_gui_container(
"/camera_edit_panel",
- wxyz=keyframe.wxyz,
position=keyframe.position,
) as camera_edit_panel:
self._camera_edit_panel = camera_edit_panel
@@ -108,80 +125,82 @@ def _(_) -> None:
5.0,
175.0,
step=0.1,
- initial_value=keyframe.override_fov_value * 180.0 / onp.pi,
+ initial_value=keyframe.override_fov_rad * 180.0 / np.pi,
disabled=not keyframe.override_fov_enabled,
)
delete_button = server.add_gui_button("Delete", color="red", icon=viser.Icon.TRASH)
go_to_button = server.add_gui_button("Go to")
close_button = server.add_gui_button("Close")
- @override_fov.on_update
- def _(_) -> None:
- keyframe.override_fov_enabled = override_fov.value
- override_fov_degrees.disabled = not override_fov.value
- self.add_camera(keyframe, keyframe_index)
+ @override_fov.on_update
+ def _(_) -> None:
+ keyframe.override_fov_enabled = override_fov.value
+ override_fov_degrees.disabled = not override_fov.value
+ self.add_camera(keyframe, keyframe_index)
- @override_fov_degrees.on_update
- def _(_) -> None:
- keyframe.override_fov_value = override_fov_degrees.value / 180.0 * onp.pi
- self.add_camera(keyframe, keyframe_index)
-
- @delete_button.on_click
- def _(event: viser.GuiEvent) -> None:
- assert event.client is not None
- with event.client.add_gui_modal("Confirm") as modal:
- event.client.add_gui_markdown("Delete keyframe?")
- confirm_button = event.client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH)
- exit_button = event.client.add_gui_button("Cancel")
-
- @confirm_button.on_click
- def _(_) -> None:
- assert camera_edit_panel is not None
-
- keyframe_id = None
- for i, keyframe_tuple in self._keyframes.items():
- if keyframe_tuple[1] is frustum_handle:
- keyframe_id = i
- break
- assert keyframe_id is not None
-
- self._keyframes.pop(keyframe_id)
- frustum_handle.remove()
- camera_edit_panel.remove()
- modal.close()
- self.update_spline()
-
- @exit_button.on_click
- def _(_) -> None:
- modal.close()
-
- @go_to_button.on_click
- def _(event: viser.GuiEvent) -> None:
- assert event.client is not None
- client = event.client
- T_world_current = tf.SE3.from_rotation_and_translation(
- tf.SO3(client.camera.wxyz), client.camera.position
- )
- T_world_target = tf.SE3.from_rotation_and_translation(
- tf.SO3(keyframe.wxyz), keyframe.position
- ) @ tf.SE3.from_translation(onp.array([0.0, 0.0, -0.5]))
+ @override_fov_degrees.on_update
+ def _(_) -> None:
+ keyframe.override_fov_rad = override_fov_degrees.value / 180.0 * np.pi
+ self.add_camera(keyframe, keyframe_index)
+
+ @delete_button.on_click
+ def _(event: viser.GuiEvent) -> None:
+ assert event.client is not None
+ with event.client.add_gui_modal("Confirm") as modal:
+ event.client.add_gui_markdown("Delete keyframe?")
+ confirm_button = event.client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH)
+ exit_button = event.client.add_gui_button("Cancel")
+
+ @confirm_button.on_click
+ def _(_) -> None:
+ assert camera_edit_panel is not None
+
+ keyframe_id = None
+ for i, keyframe_tuple in self._keyframes.items():
+ if keyframe_tuple[1] is frustum_handle:
+ keyframe_id = i
+ break
+ assert keyframe_id is not None
+
+ self._keyframes.pop(keyframe_id)
+ frustum_handle.remove()
+ camera_edit_panel.remove()
+ self._camera_edit_panel = None
+ modal.close()
+ self.update_spline()
+
+ @exit_button.on_click
+ def _(_) -> None:
+ modal.close()
+
+ @go_to_button.on_click
+ def _(event: viser.GuiEvent) -> None:
+ assert event.client is not None
+ client = event.client
+ T_world_current = tf.SE3.from_rotation_and_translation(
+ tf.SO3(client.camera.wxyz), client.camera.position
+ )
+ T_world_target = tf.SE3.from_rotation_and_translation(
+ tf.SO3(keyframe.wxyz), keyframe.position
+ ) @ tf.SE3.from_translation(np.array([0.0, 0.0, -0.5]))
- T_current_target = T_world_current.inverse() @ T_world_target
+ T_current_target = T_world_current.inverse() @ T_world_target
- for j in range(10):
- T_world_set = T_world_current @ tf.SE3.exp(T_current_target.log() * j / 9.0)
+ for j in range(10):
+ T_world_set = T_world_current @ tf.SE3.exp(T_current_target.log() * j / 9.0)
- # Important bit: we atomically set both the orientation and the position
- # of the camera.
- with client.atomic():
- client.camera.wxyz = T_world_set.rotation().wxyz
- client.camera.position = T_world_set.translation()
- time.sleep(1.0 / 30.0)
+ # Important bit: we atomically set both the orientation and the position
+ # of the camera.
+ with client.atomic():
+ client.camera.wxyz = T_world_set.rotation().wxyz
+ client.camera.position = T_world_set.translation()
+ time.sleep(1.0 / 30.0)
- @close_button.on_click
- def _(_) -> None:
- assert camera_edit_panel is not None
- camera_edit_panel.remove()
+ @close_button.on_click
+ def _(_) -> None:
+ assert camera_edit_panel is not None
+ camera_edit_panel.remove()
+ self._camera_edit_panel = None
self._keyframes[keyframe_index] = (keyframe, frustum_handle)
@@ -196,79 +215,220 @@ def reset(self) -> None:
self._keyframes.clear()
self.update_spline()
- def interpolate_pose_and_fov(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]:
+ def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]:
if len(self._keyframes) < 2:
return None
- # TODO: this doesn't need to be constantly re-instantiated.
+
+ transition_times_cumsum = self.compute_transition_times_cumsum()
+ spline_indices = np.arange(transition_times_cumsum.shape[0])
+
+ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
+ return np.interp(time, transition_times_cumsum, spline_indices)
+
self._fov_spline = splines.KochanekBartels(
[
- keyframe[0].override_fov_value if keyframe[0].override_fov_enabled else self.default_fov
+ keyframe[0].override_fov_rad if keyframe[0].override_fov_enabled else self.default_fov
for keyframe in self._keyframes.values()
],
- tcb=(self.smoothness, 0.0, 0.0),
+ tcb=(self.tension, 0.0, 0.0),
endconditions="closed" if self.loop else "natural",
)
assert self._orientation_spline is not None
assert self._position_spline is not None
assert self._fov_spline is not None
- max_t = len(self._keyframes) if self.loop else len(self._keyframes) - 1
+ max_t = self.compute_duration()
t = max_t * normalized_t
- quat = self._orientation_spline.evaluate(t)
+
+ quat = self._orientation_spline.evaluate(spline_t_from_t_sec(t))
assert isinstance(quat, splines.quaternion.UnitQuaternion)
return (
tf.SE3.from_rotation_and_translation(
- tf.SO3(onp.array([quat.scalar, *quat.vector])),
- self._position_spline.evaluate(t),
+ tf.SO3(np.array([quat.scalar, *quat.vector])),
+ self._position_spline.evaluate(spline_t_from_t_sec(t)),
),
- float(self._fov_spline.evaluate(t)),
+ float(self._fov_spline.evaluate(spline_t_from_t_sec(t))),
)
def update_spline(self) -> None:
- keyframes = list(self._keyframes.values())
- if len(keyframes) <= 1:
- if self._spline is not None:
- self._spline.remove()
- self._spline = None
+ num_frames = int(self.compute_duration() * self.framerate)
+ if num_frames <= 0 or not self.show_spline:
+ for node in self._spline_nodes:
+ node.remove()
+ self._spline_nodes.clear()
return
# Update internal splines.
+ keyframes = list(self._keyframes.values())
+ transition_times_cumsum = self.compute_transition_times_cumsum()
+ spline_indices = np.arange(transition_times_cumsum.shape[0])
+
+ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
+ return np.interp(time, transition_times_cumsum, spline_indices)
+
self._orientation_spline = splines.quaternion.KochanekBartels(
[
- splines.quaternion.UnitQuaternion.from_unit_xyzw(onp.roll(keyframe[0].wxyz, shift=-1))
+ splines.quaternion.UnitQuaternion.from_unit_xyzw(np.roll(keyframe[0].wxyz, shift=-1))
for keyframe in keyframes
],
- tcb=(self.smoothness, 0.0, 0.0),
+ tcb=(self.tension, 0.0, 0.0),
endconditions="closed" if self.loop else "natural",
)
self._position_spline = splines.KochanekBartels(
[keyframe[0].position for keyframe in keyframes],
- tcb=(self.smoothness, 0.0, 0.0),
+ tcb=(self.tension, 0.0, 0.0),
endconditions="closed" if self.loop else "natural",
)
# Update visualized spline.
- num_keyframes = len(keyframes) + 1 if self.loop else len(keyframes)
- points_array = onp.array(
- [self._position_spline.evaluate(t) for t in onp.linspace(0, num_keyframes - 1, num_keyframes * 100)]
+ points_array = self._position_spline.evaluate(
+ spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames))
)
- colors_array = onp.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in onp.linspace(0.0, 1.0, len(points_array))])
- self._spline = self._server.add_point_cloud(
- "/render_camera_spline",
- points=points_array,
- colors=colors_array,
- point_size=0.035,
+ colors_array = np.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))])
+
+ # Clear prior spline nodes.
+ for node in self._spline_nodes:
+ node.remove()
+ self._spline_nodes.clear()
+
+ self._spline_nodes.append(
+ self._server.add_spline_catmull_rom(
+ "/render_camera_spline",
+ positions=points_array,
+ color=(220, 220, 220),
+ closed=self.loop,
+ line_width=1.0,
+ segments=points_array.shape[0] + 1,
+ )
+ )
+ self._spline_nodes.append(
+ self._server.add_point_cloud(
+ "/render_camera_spline/points",
+ points=points_array,
+ colors=colors_array,
+ point_size=0.04,
+ )
)
+ def make_transition_handle(i: int) -> None:
+ assert self._position_spline is not None
+ transition_pos = self._position_spline.evaluate(
+ spline_t_from_t_sec((transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0)
+ )
+ transition_sphere = self._server.add_icosphere(
+ f"/render_camera_spline/transition_{i}",
+ radius=0.04,
+ color=(255, 0, 0),
+ position=transition_pos,
+ )
+ self._spline_nodes.append(transition_sphere)
+
+ @transition_sphere.on_click
+ def _(_) -> None:
+ server = self._server
+
+ if self._camera_edit_panel is not None:
+ self._camera_edit_panel.remove()
+ self._camera_edit_panel = None
+
+ keyframe_index = (i + 1) % len(self._keyframes)
+ keyframe = keyframes[keyframe_index][0]
+
+ with server.add_3d_gui_container(
+ "/camera_edit_panel",
+ position=transition_pos,
+ ) as camera_edit_panel:
+ self._camera_edit_panel = camera_edit_panel
+ override_transition_enabled = server.add_gui_checkbox(
+ "Override transition", initial_value=keyframe.override_transition_enabled
+ )
+ override_transition_sec = server.add_gui_number(
+ "Override transition (sec)",
+ initial_value=keyframe.override_transition_sec
+ if keyframe.override_transition_sec is not None
+ else self.default_transition_sec,
+ min=0.001,
+ max=30.0,
+ step=0.001,
+ disabled=not override_transition_enabled.value,
+ )
+ close_button = server.add_gui_button("Close")
+
+ @override_transition_enabled.on_update
+ def _(_) -> None:
+ keyframe.override_transition_enabled = override_transition_enabled.value
+ override_transition_sec.disabled = not override_transition_enabled.value
+ self._duration_element.value = self.compute_duration()
+
+ @override_transition_sec.on_update
+ def _(_) -> None:
+ keyframe.override_transition_sec = override_transition_sec.value
+ self._duration_element.value = self.compute_duration()
+
+ @close_button.on_click
+ def _(_) -> None:
+ assert camera_edit_panel is not None
+ camera_edit_panel.remove()
+ self._camera_edit_panel = None
+
+ (num_transitions_plus_1,) = transition_times_cumsum.shape
+ for i in range(num_transitions_plus_1 - 1):
+ make_transition_handle(i)
+
+ # for i in range(transition_times.shape[0])
+
+ def compute_duration(self) -> float:
+ """Compute the total duration of the trajectory."""
+ total = 0.0
+ for i, (keyframe, frustum) in enumerate(self._keyframes.values()):
+ if i == 0 and not self.loop:
+ continue
+ del frustum
+ total += (
+ keyframe.override_transition_sec
+ if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None
+ else self.default_transition_sec
+ )
+ return total
+
+ def compute_transition_times_cumsum(self) -> np.ndarray:
+ """Compute the total duration of the trajectory."""
+ total = 0.0
+ out = [0.0]
+ for i, (keyframe, frustum) in enumerate(self._keyframes.values()):
+ if i == 0:
+ continue
+ del frustum
+ total += (
+ keyframe.override_transition_sec
+ if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None
+ else self.default_transition_sec
+ )
+ out.append(total)
+
+ if self.loop:
+ keyframe = next(iter(self._keyframes.values()))[0]
+ total += (
+ keyframe.override_transition_sec
+ if keyframe.override_transition_enabled and keyframe.override_transition_sec is not None
+ else self.default_transition_sec
+ )
+ out.append(total)
+
+ return np.array(out)
+
def populate_render_tab(
- server: viser.ViserServer, config_path: Path, datapath: Path, control_panel: ControlPanel
+ server: viser.ViserServer,
+ config_path: Path,
+ datapath: Path,
+ control_panel: Optional[ControlPanel] = None,
) -> None:
from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO
fov_degrees = server.add_gui_slider(
- "FOV",
- initial_value=90.0,
+ "Default FOV",
+ initial_value=75.0,
min=0.1,
max=175.0,
step=0.01,
@@ -277,7 +437,7 @@ def populate_render_tab(
@fov_degrees.on_update
def _(_) -> None:
- fov_radians = fov_degrees.value / 180.0 * onp.pi
+ fov_radians = fov_degrees.value / 180.0 * np.pi
for client in server.get_clients().values():
client.camera.fov = fov_radians
camera_path.default_fov = fov_radians
@@ -294,19 +454,14 @@ def _(_) -> None:
step=1,
hint="Render output resolution in pixels.",
)
-
- @resolution.on_update
- def _(_) -> None:
- """Update the aspect ratio for all cameras when the resolution changes."""
- camera_path.update_aspect(resolution.value[0] / resolution.value[1])
+ resolution.on_update(lambda _: camera_path.update_aspect(resolution.value[0] / resolution.value[1]))
camera_type = server.add_gui_dropdown(
"Camera Type",
("Perspective", "Fisheye", "Equirectangular"),
initial_value="Perspective",
- hint="Camera model to render with.",
+ hint="Camera model to render with. This is applied to all keyframes.",
)
-
add_button = server.add_gui_button(
"Add keyframe",
icon=viser.Icon.PLUS,
@@ -320,8 +475,12 @@ def _(event: viser.GuiEvent) -> None:
# Add this camera to the path.
camera_path.add_camera(
- Keyframe.from_camera(camera, aspect=resolution.value[0] / resolution.value[1]),
+ Keyframe.from_camera(
+ camera,
+ aspect=resolution.value[0] / resolution.value[1],
+ ),
)
+ duration_number.value = camera_path.compute_duration()
camera_path.update_spline()
reset_up_button = server.add_gui_button(
@@ -333,7 +492,7 @@ def _(event: viser.GuiEvent) -> None:
@reset_up_button.on_click
def _(event: viser.GuiEvent) -> None:
assert event.client is not None
- event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ onp.array([0.0, -1.0, 0.0])
+ event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0])
clear_keyframes_button = server.add_gui_button(
"Clear keyframes",
@@ -345,7 +504,7 @@ def _(event: viser.GuiEvent) -> None:
def _(event: viser.GuiEvent) -> None:
assert event.client_id is not None
client = server.get_clients()[event.client_id]
- with client.add_gui_modal("Confirm") as modal:
+ with client.atomic(), client.add_gui_modal("Confirm") as modal:
client.add_gui_markdown("Clear all keyframes?")
confirm_button = client.add_gui_button("Yes", color="red", icon=viser.Icon.TRASH)
exit_button = client.add_gui_button("Cancel")
@@ -355,6 +514,8 @@ def _(_) -> None:
camera_path.reset()
modal.close()
+ duration_number.value = camera_path.compute_duration()
+
# Clear move handles.
if len(transform_controls) > 0:
for t in transform_controls:
@@ -366,14 +527,14 @@ def _(_) -> None:
def _(_) -> None:
modal.close()
- loop = server.add_gui_checkbox("Loop", False)
+ loop = server.add_gui_checkbox("Loop", False, hint="Add a segment between the first and last keyframes.")
@loop.on_update
def _(_) -> None:
camera_path.loop = loop.value
- camera_path.update_spline()
+ duration_number.value = camera_path.compute_duration()
- smoothness = server.add_gui_slider(
+ tension_slider = server.add_gui_slider(
"Spline Tension",
min=0.0,
max=1.0,
@@ -382,9 +543,9 @@ def _(_) -> None:
hint="Tension parameter for adjusting smoothness of spline interpolation.",
)
- @smoothness.on_update
+ @tension_slider.on_update
def _(_) -> None:
- camera_path.smoothness = smoothness.value
+ camera_path.tension = tension_slider.value
camera_path.update_spline()
move_checkbox = server.add_gui_checkbox(
@@ -428,41 +589,80 @@ def _(_) -> None:
transform_controls.append(controls)
_make_transform_controls_callback(keyframe, controls)
+ show_keyframe_checkbox = server.add_gui_checkbox(
+ "Show keyframes",
+ initial_value=True,
+ hint="Show keyframes in the scene.",
+ )
+
+ @show_keyframe_checkbox.on_update
+ def _(_: viser.GuiEvent) -> None:
+ camera_path.set_keyframes_visible(show_keyframe_checkbox.value)
+
+ show_spline_checkbox = server.add_gui_checkbox(
+ "Show spline",
+ initial_value=True,
+ hint="Show camera path spline in the scene.",
+ )
+
+ @show_spline_checkbox.on_update
+ def _(_) -> None:
+ camera_path.show_spline = show_spline_checkbox.value
+ camera_path.update_spline()
+
playback_folder = server.add_gui_folder("Playback")
with playback_folder:
- duration_number = server.add_gui_number("Duration (sec)", min=0.0, max=1e8, step=0.0001, initial_value=4.0)
- framerate_number = server.add_gui_number("Frame rate (FPS)", min=0.1, max=240.0, step=1e-8, initial_value=30.0)
+ play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY)
+ pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False)
+ attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False)
+ transition_sec_number = server.add_gui_number(
+ "Transition (sec)",
+ min=0.001,
+ max=30.0,
+ step=0.001,
+ initial_value=0.5,
+ hint="Time in seconds between each keyframe, which can also be overridden on a per-transition basis.",
+ )
+ framerate_number = server.add_gui_number("FPS", min=0.1, max=240.0, step=1e-2, initial_value=30.0)
framerate_buttons = server.add_gui_button_group("", ("24", "30", "60"))
+ duration_number = server.add_gui_number(
+ "Duration (sec)",
+ min=0.0,
+ max=1e8,
+ step=0.001,
+ initial_value=0.0,
+ disabled=True,
+ )
@framerate_buttons.on_click
def _(_) -> None:
framerate_number.value = float(framerate_buttons.value)
- play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY)
- pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False)
- attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False)
- show_checkbox = server.add_gui_checkbox(
- "Show keyframes",
- initial_value=True,
- hint="Show keyframes in the scene.",
- )
+ @transition_sec_number.on_update
+ def _(_) -> None:
+ camera_path.default_transition_sec = transition_sec_number.value
+ duration_number.value = camera_path.compute_duration()
- @show_checkbox.on_update
- def _(_: viser.GuiEvent) -> None:
- camera_path.set_keyframes_visible(show_checkbox.value)
+ def get_max_frame_index() -> int:
+ return max(1, int(framerate_number.value * duration_number.value) - 1)
+
+ preview_camera_handle: Optional[viser.SceneNodeHandle] = None
+
+ def remove_preview_camera() -> None:
+ nonlocal preview_camera_handle
+ if preview_camera_handle is not None:
+ preview_camera_handle.remove()
+ preview_camera_handle = None
def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]:
"""Helper for creating the current frame # slider. This is removed and
re-added anytime the `max` value changes."""
- max_frame_index = int(framerate_number.value * duration_number.value) - 1
- if max_frame_index <= 0:
- return None
with playback_folder:
preview_frame_slider = server.add_gui_slider(
"Preview frame",
min=0,
- max=max_frame_index,
+ max=get_max_frame_index(),
step=1,
initial_value=0,
# Place right after the pause button.
@@ -471,16 +671,19 @@ def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]:
@preview_frame_slider.on_update
def _(_) -> None:
- max_frame_index = int(framerate_number.value * duration_number.value) - 1
- maybe_pose_and_fov = camera_path.interpolate_pose_and_fov(
- preview_frame_slider.value / max_frame_index if max_frame_index > 0 else 0
+ nonlocal preview_camera_handle
+
+ maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad(
+ preview_frame_slider.value / get_max_frame_index()
)
- if maybe_pose_and_fov is None:
+ if maybe_pose_and_fov_rad is None:
+ remove_preview_camera()
return
- pose, fov = maybe_pose_and_fov
- server.add_camera_frustum(
+ pose, fov_rad = maybe_pose_and_fov_rad
+
+ preview_camera_handle = server.add_camera_frustum(
"/preview_camera",
- fov=fov,
+ fov=fov_rad,
aspect=resolution.value[0] / resolution.value[1],
scale=0.35,
wxyz=pose.rotation().wxyz,
@@ -497,21 +700,55 @@ def _(_) -> None:
for client in server.get_clients().values():
client.camera.wxyz = pose.rotation().wxyz
client.camera.position = pose.translation()
- client.camera.fov = fov
+ client.camera.fov = fov_rad
return preview_frame_slider
@attach_viewport_checkbox.on_update
def _(_) -> None:
+ if preview_frame_slider is None:
+ remove_preview_camera()
+ return
+ maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad(
+ preview_frame_slider.value / get_max_frame_index()
+ )
+ if maybe_pose_and_fov_rad is None:
+ remove_preview_camera()
+ return
+ pose, fov = maybe_pose_and_fov_rad
+ server.add_camera_frustum(
+ "/preview_camera",
+ fov=fov,
+ aspect=resolution.value[0] / resolution.value[1],
+ scale=0.35,
+ wxyz=pose.rotation().wxyz,
+ position=pose.translation(),
+ color=(10, 200, 30),
+ # Hack: hide green frustum if the viewport is attached.
+ # This is a waste of bandwidth, but will ensure that any old
+ # frustums are removed/aren't rendered.
+ #
+ # Easy to fix with a global variable.
+ visible=not attach_viewport_checkbox.value,
+ )
if not attach_viewport_checkbox.value:
for client in server.get_clients().values():
- client.camera.fov = fov_degrees.value
+ client.camera.fov = fov_degrees.value / 180 * np.pi
+ else:
+ if attach_viewport_checkbox.value:
+ for client in server.get_clients().values():
+ client.camera.wxyz = pose.rotation().wxyz
+ client.camera.position = pose.translation()
+ client.camera.fov = fov
preview_frame_slider = add_preview_frame_slider()
+ # Update the # of frames.
@duration_number.on_update
@framerate_number.on_update
def _(_) -> None:
+ remove_preview_camera() # Will be re-added when slider is updated.
+
nonlocal preview_frame_slider
old = preview_frame_slider
assert old is not None
@@ -522,6 +759,9 @@ def _(_) -> None:
else:
preview_frame_slider = old
+ camera_path.framerate = framerate_number.value
+ camera_path.update_spline()
+
# Play the camera trajectory when the play button is pressed.
@play_button.on_click
def _(_) -> None:
@@ -580,20 +820,25 @@ def _(_) -> None:
camera_path.reset()
for i in range(len(keyframes)):
frame = keyframes[i]
- pose = tf.SE3.from_matrix(onp.array(frame["matrix"]).reshape(4, 4))
+ pose = tf.SE3.from_matrix(np.array(frame["matrix"]).reshape(4, 4))
# apply the x rotation by 180 deg
pose = tf.SE3.from_rotation_and_translation(
- pose.rotation() @ tf.SO3.from_x_radians(onp.pi), pose.translation()
+ pose.rotation() @ tf.SO3.from_x_radians(np.pi), pose.translation()
)
camera_path.add_camera(
Keyframe(
position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO,
wxyz=pose.rotation().wxyz,
override_fov_enabled=True,
- override_fov_value=frame["fov"] / 180.0 * onp.pi,
+ override_fov_rad=frame["fov"] / 180.0 * np.pi,
aspect=frame["aspect"],
+ override_transition_enabled=frame.get("override_transition_enabled", None),
+ override_transition_sec=frame.get("override_transition_sec", None),
),
)
+
+ transition_sec_number.value = json_data.get("default_transition_sec", 0.5)
+
# update the render name
render_name_text.value = json_path.stem
camera_path.update_spline()
@@ -624,9 +869,9 @@ def _(event: viser.GuiEvent) -> None:
json_data = {}
# json data has the properties:
# keyframes: list of keyframes with
- # matrix : flattened 4x4 matrix
- # fov: float in degrees
- # aspect: float
+ # matrix : flattened 4x4 matrix
+ # fov: float in degrees
+ # aspect: float
# camera_type: string of camera type
# render_height: int
# render_width: int
@@ -642,18 +887,21 @@ def _(event: viser.GuiEvent) -> None:
keyframes = []
for keyframe, dummy in camera_path._keyframes.values():
pose = tf.SE3.from_rotation_and_translation(
- tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(onp.pi),
+ tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(np.pi),
keyframe.position / VISER_NERFSTUDIO_SCALE_RATIO,
)
keyframes.append(
{
"matrix": pose.as_matrix().flatten().tolist(),
- "fov": onp.rad2deg(keyframe.override_fov_value)
+ "fov": np.rad2deg(keyframe.override_fov_rad)
if keyframe.override_fov_enabled
else fov_degrees.value,
"aspect": keyframe.aspect,
+ "override_transition_enabled": keyframe.override_transition_enabled,
+ "override_transition_sec": keyframe.override_transition_sec,
}
)
+ json_data["default_transition_sec"] = transition_sec_number.value
json_data["keyframes"] = keyframes
json_data["camera_type"] = camera_type.value.lower()
json_data["render_height"] = resolution.value[1]
@@ -661,38 +909,39 @@ def _(event: viser.GuiEvent) -> None:
json_data["fps"] = framerate_number.value
json_data["seconds"] = duration_number.value
json_data["is_cycle"] = loop.value
- json_data["smoothness_value"] = smoothness.value
+ json_data["smoothness_value"] = tension_slider.value
# now populate the camera path:
camera_path_list = []
for i in range(num_frames):
- maybe_pose_and_fov = camera_path.interpolate_pose_and_fov(i / num_frames)
+ maybe_pose_and_fov = camera_path.interpolate_pose_and_fov_rad(i / num_frames)
if maybe_pose_and_fov is None:
return
pose, fov = maybe_pose_and_fov
# rotate the axis of the camera 180 about x axis
pose = tf.SE3.from_rotation_and_translation(
- pose.rotation() @ tf.SO3.from_x_radians(onp.pi),
+ pose.rotation() @ tf.SO3.from_x_radians(np.pi),
pose.translation() / VISER_NERFSTUDIO_SCALE_RATIO,
)
camera_path_list.append(
{
"camera_to_world": pose.as_matrix().flatten().tolist(),
- "fov": onp.rad2deg(fov),
+ "fov": np.rad2deg(fov),
"aspect": resolution.value[0] / resolution.value[1],
}
)
json_data["camera_path"] = camera_path_list
# finally add crop data if crop is enabled
- if control_panel.crop_viewport:
- obb = control_panel.crop_obb
- rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians()
- color = control_panel.background_color
- json_data["crop"] = {
- "crop_center": obb.T.tolist(),
- "crop_scale": obb.S.tolist(),
- "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw],
- "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]},
- }
+ if control_panel is not None:
+ if control_panel.crop_viewport:
+ obb = control_panel.crop_obb
+ rpy = tf.SO3.from_matrix(obb.R.numpy()).as_rpy_radians()
+ color = control_panel.background_color
+ json_data["crop"] = {
+ "crop_center": obb.T.tolist(),
+ "crop_scale": obb.S.tolist(),
+ "crop_rot": [rpy.roll, rpy.pitch, rpy.yaw],
+ "crop_bg_color": {"r": color[0], "g": color[1], "b": color[2]},
+ }
# now write the json file
json_outfile = datapath / "camera_paths" / f"{render_name_text.value}.json"
@@ -727,7 +976,18 @@ def _(event: viser.GuiEvent) -> None:
def _(_) -> None:
modal.close()
- camera_path = CameraPath(server)
- camera_path.default_fov = fov_degrees.value / 180.0 * onp.pi
+ camera_path = CameraPath(server, duration_number)
+ camera_path.default_fov = fov_degrees.value / 180.0 * np.pi
+ camera_path.default_transition_sec = transition_sec_number.value
transform_controls: List[viser.SceneNodeHandle] = []
+
+
+if __name__ == "__main__":
+ populate_render_tab(
+ server=viser.ViserServer(),
+ config_path=Path("."),
+ datapath=Path("."),
+ )
+ while True:
+ time.sleep(10.0)
diff --git a/pyproject.toml b/pyproject.toml
index 0d8708ddab..780d2c6b5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.7",
+ "viser==0.1.12",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
From 8b85c44b276ac3ab482840c03b12b556ec98a18f Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Wed, 29 Nov 2023 19:42:42 -0800
Subject: [PATCH 074/101] Patch spiral rendering w/ parallel datamanager
(#2637)
* fix jittering in markdown in viewer beta
* Revert "fix jittering in markdown in viewer beta"
This reverts commit 70ade42e46f3dcf0e89e4efc445650f6d6525673.
* print correctly formatted url in banner for viewer beta
* allow parallel datamanager in spiral rendering
* lint
* Update render.py
---------
Co-authored-by: Brent Yi
---
nerfstudio/scripts/render.py | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index d9d815ed71..79cc6c7bd9 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -61,6 +61,10 @@
VanillaDataManager,
VanillaDataManagerConfig,
)
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager
+from nerfstudio.data.datamanagers.random_cameras_datamanager import (
+ RandomCamerasDataManager,
+)
from nerfstudio.data.datasets.base_dataset import Dataset
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.data.utils.dataloaders import FixedIndicesEvalDataloader
@@ -666,7 +670,14 @@ def main(self) -> None:
install_checks.check_ffmpeg_installed()
- assert isinstance(pipeline.datamanager, VanillaDataManager)
+ assert isinstance(
+ pipeline.datamanager,
+ (
+ VanillaDataManager,
+ ParallelDataManager,
+ RandomCamerasDataManager,
+ ),
+ )
steps = int(self.frame_rate * self.seconds)
camera_start = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0).flatten()
camera_path = get_spiral_path(camera_start, steps=steps, radius=self.radius)
From 5a772b177d2c8e28ec346758e57cc64eab5ec564 Mon Sep 17 00:00:00 2001
From: Yosshi999
Date: Thu, 30 Nov 2023 14:57:43 +0900
Subject: [PATCH 075/101] Bugfix: shape confusion in resizing (#2565)
Co-authored-by: Justin Kerr
Co-authored-by: Brent Yi
---
nerfstudio/data/dataparsers/dycheck_dataparser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/data/dataparsers/dycheck_dataparser.py b/nerfstudio/data/dataparsers/dycheck_dataparser.py
index 50a581f288..d9d4c8f4c6 100644
--- a/nerfstudio/data/dataparsers/dycheck_dataparser.py
+++ b/nerfstudio/data/dataparsers/dycheck_dataparser.py
@@ -322,7 +322,7 @@ def process_frames(self, frame_names: List[str], time_ids: np.ndarray) -> Tuple[
for frame in frame_names:
cv2.imwrite(
str(self.data / f"rgb/{d}x/{frame}.png"),
- cv2.resize(cv2.imread(str(self.data / f"rgb/1x/{frame}.png")), (h, w)),
+ cv2.resize(cv2.imread(str(self.data / f"rgb/1x/{frame}.png")), (w, h)),
)
CONSOLE.print("finished")
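The one-character fix reflects that `cv2.resize` takes its `dsize` argument as `(width, height)`, the reverse of NumPy's `(height, width)` array shape. A quick sanity check of the convention:
```python
import cv2
import numpy as np

img = np.zeros((480, 640, 3), dtype=np.uint8)  # NumPy shape: (height, width, channels)
h, w = 120, 160                                # target downsample size

resized = cv2.resize(img, (w, h))              # correct: dsize is (width, height)
assert resized.shape[:2] == (h, w)

swapped = cv2.resize(img, (h, w))              # the old bug: yields a transposed-size image
assert swapped.shape[:2] == (w, h)
```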
From 3d8af872901318e8b2dd697857f210e8285f15b8 Mon Sep 17 00:00:00 2001
From: David Holtz <56723830+dmholtz@users.noreply.github.com>
Date: Fri, 1 Dec 2023 00:00:31 +0100
Subject: [PATCH 076/101] Make nuScenes dataparser compatible with
ParallelDataManager (#2635)
The ParallelDataManager (see #2092) causes PyTorch to crash if the Cameras instance's fx, fy, cx, or cy tensors
are loaded from a common shared tensor.
This PR fixes the issue by cloning the respective tensors before passing them to the Cameras(...) constructor.
---
nerfstudio/data/dataparsers/nuscenes_dataparser.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/nerfstudio/data/dataparsers/nuscenes_dataparser.py b/nerfstudio/data/dataparsers/nuscenes_dataparser.py
index 317717e860..0e899a68c9 100644
--- a/nerfstudio/data/dataparsers/nuscenes_dataparser.py
+++ b/nerfstudio/data/dataparsers/nuscenes_dataparser.py
@@ -200,10 +200,10 @@ def _generate_dataparser_outputs(self, split="train"):
)
cameras = Cameras(
- fx=intrinsics[:, 0, 0],
- fy=intrinsics[:, 1, 1],
- cx=intrinsics[:, 0, 2],
- cy=intrinsics[:, 1, 2],
+ fx=intrinsics[:, 0, 0].detach().clone(),
+ fy=intrinsics[:, 1, 1].detach().clone(),
+ cx=intrinsics[:, 0, 2].detach().clone(),
+ cy=intrinsics[:, 1, 2].detach().clone(),
height=900,
width=1600,
camera_to_worlds=poses[:, :3, :4],
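Cloning matters because basic indexing such as `intrinsics[:, 0, 0]` returns a view that shares storage with the full intrinsics tensor, and that shared storage is what the ParallelDataManager's worker processes tripped over. A small sketch of the difference (illustration only):
```python
import torch

intrinsics = torch.eye(3).repeat(4, 1, 1)            # (num_cameras, 3, 3)

fx_view = intrinsics[:, 0, 0]                         # view into the shared tensor
fx_copy = intrinsics[:, 0, 0].detach().clone()        # independent copy, as in the patch

print(fx_view.data_ptr() == intrinsics.data_ptr())    # True: same underlying storage
print(fx_copy.data_ptr() == intrinsics.data_ptr())    # False: its own storage
```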
From 747dd557f67ddf66f9f0192b697e208780cffcb7 Mon Sep 17 00:00:00 2001
From: blacksino <44363764+blacksino@users.noreply.github.com>
Date: Fri, 1 Dec 2023 07:07:26 +0800
Subject: [PATCH 077/101] fix include_input for NeRFEncoding (#2642)
---
nerfstudio/field_components/encodings.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py
index ee2aac93c8..15a2ce3ab4 100644
--- a/nerfstudio/field_components/encodings.py
+++ b/nerfstudio/field_components/encodings.py
@@ -169,17 +169,18 @@ def pytorch_fwd(
encoded_inputs = expected_sin(
torch.cat([scaled_inputs, scaled_inputs + torch.pi / 2.0], dim=-1), torch.cat(2 * [input_var], dim=-1)
)
-
- if self.include_input:
- encoded_inputs = torch.cat([encoded_inputs, in_tensor], dim=-1)
return encoded_inputs
def forward(
self, in_tensor: Float[Tensor, "*bs input_dim"], covs: Optional[Float[Tensor, "*bs input_dim input_dim"]] = None
) -> Float[Tensor, "*bs output_dim"]:
if self.tcnn_encoding is not None:
- return self.tcnn_encoding(in_tensor)
- return self.pytorch_fwd(in_tensor, covs)
+ encoded_inputs = self.tcnn_encoding(in_tensor)
+ else:
+ encoded_inputs = self.pytorch_fwd(in_tensor, covs)
+ if self.include_input:
+ encoded_inputs = torch.cat([encoded_inputs, in_tensor], dim=-1)
+ return encoded_inputs
class FFEncoding(Encoding):
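With the fix, both the tcnn and the torch paths pass through the same `include_input` concatenation, so the output width matches `get_out_dim()` regardless of backend. A worked dimension check with assumed parameters (`in_dim=3`, `num_frequencies=4`):
```python
# Assumed example parameters, not taken from the patch.
in_dim, num_frequencies = 3, 4

encoded_dim = in_dim * num_frequencies * 2   # sin and cos per frequency -> 24
with_input_dim = encoded_dim + in_dim        # include_input appends the raw coordinates -> 27
print(encoded_dim, with_input_dim)
```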
From 43a7da36a512cab9032c81b3b9496f908a7ab61c Mon Sep 17 00:00:00 2001
From: Jaggz H
Date: Thu, 30 Nov 2023 15:51:36 -0800
Subject: [PATCH 078/101] Converted many @dataclass assignments for python 3.11
compatibility (#2630)
Converted many @dataclass assignments to field() defaults for Python 3.11 compatibility
---
nerfstudio/configs/base_config.py | 4 ++--
nerfstudio/configs/experiment_config.py | 10 +++++-----
nerfstudio/data/datamanagers/base_datamanager.py | 4 ++--
nerfstudio/data/dataparsers/base_dataparser.py | 2 +-
nerfstudio/models/base_surface_model.py | 2 +-
nerfstudio/models/nerfacto.py | 2 +-
nerfstudio/models/tensorf.py | 2 +-
nerfstudio/pipelines/base_pipeline.py | 4 ++--
8 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py
index b29661e116..c04c11aaac 100644
--- a/nerfstudio/configs/base_config.py
+++ b/nerfstudio/configs/base_config.py
@@ -17,7 +17,7 @@
from __future__ import annotations
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, List, Literal, Optional, Tuple, Type
@@ -113,7 +113,7 @@ class LoggingConfig(PrintableConfig):
max_buffer_size: int = 20
"""maximum history size to keep for computing running averages of stats.
e.g. if 20, averages will be computed over past 20 occurrences."""
- local_writer: LocalWriterConfig = LocalWriterConfig(enable=True)
+ local_writer: LocalWriterConfig = field(default_factory=lambda: LocalWriterConfig(enable=True))
"""if provided, will print stats locally. if None, will disable printing"""
profiler: Literal["none", "basic", "pytorch"] = "basic"
"""how to profile the code;
diff --git a/nerfstudio/configs/experiment_config.py b/nerfstudio/configs/experiment_config.py
index d4b3d4de66..5686898b70 100644
--- a/nerfstudio/configs/experiment_config.py
+++ b/nerfstudio/configs/experiment_config.py
@@ -16,7 +16,7 @@
from __future__ import annotations
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Literal, Optional
@@ -51,13 +51,13 @@ class ExperimentConfig(InstantiateConfig):
"""Project name."""
timestamp: str = "{timestamp}"
"""Experiment timestamp."""
- machine: MachineConfig = MachineConfig()
+ machine: MachineConfig = field(default_factory=lambda: MachineConfig())
"""Machine configuration"""
- logging: LoggingConfig = LoggingConfig()
+ logging: LoggingConfig = field(default_factory=lambda: LoggingConfig())
"""Logging configuration"""
- viewer: ViewerConfig = ViewerConfig()
+ viewer: ViewerConfig = field(default_factory=lambda: ViewerConfig())
"""Viewer configuration"""
- pipeline: VanillaPipelineConfig = VanillaPipelineConfig()
+ pipeline: VanillaPipelineConfig = field(default_factory=lambda: VanillaPipelineConfig())
"""Pipeline configuration"""
optimizers: Dict[str, Any] = to_immutable_dict(
{
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 9be95514c1..7971f46e56 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -317,7 +317,7 @@ class VanillaDataManagerConfig(DataManagerConfig):
_target: Type = field(default_factory=lambda: VanillaDataManager)
"""Target class to instantiate."""
- dataparser: AnnotatedDataParserUnion = BlenderDataParserConfig()
+ dataparser: AnnotatedDataParserUnion = field(default_factory=lambda: BlenderDataParserConfig())
"""Specifies the dataparser used to unpack the data."""
train_num_rays_per_batch: int = 1024
"""Number of rays per batch to use per training iteration."""
@@ -345,7 +345,7 @@ class VanillaDataManagerConfig(DataManagerConfig):
"""Size of patch to sample from. If > 1, patch-based sampling will be used."""
camera_optimizer: Optional[CameraOptimizerConfig] = field(default=None)
"""Deprecated, has been moved to the model config."""
- pixel_sampler: PixelSamplerConfig = PixelSamplerConfig()
+ pixel_sampler: PixelSamplerConfig = field(default_factory=lambda: PixelSamplerConfig())
"""Specifies the pixel sampler used to sample pixels from images."""
def __post_init__(self):
diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py
index 80fab739f4..5cf1e6bdbf 100644
--- a/nerfstudio/data/dataparsers/base_dataparser.py
+++ b/nerfstudio/data/dataparsers/base_dataparser.py
@@ -57,7 +57,7 @@ class DataparserOutputs:
"""Camera object storing collection of camera information in dataset."""
alpha_color: Optional[Float[Tensor, "3"]] = None
"""Color of dataset background."""
- scene_box: SceneBox = SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]]))
+ scene_box: SceneBox = field(default_factory=lambda: SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]])))
"""Scene box of dataset. Used to bound the scene or provide the scene scale depending on model."""
mask_filenames: Optional[List[Path]] = None
"""Filenames for any masks that are required"""
diff --git a/nerfstudio/models/base_surface_model.py b/nerfstudio/models/base_surface_model.py
index 638aa27130..97bde9d9f4 100644
--- a/nerfstudio/models/base_surface_model.py
+++ b/nerfstudio/models/base_surface_model.py
@@ -79,7 +79,7 @@ class SurfaceModelConfig(ModelConfig):
"""Monocular normal consistency loss multiplier."""
mono_depth_loss_mult: float = 0.0
"""Monocular depth consistency loss multiplier."""
- sdf_field: SDFFieldConfig = SDFFieldConfig()
+ sdf_field: SDFFieldConfig = field(default_factory=lambda: SDFFieldConfig())
"""Config for SDF Field"""
background_model: Literal["grid", "mlp", "none"] = "mlp"
"""background models"""
diff --git a/nerfstudio/models/nerfacto.py b/nerfstudio/models/nerfacto.py
index df8eed9b65..667d23eb81 100644
--- a/nerfstudio/models/nerfacto.py
+++ b/nerfstudio/models/nerfacto.py
@@ -127,7 +127,7 @@ class NerfactoModelConfig(ModelConfig):
"""Which implementation to use for the model."""
appearance_embed_dim: int = 32
"""Dimension of the appearance embedding."""
- camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3")
+ camera_optimizer: CameraOptimizerConfig = field(default_factory=lambda: CameraOptimizerConfig(mode="SO3xR3"))
"""Config of the camera optimizer to use"""
diff --git a/nerfstudio/models/tensorf.py b/nerfstudio/models/tensorf.py
index 53d986337e..0ca56f10de 100644
--- a/nerfstudio/models/tensorf.py
+++ b/nerfstudio/models/tensorf.py
@@ -90,7 +90,7 @@ class TensoRFModelConfig(ModelConfig):
tensorf_encoding: Literal["triplane", "vm", "cp"] = "vm"
regularization: Literal["none", "l1", "tv"] = "l1"
"""Regularization method used in tensorf paper"""
- camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="SO3xR3")
+ camera_optimizer: CameraOptimizerConfig = field(default_factory=lambda: CameraOptimizerConfig(mode="SO3xR3"))
"""Config of the camera optimizer to use"""
use_gradient_scaling: bool = False
"""Use gradient scaler where the gradients are lower for points closer to the camera."""
diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py
index 10aca0e70a..345a39d4b6 100644
--- a/nerfstudio/pipelines/base_pipeline.py
+++ b/nerfstudio/pipelines/base_pipeline.py
@@ -224,9 +224,9 @@ class VanillaPipelineConfig(cfg.InstantiateConfig):
_target: Type = field(default_factory=lambda: VanillaPipeline)
"""target class to instantiate"""
- datamanager: DataManagerConfig = DataManagerConfig()
+ datamanager: DataManagerConfig = field(default_factory=lambda: DataManagerConfig())
"""specifies the datamanager config"""
- model: ModelConfig = ModelConfig()
+ model: ModelConfig = field(default_factory=lambda: ModelConfig())
"""specifies the model config"""
From 49503c2175a71c3d71b75d5702c4ff9e8b0342cb Mon Sep 17 00:00:00 2001
From: Jose <34888496+Jerry-Master@users.noreply.github.com>
Date: Fri, 1 Dec 2023 01:04:01 +0100
Subject: [PATCH 079/101] Mps fix (#2436)
* mps bugfix
* removed docs change
* Double quotes
---------
Co-authored-by: Brent Yi
---
nerfstudio/engine/trainer.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index fe4aa85cea..4cc296063e 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -464,6 +464,7 @@ def train_iteration(self, step: int) -> TRAIN_INTERATION_OUTPUT:
self.optimizers.zero_grad_all()
cpu_or_cuda_str: str = self.device.split(":")[0]
+ cpu_or_cuda_str = "cpu" if cpu_or_cuda_str == "mps" else cpu_or_cuda_str
assert (
self.gradient_accumulation_steps > 0
), f"gradient_accumulation_steps must be > 0, not {self.gradient_accumulation_steps}"
From 0cb410001b1fff668e51b12e7ff579711c1187f9 Mon Sep 17 00:00:00 2001
From: Panteleris Paschalis
Date: Fri, 1 Dec 2023 03:11:11 +0200
Subject: [PATCH 080/101] Fixes bug in PairPixelSampler when working with
masked dataset (#2368)
* Fixes a bug in PairPixelSampler when working with a masked dataset
* Formatting
* Black formatting now ok.
---------
Co-authored-by: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com>
Co-authored-by: Brent Yi
---
nerfstudio/data/pixel_samplers.py | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
index 9234a5d420..07bab5d826 100644
--- a/nerfstudio/data/pixel_samplers.py
+++ b/nerfstudio/data/pixel_samplers.py
@@ -17,13 +17,7 @@
"""
import random
-
-import torch
-from jaxtyping import Int
-from torch import Tensor
-
from dataclasses import dataclass, field
-from nerfstudio.data.utils.pixel_sampling_utils import erode_mask
from typing import (
Dict,
Optional,
@@ -31,9 +25,14 @@
Union,
)
+import torch
+from jaxtyping import Int
+from torch import Tensor
+
from nerfstudio.configs.base_config import (
InstantiateConfig,
)
+from nerfstudio.data.utils.pixel_sampling_utils import erode_mask
@dataclass
@@ -398,19 +397,18 @@ def sample_method( # pylint: disable=no-self-use
device: Union[torch.device, str] = "cpu",
) -> Int[Tensor, "batch_size 3"]:
rays_to_sample = self.rays_to_sample
+ if batch_size is not None:
+ assert (
+ int(batch_size) % 2 == 0
+ ), f"PairPixelSampler can only return batch sizes in multiples of two (got {batch_size})"
+ rays_to_sample = batch_size // 2
+
if isinstance(mask, Tensor):
m = erode_mask(mask.permute(0, 3, 1, 2).float(), pixel_radius=self.radius)
nonzero_indices = torch.nonzero(m[:, 0], as_tuple=False).to(device)
chosen_indices = random.sample(range(len(nonzero_indices)), k=rays_to_sample)
indices = nonzero_indices[chosen_indices]
else:
- rays_to_sample = self.rays_to_sample
- if batch_size is not None:
- assert (
- int(batch_size) % 2 == 0
- ), f"PairPixelSampler can only return batch sizes in multiples of two (got {batch_size})"
- rays_to_sample = batch_size // 2
-
s = (rays_to_sample, 1)
ns = torch.randint(0, num_images, s, dtype=torch.long, device=device)
hs = torch.randint(self.radius, image_height - self.radius, s, dtype=torch.long, device=device)
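The bug was that the `batch_size`-to-`rays_to_sample` conversion only ran on the unmasked branch, so with a mask the sampler ignored the requested batch size; hoisting it above the `isinstance(mask, Tensor)` check applies it in both cases. The arithmetic it enforces:
```python
# PairPixelSampler emits rays in pairs, so the requested ray batch must be even
# and corresponds to half as many sampled anchor pixels, masked or not.
batch_size = 4096                 # assumed example value
assert batch_size % 2 == 0
rays_to_sample = batch_size // 2  # 2048 anchor pixels
```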
From 4c627edbacceec7d6e9c001e94ae3469b59c7a91 Mon Sep 17 00:00:00 2001
From: Ethan Weber
Date: Thu, 30 Nov 2023 19:16:42 -0800
Subject: [PATCH 081/101] Encoder + MLP combo (#2063)
* adding encoder + mlp combo
* update documentation
* minor fixes
* fixed issue when tcnn isn't installed
---------
Co-authored-by: Brent Yi
---
nerfstudio/field_components/encodings.py | 88 ++++++++----
nerfstudio/field_components/mlp.py | 176 +++++++++++++++++++----
nerfstudio/fields/nerfacto_field.py | 13 +-
3 files changed, 215 insertions(+), 62 deletions(-)
diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py
index 15a2ce3ab4..dc410673b9 100644
--- a/nerfstudio/field_components/encodings.py
+++ b/nerfstudio/field_components/encodings.py
@@ -48,6 +48,11 @@ def __init__(self, in_dim: int) -> None:
raise ValueError("Input dimension should be greater than zero")
super().__init__(in_dim=in_dim)
+ @classmethod
+ def get_tcnn_encoding_config(cls) -> dict:
+ """Get the encoding configuration for tcnn if implemented"""
+ raise NotImplementedError("Encoding does not have a TCNN implementation")
+
@abstractmethod
def forward(self, in_tensor: Shaped[Tensor, "*bs input_dim"]) -> Shaped[Tensor, "*bs output_dim"]:
"""Call forward and returns and processed tensor
@@ -126,14 +131,20 @@ def __init__(
if implementation == "tcnn" and not TCNN_EXISTS:
print_tcnn_speed_warning("NeRFEncoding")
elif implementation == "tcnn":
- encoding_config = {"otype": "Frequency", "n_frequencies": num_frequencies}
assert min_freq_exp == 0, "tcnn only supports min_freq_exp = 0"
assert max_freq_exp == num_frequencies - 1, "tcnn only supports max_freq_exp = num_frequencies - 1"
+ encoding_config = self.get_tcnn_encoding_config(num_frequencies=self.num_frequencies)
self.tcnn_encoding = tcnn.Encoding(
n_input_dims=in_dim,
encoding_config=encoding_config,
)
+ @classmethod
+ def get_tcnn_encoding_config(cls, num_frequencies) -> dict:
+ """Get the encoding configuration for tcnn if implemented"""
+ encoding_config = {"otype": "Frequency", "n_frequencies": num_frequencies}
+ return encoding_config
+
def get_out_dim(self) -> int:
if self.in_dim is None:
raise ValueError("Input dimension has not been set")
@@ -327,48 +338,67 @@ def __init__(
) -> None:
super().__init__(in_dim=3)
self.num_levels = num_levels
+ self.min_res = min_res
self.features_per_level = features_per_level
+ self.hash_init_scale = hash_init_scale
self.log2_hashmap_size = log2_hashmap_size
self.hash_table_size = 2**log2_hashmap_size
levels = torch.arange(num_levels)
- growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1
- self.scalings = torch.floor(min_res * growth_factor**levels)
+ self.growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1
+ self.scalings = torch.floor(min_res * self.growth_factor**levels)
self.hash_offset = levels * self.hash_table_size
self.tcnn_encoding = None
self.hash_table = torch.empty(0)
- if implementation == "tcnn" and not TCNN_EXISTS:
+ if implementation == "torch":
+ self.build_nn_modules()
+ elif implementation == "tcnn" and not TCNN_EXISTS:
print_tcnn_speed_warning("HashEncoding")
- implementation = "torch"
-
- if implementation == "tcnn":
- encoding_config = {
- "otype": "HashGrid",
- "n_levels": self.num_levels,
- "n_features_per_level": self.features_per_level,
- "log2_hashmap_size": self.log2_hashmap_size,
- "base_resolution": min_res,
- "per_level_scale": growth_factor,
- }
- if interpolation is not None:
- encoding_config["interpolation"] = interpolation
-
+ self.build_nn_modules()
+ elif implementation == "tcnn":
+ encoding_config = self.get_tcnn_encoding_config(
+ num_levels=self.num_levels,
+ features_per_level=self.features_per_level,
+ log2_hashmap_size=self.log2_hashmap_size,
+ min_res=self.min_res,
+ growth_factor=self.growth_factor,
+ interpolation=interpolation,
+ )
self.tcnn_encoding = tcnn.Encoding(
n_input_dims=3,
encoding_config=encoding_config,
)
- elif implementation == "torch":
- self.hash_table = torch.rand(size=(self.hash_table_size * num_levels, features_per_level)) * 2 - 1
- self.hash_table *= hash_init_scale
- self.hash_table = nn.Parameter(self.hash_table)
if self.tcnn_encoding is None:
assert (
interpolation is None or interpolation == "Linear"
), f"interpolation '{interpolation}' is not supported for torch encoding backend"
+ def build_nn_modules(self) -> None:
+ """Initialize the torch version of the hash encoding."""
+ self.hash_table = torch.rand(size=(self.hash_table_size * self.num_levels, self.features_per_level)) * 2 - 1
+ self.hash_table *= self.hash_init_scale
+ self.hash_table = nn.Parameter(self.hash_table)
+
+ @classmethod
+ def get_tcnn_encoding_config(
+ cls, num_levels, features_per_level, log2_hashmap_size, min_res, growth_factor, interpolation=None
+ ) -> dict:
+ """Get the encoding configuration for tcnn if implemented"""
+ encoding_config = {
+ "otype": "HashGrid",
+ "n_levels": num_levels,
+ "n_features_per_level": features_per_level,
+ "log2_hashmap_size": log2_hashmap_size,
+ "base_resolution": min_res,
+ "per_level_scale": growth_factor,
+ }
+ if interpolation is not None:
+ encoding_config["interpolation"] = interpolation
+ return encoding_config
+
def get_out_dim(self) -> int:
return self.num_levels * self.features_per_level
@@ -745,15 +775,21 @@ def __init__(self, levels: int = 4, implementation: Literal["tcnn", "torch"] = "
if implementation == "tcnn" and not TCNN_EXISTS:
print_tcnn_speed_warning("SHEncoding")
elif implementation == "tcnn":
- encoding_config = {
- "otype": "SphericalHarmonics",
- "degree": levels,
- }
+ encoding_config = self.get_tcnn_encoding_config(levels=self.levels)
self.tcnn_encoding = tcnn.Encoding(
n_input_dims=3,
encoding_config=encoding_config,
)
+ @classmethod
+ def get_tcnn_encoding_config(cls, levels) -> dict:
+ """Get the encoding configuration for tcnn if implemented"""
+ encoding_config = {
+ "otype": "SphericalHarmonics",
+ "degree": levels,
+ }
+ return encoding_config
+
def get_out_dim(self) -> int:
return self.levels**2
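For reference, the extracted classmethods above can be called without constructing an encoding module, which is what lets other components (such as the fused MLP introduced in the next file) reuse the same tcnn configuration. A minimal sketch, with illustrative values rather than ones prescribed by this patch:

```python
# Sketch: build tcnn config dicts via the new classmethods (values are illustrative).
from nerfstudio.field_components.encodings import HashEncoding, SHEncoding

hash_cfg = HashEncoding.get_tcnn_encoding_config(
    num_levels=16,
    features_per_level=2,
    log2_hashmap_size=19,
    min_res=16,
    growth_factor=1.447,
)
sh_cfg = SHEncoding.get_tcnn_encoding_config(levels=4)
print(hash_cfg["otype"], sh_cfg["otype"])  # HashGrid SphericalHarmonics
```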
diff --git a/nerfstudio/field_components/mlp.py b/nerfstudio/field_components/mlp.py
index 2585a49e4e..94e6b1eb49 100644
--- a/nerfstudio/field_components/mlp.py
+++ b/nerfstudio/field_components/mlp.py
@@ -17,12 +17,14 @@
"""
from typing import Literal, Optional, Set, Tuple, Union
+import numpy as np
import torch
from jaxtyping import Float
from torch import Tensor, nn
from nerfstudio.field_components.base_field_component import FieldComponent
from nerfstudio.utils.printing import print_tcnn_speed_warning
+from nerfstudio.field_components.encodings import HashEncoding
from nerfstudio.utils.rich_utils import CONSOLE
from nerfstudio.utils.external import TCNN_EXISTS, tcnn
@@ -66,6 +68,7 @@ class MLP(FieldComponent):
out_dim: Output layer dimension. Uses layer_width if None.
activation: intermediate layer activation function.
out_activation: output activation function.
+        implementation: Implementation of the MLP. Fallback to torch if tcnn not available.
"""
def __init__(
@@ -98,39 +101,47 @@ def __init__(
print_tcnn_speed_warning("MLP")
self.build_nn_modules()
elif implementation == "tcnn":
- activation_str = activation_to_tcnn_string(activation)
- output_activation_str = activation_to_tcnn_string(out_activation)
- if layer_width in [16, 32, 64, 128]:
- network_config = {
- "otype": "FullyFusedMLP",
- "activation": activation_str,
- "output_activation": output_activation_str,
- "n_neurons": layer_width,
- "n_hidden_layers": num_layers - 1,
- }
- else:
- CONSOLE.line()
- CONSOLE.print("[bold yellow]WARNING: Using slower TCNN CutlassMLP instead of TCNN FullyFusedMLP")
- CONSOLE.print(
- "[bold yellow]Use layer width of 16, 32, 64, or 128 to use the faster TCNN FullyFusedMLP."
- )
- CONSOLE.line()
- network_config = {
- "otype": "CutlassMLP",
- "activation": activation_str,
- "output_activation": output_activation_str,
- "n_neurons": layer_width,
- "n_hidden_layers": num_layers - 1,
- }
-
+ network_config = self.get_tcnn_network_config(
+ activation=self.activation,
+ out_activation=self.out_activation,
+ layer_width=self.layer_width,
+ num_layers=self.num_layers,
+ )
self.tcnn_encoding = tcnn.Network(
n_input_dims=in_dim,
- n_output_dims=out_dim,
+ n_output_dims=self.out_dim,
network_config=network_config,
)
+ @classmethod
+ def get_tcnn_network_config(cls, activation, out_activation, layer_width, num_layers) -> dict:
+ """Get the network configuration for tcnn if implemented"""
+ activation_str = activation_to_tcnn_string(activation)
+ output_activation_str = activation_to_tcnn_string(out_activation)
+ if layer_width in [16, 32, 64, 128]:
+ network_config = {
+ "otype": "FullyFusedMLP",
+ "activation": activation_str,
+ "output_activation": output_activation_str,
+ "n_neurons": layer_width,
+ "n_hidden_layers": num_layers - 1,
+ }
+ else:
+ CONSOLE.line()
+ CONSOLE.print("[bold yellow]WARNING: Using slower TCNN CutlassMLP instead of TCNN FullyFusedMLP")
+ CONSOLE.print("[bold yellow]Use layer width of 16, 32, 64, or 128 to use the faster TCNN FullyFusedMLP.")
+ CONSOLE.line()
+ network_config = {
+ "otype": "CutlassMLP",
+ "activation": activation_str,
+ "output_activation": output_activation_str,
+ "n_neurons": layer_width,
+ "n_hidden_layers": num_layers - 1,
+ }
+ return network_config
+
def build_nn_modules(self) -> None:
- """Initialize multi-layer perceptron."""
+ """Initialize the torch version of the multi-layer perceptron."""
layers = []
if self.num_layers == 1:
layers.append(nn.Linear(self.in_dim, self.out_dim))
@@ -171,3 +182,114 @@ def forward(self, in_tensor: Float[Tensor, "*bs in_dim"]) -> Float[Tensor, "*bs
if self.tcnn_encoding is not None:
return self.tcnn_encoding(in_tensor)
return self.pytorch_fwd(in_tensor)
+
+
+class MLPWithHashEncoding(FieldComponent):
+ """Multilayer perceptron with hash encoding
+
+ Args:
+ num_levels: Number of feature grids.
+ min_res: Resolution of smallest feature grid.
+ max_res: Resolution of largest feature grid.
+ log2_hashmap_size: Size of hash map is 2^log2_hashmap_size.
+ features_per_level: Number of features per level.
+ hash_init_scale: Value to initialize hash grid.
+ interpolation: Interpolation override for tcnn hashgrid. Not supported for torch unless linear.
+ num_layers: Number of network layers
+ layer_width: Width of each MLP layer
+ out_dim: Output layer dimension. Uses layer_width if None.
+ activation: intermediate layer activation function.
+ out_activation: output activation function.
+ implementation: Implementation of hash encoding. Fallback to torch if tcnn not available.
+ """
+
+ def __init__(
+ self,
+ num_levels: int = 16,
+ min_res: int = 16,
+ max_res: int = 1024,
+ log2_hashmap_size: int = 19,
+ features_per_level: int = 2,
+ hash_init_scale: float = 0.001,
+ interpolation: Optional[Literal["Nearest", "Linear", "Smoothstep"]] = None,
+ num_layers: int = 2,
+ layer_width: int = 64,
+ out_dim: Optional[int] = None,
+ skip_connections: Optional[Tuple[int]] = None,
+ activation: Optional[nn.Module] = nn.ReLU(),
+ out_activation: Optional[nn.Module] = None,
+ implementation: Literal["tcnn", "torch"] = "torch",
+ ) -> None:
+ super().__init__()
+ self.in_dim = 3
+
+ self.num_levels = num_levels
+ self.min_res = min_res
+ self.max_res = max_res
+ self.features_per_level = features_per_level
+ self.hash_init_scale = hash_init_scale
+ self.log2_hashmap_size = log2_hashmap_size
+ self.hash_table_size = 2**log2_hashmap_size
+
+ self.growth_factor = np.exp((np.log(max_res) - np.log(min_res)) / (num_levels - 1)) if num_levels > 1 else 1
+
+ self.out_dim = out_dim if out_dim is not None else layer_width
+ self.num_layers = num_layers
+ self.layer_width = layer_width
+ self.skip_connections = skip_connections
+ self._skip_connections: Set[int] = set(skip_connections) if skip_connections else set()
+ self.activation = activation
+ self.out_activation = out_activation
+ self.net = None
+
+ self.tcnn_encoding = None
+ if implementation == "torch":
+ self.build_nn_modules()
+ elif implementation == "tcnn" and not TCNN_EXISTS:
+ print_tcnn_speed_warning("MLPWithHashEncoding")
+ self.build_nn_modules()
+ elif implementation == "tcnn":
+ self.model = tcnn.NetworkWithInputEncoding(
+ n_input_dims=self.in_dim,
+ n_output_dims=self.out_dim,
+ encoding_config=HashEncoding.get_tcnn_encoding_config(
+ num_levels=self.num_levels,
+ features_per_level=self.features_per_level,
+ log2_hashmap_size=self.log2_hashmap_size,
+ min_res=self.min_res,
+ growth_factor=self.growth_factor,
+ interpolation=interpolation,
+ ),
+ network_config=MLP.get_tcnn_network_config(
+ activation=self.activation,
+ out_activation=self.out_activation,
+ layer_width=self.layer_width,
+ num_layers=self.num_layers,
+ ),
+ )
+
+ def build_nn_modules(self) -> None:
+ """Initialize the torch version of the MLP with hash encoding."""
+ encoder = HashEncoding(
+ num_levels=self.num_levels,
+ min_res=self.min_res,
+ max_res=self.max_res,
+ log2_hashmap_size=self.log2_hashmap_size,
+ features_per_level=self.features_per_level,
+ hash_init_scale=self.hash_init_scale,
+ implementation="torch",
+ )
+ mlp = MLP(
+ in_dim=encoder.get_out_dim(),
+ num_layers=self.num_layers,
+ layer_width=self.layer_width,
+ out_dim=self.out_dim,
+ skip_connections=self.skip_connections,
+ activation=self.activation,
+ out_activation=self.out_activation,
+ implementation="torch",
+ )
+ self.model = torch.nn.Sequential(encoder, mlp)
+
+ def forward(self, in_tensor: Float[Tensor, "*bs in_dim"]) -> Float[Tensor, "*bs out_dim"]:
+ return self.model(in_tensor)
diff --git a/nerfstudio/fields/nerfacto_field.py b/nerfstudio/fields/nerfacto_field.py
index f215a00458..910bba8465 100644
--- a/nerfstudio/fields/nerfacto_field.py
+++ b/nerfstudio/fields/nerfacto_field.py
@@ -26,7 +26,7 @@
from nerfstudio.data.scene_box import SceneBox
from nerfstudio.field_components.activations import trunc_exp
from nerfstudio.field_components.embedding import Embedding
-from nerfstudio.field_components.encodings import HashEncoding, NeRFEncoding, SHEncoding
+from nerfstudio.field_components.encodings import NeRFEncoding, SHEncoding
from nerfstudio.field_components.field_heads import (
FieldHeadNames,
PredNormalsFieldHead,
@@ -35,13 +35,13 @@
TransientRGBFieldHead,
UncertaintyFieldHead,
)
-from nerfstudio.field_components.mlp import MLP
+from nerfstudio.field_components.mlp import MLP, MLPWithHashEncoding
from nerfstudio.field_components.spatial_distortions import SpatialDistortion
from nerfstudio.fields.base_field import Field, get_normalized_directions
class NerfactoField(Field):
- """Compound Field that uses TCNN
+ """Compound Field
Args:
aabb: parameters of scene aabb bounds
@@ -127,16 +127,12 @@ def __init__(
in_dim=3, num_frequencies=2, min_freq_exp=0, max_freq_exp=2 - 1, implementation=implementation
)
- self.mlp_base_grid = HashEncoding(
+ self.mlp_base = MLPWithHashEncoding(
num_levels=num_levels,
min_res=base_res,
max_res=max_res,
log2_hashmap_size=log2_hashmap_size,
features_per_level=features_per_level,
- implementation=implementation,
- )
- self.mlp_base_mlp = MLP(
- in_dim=self.mlp_base_grid.get_out_dim(),
num_layers=num_layers,
layer_width=hidden_dim,
out_dim=1 + self.geo_feat_dim,
@@ -144,7 +140,6 @@ def __init__(
out_activation=None,
implementation=implementation,
)
- self.mlp_base = torch.nn.Sequential(self.mlp_base_grid, self.mlp_base_mlp)
# transients
if self.use_transient_embedding:
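Taken together, the patch lets a field request the encoder and the MLP as a single component; with tcnn installed this becomes one fused tcnn.NetworkWithInputEncoding, and otherwise it falls back to HashEncoding followed by MLP. A usage sketch with illustrative arguments:

```python
import torch
from torch import nn
from nerfstudio.field_components.mlp import MLPWithHashEncoding

mlp_base = MLPWithHashEncoding(
    num_levels=16,
    min_res=16,
    max_res=2048,
    log2_hashmap_size=19,
    features_per_level=2,
    num_layers=2,
    layer_width=64,
    out_dim=16,
    activation=nn.ReLU(),
    out_activation=None,
    implementation="torch",  # use "tcnn" when tiny-cuda-nn is available
)
features = mlp_base(torch.rand(128, 3))  # -> shape (128, 16)
```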
From 64f0b2547ba02b1e2aeef719de95b4e7b1aa188a Mon Sep 17 00:00:00 2001
From: AdamRashid96 <71362382+AdamRashid96@users.noreply.github.com>
Date: Wed, 6 Dec 2023 22:56:39 -0800
Subject: [PATCH 082/101] Ns-process-data bug with dev version of colmap
(#2651)
* only parse digit and decimal
* formatting
---
nerfstudio/process_data/colmap_utils.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/process_data/colmap_utils.py b/nerfstudio/process_data/colmap_utils.py
index 83188c005b..27d6e86150 100644
--- a/nerfstudio/process_data/colmap_utils.py
+++ b/nerfstudio/process_data/colmap_utils.py
@@ -55,7 +55,9 @@ def get_colmap_version(colmap_cmd: str, default_version=3.8) -> float:
assert output is not None
for line in output.split("\n"):
if line.startswith("COLMAP"):
- return float(line.split(" ")[1])
+ version = line.split(" ")[1]
+ version = "".join([c for c in version if c.isdigit() or c == "."])
+ return float(version)
CONSOLE.print(f"[bold red]Could not find COLMAP version. Using default {default_version}")
return default_version
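The filter matters because development builds of COLMAP report version strings that float() cannot parse directly. A minimal sketch with a hypothetical dev-build output line:

```python
# Hypothetical output line from a COLMAP dev build.
line = "COLMAP 3.9-dev -- Structure-from-Motion and Multi-View Stereo"
version = line.split(" ")[1]                                        # "3.9-dev"
version = "".join([c for c in version if c.isdigit() or c == "."])  # "3.9"
print(float(version))                                               # 3.9
```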
From 4c966b9bd4bab5a4089e5ca767b4a6af9b3e1bf6 Mon Sep 17 00:00:00 2001
From: omahs <73983677+omahs@users.noreply.github.com>
Date: Fri, 8 Dec 2023 12:27:05 +0100
Subject: [PATCH 083/101] Fix typos (#2655)
* fix typo
* fix typo
* fix typos
* fix typo
---
docs/developer_guides/config.md | 2 +-
docs/developer_guides/new_methods.md | 2 +-
docs/developer_guides/viewer/local_viewer.md | 2 +-
docs/index.md | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/developer_guides/config.md b/docs/developer_guides/config.md
index bf216278f3..acf6914d7f 100644
--- a/docs/developer_guides/config.md
+++ b/docs/developer_guides/config.md
@@ -89,7 +89,7 @@ Often times, you just want to play with the parameters of an existing model with
ns-train --help
```
-- List out all exist configurable parameters for `{METHOD_NAME}`
+- List out all existing configurable parameters for `{METHOD_NAME}`
```bash
ns-train {METHOD_NAME} --help
diff --git a/docs/developer_guides/new_methods.md b/docs/developer_guides/new_methods.md
index 4b56d85810..dd1ef31e76 100644
--- a/docs/developer_guides/new_methods.md
+++ b/docs/developer_guides/new_methods.md
@@ -132,7 +132,7 @@ finally run the following to register the dataparser.
pip install -e .
```
-Similarly to the method develomement, you can also use environment variables to register dataparsers.
+Similarly to the method development, you can also use environment variables to register dataparsers.
Use the `NERFSTUDIO_DATAPARSER_CONFIGS` environment variable:
```
diff --git a/docs/developer_guides/viewer/local_viewer.md b/docs/developer_guides/viewer/local_viewer.md
index d5912bd891..2931f8c0d6 100644
--- a/docs/developer_guides/viewer/local_viewer.md
+++ b/docs/developer_guides/viewer/local_viewer.md
@@ -1,6 +1,6 @@
# Local Server
-If you are unable to connect to `https://viewer.nerf.studio`, want to use Safari, or want develop the viewer codebase, you can launch your own local viewer.
+If you are unable to connect to `https://viewer.nerf.studio`, want to use Safari, or want to develop the viewer codebase, you can launch your own local viewer.
## Installing Dependencies
diff --git a/docs/index.md b/docs/index.md
index 9e294c3ac2..6b160fcecc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -135,7 +135,7 @@ This documentation is organized into 3 parts:
### Included Methods
-- [**Nerfacto**](nerfology/methods/nerfacto.md): Recommended method, integrates mutiple methods into one.
+- [**Nerfacto**](nerfology/methods/nerfacto.md): Recommended method, integrates multiple methods into one.
- [Instant-NGP](nerfology/methods/instant_ngp.md): Instant Neural Graphics Primitives with a Multiresolution Hash Encoding
- [NeRF](nerfology/methods/nerf.md): OG Neural Radiance Fields
- [Mip-NeRF](nerfology/methods/mipnerf.md): A Multiscale Representation for Anti-Aliasing Neural Radiance Fields
From 1d9cc7dd8a4f1c5fdc8f05dd4ea3b5c3ef6bb85a Mon Sep 17 00:00:00 2001
From: Alexandru Kis
Date: Fri, 8 Dec 2023 18:58:51 +0200
Subject: [PATCH 084/101] Update hloc and add new matcher types (#2658)
* Update hloc and add new matcher types
* Update dockerfile comment to better reflect what the command is doing
---------
Co-authored-by: mfischer
---
Dockerfile | 3 ++-
.../colmap_converter_to_nerfstudio_dataset.py | 2 ++
nerfstudio/process_data/hloc_utils.py | 9 +++++++-
nerfstudio/process_data/process_data_utils.py | 23 +++++++++++++++++--
4 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index 300d00df3a..8c9fe4f31c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -135,9 +135,10 @@ RUN git clone --branch v0.4.0 --recursive https://github.com/colmap/pycolmap.git
python3.10 -m pip install . && \
cd ..
-# Install hloc master (last release (1.3) is too old) as alternative feature detector and matcher option for nerfstudio.
+# Install hloc 1.4 as alternative feature detector and matcher option for nerfstudio.
RUN git clone --branch master --recursive https://github.com/cvg/Hierarchical-Localization.git && \
cd Hierarchical-Localization && \
+ git checkout v1.4 && \
python3.10 -m pip install -e . && \
cd ..
diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
index b4dccbdd82..924c4b752d 100644
--- a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
@@ -66,6 +66,8 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
"NN-ratio",
"NN-mutual",
"adalam",
+ "disk+lightglue",
+ "superpoint+lightglue",
] = "any"
"""Matching algorithm."""
num_downscales: int = 3
diff --git a/nerfstudio/process_data/hloc_utils.py b/nerfstudio/process_data/hloc_utils.py
index a833f51edd..b7167efd88 100644
--- a/nerfstudio/process_data/hloc_utils.py
+++ b/nerfstudio/process_data/hloc_utils.py
@@ -60,7 +60,14 @@ def run_hloc(
"sift", "superpoint_aachen", "superpoint_max", "superpoint_inloc", "r2d2", "d2net-ss", "sosnet", "disk"
] = "superpoint_aachen",
matcher_type: Literal[
- "superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam"
+ "superglue",
+ "superglue-fast",
+ "NN-superpoint",
+ "NN-ratio",
+ "NN-mutual",
+ "adalam",
+ "disk+lightglue",
+ "superpoint+lightglue",
] = "superglue",
num_matched: int = 50,
refine_pixsfm: bool = False,
diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index 334dab4de0..fe24e55f27 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -477,7 +477,16 @@ def find_tool_feature_matcher_combination(
"disk",
],
matcher_type: Literal[
- "any", "NN", "superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam"
+ "any",
+ "NN",
+ "superglue",
+ "superglue-fast",
+ "NN-superpoint",
+ "NN-ratio",
+ "NN-mutual",
+ "adalam",
+ "disk+lightglue",
+ "superpoint+lightglue",
],
) -> Union[
Tuple[None, None, None],
@@ -493,7 +502,17 @@ def find_tool_feature_matcher_combination(
"sosnet",
"disk",
],
- Literal["NN", "superglue", "superglue-fast", "NN-superpoint", "NN-ratio", "NN-mutual", "adalam"],
+ Literal[
+ "NN",
+ "superglue",
+ "superglue-fast",
+ "NN-superpoint",
+ "NN-ratio",
+ "NN-mutual",
+ "adalam",
+ "disk+lightglue",
+ "superpoint+lightglue",
+ ],
],
]:
"""Find a valid combination of sfm tool, feature type, and matcher type.
From 390b63373dd65c7ecab42e43392fa93420d1e6b0 Mon Sep 17 00:00:00 2001
From: Paul Wais
Date: Mon, 11 Dec 2023 05:55:36 -0800
Subject: [PATCH 085/101] Trainer can now always save dataparser transform
(#2653)
* Trainer can now always save dataparser transform
* appease the linter
---------
Co-authored-by: Brent Yi
---
nerfstudio/engine/trainer.py | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 4cc296063e..2930db892e 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -28,7 +28,6 @@
import torch
from nerfstudio.configs.experiment_config import ExperimentConfig
-from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
from nerfstudio.engine.optimizers import Optimizers
from nerfstudio.pipelines.base_pipeline import VanillaPipeline
@@ -225,11 +224,9 @@ def train(self) -> None:
"""Train the model."""
assert self.pipeline.datamanager.train_dataset is not None, "Missing DatsetInputs"
- # don't want to call save_dataparser_transform if pipeline's datamanager does not have a dataparser
- if isinstance(self.pipeline.datamanager, VanillaDataManager):
- self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
- self.base_dir / "dataparser_transforms.json"
- )
+ self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform(
+ self.base_dir / "dataparser_transforms.json"
+ )
self._init_viewer_state()
with TimeWriter(writer, EventName.TOTAL_TRAIN_TIME):
From 73fc3dcd6305001818f45ca7a1392de330a2c337 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Mon, 11 Dec 2023 18:08:17 +0000
Subject: [PATCH 086/101] Update dependencies for M1 Macs (#2665)
* Update dependencies for M1 Macs
* import newrawpy as rawpy
* Comment
* Add back rawpy for Linux
* Run black
---
nerfstudio/process_data/process_data_utils.py | 25 +++++++++++++++----
pyproject.toml | 7 ++++--
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index fe24e55f27..c1946305b3 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -24,9 +24,13 @@
import cv2
import imageio
-import numpy as np
-import rawpy
+try:
+ import rawpy
+except ImportError:
+ import newrawpy as rawpy # type: ignore
+
+import numpy as np
from nerfstudio.utils.rich_utils import CONSOLE, status
from nerfstudio.utils.scripts import run_command
@@ -352,7 +356,11 @@ def copy_and_upscale_polycam_depth_maps_list(
depth_dir.mkdir(parents=True, exist_ok=True)
# copy and upscale them to new directory
- with status(msg="[bold yellow] Upscaling depth maps...", spinner="growVertical", verbose=verbose):
+ with status(
+ msg="[bold yellow] Upscaling depth maps...",
+ spinner="growVertical",
+ verbose=verbose,
+ ):
upscale_factor = 2**POLYCAM_UPSCALING_TIMES
assert upscale_factor > 1
assert isinstance(upscale_factor, int)
@@ -437,7 +445,11 @@ def downscale_images(
if num_downscales == 0:
return "No downscaling performed."
- with status(msg="[bold yellow]Downscaling images...", spinner="growVertical", verbose=verbose):
+ with status(
+ msg="[bold yellow]Downscaling images...",
+ spinner="growVertical",
+ verbose=verbose,
+ ):
downscale_factors = [2**i for i in range(num_downscales + 1)[1:]]
for downscale_factor in downscale_factors:
assert downscale_factor > 1
@@ -600,7 +612,10 @@ def generate_crop_mask(height: int, width: int, crop_factor: Tuple[float, float,
def generate_mask(
- height: int, width: int, crop_factor: Tuple[float, float, float, float], percent_radius: float
+ height: int,
+ width: int,
+ crop_factor: Tuple[float, float, float, float],
+ percent_radius: float,
) -> Optional[np.ndarray]:
"""generate a mask of the given size.
diff --git a/pyproject.toml b/pyproject.toml
index 780d2c6b5a..4c589f6808 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,11 +39,14 @@ dependencies = [
"protobuf<=3.20.3,!=3.20.0",
# TODO(1480) enable when pycolmap windows wheels are available
# "pycolmap==0.3.0",
- "pymeshlab>=2022.2.post2",
+ "pymeshlab>=2022.2.post2; platform_machine != 'arm64'",
"pyngrok>=5.1.0",
"python-socketio>=5.7.1",
"pyquaternion>=0.9.9",
- "rawpy>=0.18.1",
+ # TODO we can switch back to (non-new) rawpy if they start releasing arm64
+ # wheels. https://github.com/letmaik/rawpy/issues/171#issuecomment-1572627747
+ "rawpy>=0.18.1; platform_machine != 'arm64'",
+ "newrawpy>=0.18.1; platform_machine == 'arm64'",
"requests",
"rich>=12.5.1",
"scikit-image>=0.19.3",
From 281f81f1ec75c2be6d5793d617896b9b1ca8114d Mon Sep 17 00:00:00 2001
From: Mohit Motwani
Date: Tue, 12 Dec 2023 16:43:50 +0530
Subject: [PATCH 087/101] Update cameras.py - removed duplicate lines (#2661)
The self.times value was being computed twice in the same function in cameras.py, which was unnecessary.
---
nerfstudio/cameras/cameras.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
index c1b988dbbd..467c8fcc01 100644
--- a/nerfstudio/cameras/cameras.py
+++ b/nerfstudio/cameras/cameras.py
@@ -872,8 +872,6 @@ def _compute_rays_for_vr180(
else:
metadata = {"directions_norm": directions_norm[0].detach()}
- times = self.times[camera_indices, 0] if self.times is not None else None
-
return RayBundle(
origins=origins,
directions=directions,
From 01d57b90dc37566f1cf10754cd745226d700af95 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Tue, 12 Dec 2023 11:44:26 +0000
Subject: [PATCH 088/101] Fix large dataset (> 512 images) training in
viewer_beta (#2669)
* Fix large dataset (> 512 images) training in viewer_beta (#2586)
* fix index bug in update_camera_poses of viewer
* Set Ruff line length
---------
Co-authored-by: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com>
---
nerfstudio/viewer_beta/viewer.py | 10 +++++-----
pyproject.toml | 1 +
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index e577ca5c37..b01c35f4b5 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -259,15 +259,15 @@ def update_camera_poses(self):
with torch.no_grad():
assert isinstance(camera_optimizer, CameraOptimizer)
c2ws_delta = camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)).cpu().numpy()
- for idx in idxs:
+ for i, key in enumerate(idxs):
# both are numpy arrays
- c2w_orig = self.original_c2w[idx]
- c2w_delta = c2ws_delta[idx, ...]
+ c2w_orig = self.original_c2w[key]
+ c2w_delta = c2ws_delta[i, ...]
c2w = c2w_orig @ np.concatenate((c2w_delta, np.array([[0, 0, 0, 1]])), axis=0)
R = vtf.SO3.from_matrix(c2w[:3, :3]) # type: ignore
R = R @ vtf.SO3.from_x_radians(np.pi)
- self.camera_handles[idx].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO
- self.camera_handles[idx].wxyz = R.wxyz
+ self.camera_handles[key].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO
+ self.camera_handles[key].wxyz = R.wxyz
def _interrupt_render(self, _) -> None:
"""Interrupt current render."""
diff --git a/pyproject.toml b/pyproject.toml
index 4c589f6808..b6d11c60c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -162,6 +162,7 @@ pythonVersion = "3.8"
pythonPlatform = "Linux"
[tool.ruff]
+line-length = 120
select = [
"E", # pycodestyle errors.
"F", # Pyflakes rules.
From 6ccbc2de45d31befed6f96d007a8e20f942d5e60 Mon Sep 17 00:00:00 2001
From: Chris Sweeney
Date: Tue, 12 Dec 2023 14:18:13 -0800
Subject: [PATCH 089/101] Add support for Project Aria datasets. (#2617)
* Add support for Project Aria datasets.
1) An export script for processing Project Aria datasets
2) Add support for Fisheye624 cameras
3) Add support for center-crop masking and sampling
Example commands for running nerfstudio on Project Aria data:
python scripts/datasets/process_project_aria.py --vrs_file= --mps_data_dir= --output_dir=
ns-train nerfacto --data nerfstudio-data --orientation-method none
* Clean up type errors
* Fix formatting
* Fix linter errors, import, and use absolute file paths
* Add aria subcommand to ns-process-data
* Appease ruff
* Add Aria to docs
* Fix docs, add back import try/except
* nit: path.as_posix() -> str(path)
---------
Co-authored-by: Brent Yi
---
README.md | 49 ++--
docs/quickstart/custom_dataset.md | 30 ++-
nerfstudio/cameras/camera_utils.py | 222 ++++++++++++++++-
nerfstudio/cameras/cameras.py | 36 ++-
.../data/datamanagers/base_datamanager.py | 8 +-
.../data/dataparsers/nerfstudio_dataparser.py | 27 +-
nerfstudio/data/pixel_samplers.py | 41 ++++
.../scripts/datasets/process_project_aria.py | 231 ++++++++++++++++++
nerfstudio/scripts/process_data.py | 29 +++
pyproject.toml | 1 +
10 files changed, 634 insertions(+), 40 deletions(-)
create mode 100644 nerfstudio/scripts/datasets/process_project_aria.py
diff --git a/README.md b/README.md
index ab18747680..1ea4ca8fad 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,7 @@ Have feedback? We'd love for you to fill out our [Nerfstudio Feedback Form](http
We hope nerfstudio enables you to build faster :hammer: learn together :books: and contribute to our NeRF community :sparkling_heart:.
## Sponsors
+
Sponsors of this work includes [Luma AI](https://lumalabs.ai/) and the [BAIR commons](https://bcommons.berkeley.edu/home).
@@ -97,7 +98,6 @@ Sponsors of this work includes [Luma AI](https://lumalabs.ai/) and the [BAIR com
-
# Quickstart
The quickstart will help you get started with the default vanilla NeRF trained on the classic Blender Lego scene.
@@ -233,18 +233,19 @@ ns-export pointcloud --help
Using an existing dataset is great, but likely you want to use your own data! We support various methods for using your own data. Before it can be used in nerfstudio, the camera location and orientations must be determined and then converted into our format using `ns-process-data`. We rely on external tools for this, instructions and information can be found in the documentation.
-| Data | Capture Device | Requirements | `ns-process-data` Speed |
-| ---------------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- |
-| π· [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
-| πΉ [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
-| π [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
-| π± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | π |
-| π± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | π |
-| π± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
-| π₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
-| π₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
-| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
-| π [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | π |
+| Data | Capture Device | Requirements | `ns-process-data` Speed |
+| --------------------------------------------------------------------------------------------- | -------------- | ----------------------------------------------------------------- | ----------------------- |
+| π· [Images](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
+| πΉ [Video](https://docs.nerf.studio/quickstart/custom_dataset.html#images-or-video) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
+| π [360 Data](https://docs.nerf.studio/quickstart/custom_dataset.html#data-equirectangular) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
+| π± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | π |
+| π± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | π |
+| π± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
+| π₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
+| π₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
+| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
+| π [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#Aria) | Aria glasses | [Project Aria](https://projectaria.com/) | π |
+| π [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | π |
## 5. Advanced Options
@@ -276,25 +277,25 @@ And that's it for getting started with the basics of nerfstudio.
If you're interested in learning more on how to create your own pipelines, develop with the viewer, run benchmarks, and more, please check out some of the quicklinks below or visit our [documentation](https://docs.nerf.studio/) directly.
-| Section | Description |
-| -------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
+| Section | Description |
+| ---------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------- |
| [Documentation](https://docs.nerf.studio/) | Full API documentation and tutorials |
-| [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer |
-| π **Educational** |
+| [Viewer](https://viewer.nerf.studio/) | Home page for our web viewer |
+| π **Educational** |
| [Model Descriptions](https://docs.nerf.studio/nerfology/methods/index.html) | Description of all the models supported by nerfstudio and explanations of component parts. |
| [Component Descriptions](https://docs.nerf.studio/nerfology/model_components/index.html) | Interactive notebooks that explain notable/commonly used modules in various models. |
-| π **Tutorials** |
+| π **Tutorials** |
| [Getting Started](https://docs.nerf.studio/quickstart/installation.html) | A more in-depth guide on how to get started with nerfstudio from installation to contributing. |
| [Using the Viewer](https://docs.nerf.studio/quickstart/viewer_quickstart.html) | A quick demo video on how to navigate the viewer. |
-| [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. |
-| π» **For Developers** |
+| [Using Record3D](https://www.youtube.com/watch?v=XwKq7qDQCQk) | Demo video on how to run nerfstudio without using COLMAP. |
+| π» **For Developers** |
| [Creating pipelines](https://docs.nerf.studio/developer_guides/pipelines/index.html) | Learn how to easily build new neural rendering pipelines by using and/or implementing new modules. |
| [Creating datasets](https://docs.nerf.studio/quickstart/custom_dataset.html) | Have a new dataset? Learn how to run it with nerfstudio. |
| [Contributing](https://docs.nerf.studio/reference/contributing.html) | Walk-through for how you can start contributing now. |
-| π **Community** |
-| [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! |
-| [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements |
-| [Feedback Form](TODO) | We welcome any feedback! This is our chance to learn what you all are using Nerfstudio for. |
+| π **Community** |
+| [Discord](https://discord.gg/uMbNqcraFc) | Join our community to discuss more. We would love to hear from you! |
+| [Twitter](https://twitter.com/nerfstudioteam) | Follow us on Twitter @nerfstudioteam to see cool updates and announcements |
+| [Feedback Form](TODO) | We welcome any feedback! This is our chance to learn what you all are using Nerfstudio for. |
# Supported Features
diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md
index 46aaa907d6..fddbe1d56b 100644
--- a/docs/quickstart/custom_dataset.md
+++ b/docs/quickstart/custom_dataset.md
@@ -10,7 +10,7 @@ ns-process-data {video,images,polycam,record3d} --data {DATA_PATH} --output-dir
A full set of arguments can be found {doc}`here`.
-We Currently support the following custom data types:
+We currently support the following custom data types:
| Data | Capture Device | Requirements | `ns-process-data` Speed |
| ----------------------------- | -------------- | ----------------------------------------------- | ----------------------- |
| π· [Images](images_and_video) | Any | [COLMAP](https://colmap.github.io/install.html) | π’ |
@@ -22,6 +22,7 @@ We Currently support the following custom data types:
| π₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
| π₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
+| π [Aria](aria) | Aria glasses | [Project Aria](https://projectaria.com/) | π |
(images_and_video)=
@@ -348,6 +349,31 @@ ns-process-data odm --data /path/to/dataset --output-dir {output directory}
ns-train nerfacto --data {output directory}
```
+(aria)=
+
+## Aria
+
+1. Install projectaria_tools:
+
+```bash
+conda activate nerfstudio
+pip install projectaria-tools'[all]'
+```
+
+2. Download a VRS file from Project Aria glasses, and run Machine Perception Services to extract poses.
+
+3. Convert to nerfstudio format.
+
+```bash
+ns-process-data aria --vrs-file /path/to/vrs/file --mps-data-dir /path/to/mps/data --output-dir {output directory}
+```
+
+4. Train!
+
+```bash
+ns-train nerfacto --data {output directory}
+```
+
(360_data)=
## 360 Data (Equirectangular)
@@ -459,4 +485,4 @@ If the depth of the scene is unviewable and looks too close or expanded when vie
- The IPD can be modified in the `cameras.py` script as the variable `vr_ipd` (default is 64 mm).
- Compositing with Blender Objects and VR180 or ODS Renders
- Configure the Blender camera as panoramic and equirectangular. For the VR180 Blender camera, set the panoramic longitude min and max to -90 and 90.
- - Change the Stereoscopy mode to "Parallel" set the Interocular Distance to 0.064 m.
\ No newline at end of file
+ - Change the Stereoscopy mode to "Parallel" set the Interocular Distance to 0.064 m.
diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py
index ffb557589b..4456566237 100644
--- a/nerfstudio/cameras/camera_utils.py
+++ b/nerfstudio/cameras/camera_utils.py
@@ -25,7 +25,6 @@
from numpy.typing import NDArray
from torch import Tensor
-
_EPS = np.finfo(float).eps * 4.0
@@ -622,3 +621,224 @@ def auto_orient_and_center_poses(
raise ValueError(f"Unknown value for method: {method}")
return oriented_poses, transform
+
+
+@torch.jit.script
+def fisheye624_project(xyz, params):
+ """
+ Batched implementation of the FisheyeRadTanThinPrism (aka Fisheye624) camera
+ model project() function.
+ Inputs:
+ xyz: BxNx3 tensor of 3D points to be projected
+ params: Bx16 tensor of Fisheye624 parameters formatted like this:
+ [f_u f_v c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+ or Bx15 tensor of Fisheye624 parameters formatted like this:
+ [f c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+ Outputs:
+ uv: BxNx2 tensor of 2D projections of xyz in image plane
+ Model for fisheye cameras with radial, tangential, and thin-prism distortion.
+ This model allows fu != fv.
+ Specifically, the model is:
+ uvDistorted = [x_r] + tangentialDistortion + thinPrismDistortion
+ [y_r]
+ proj = diag(fu,fv) * uvDistorted + [cu;cv];
+ where:
+ a = x/z, b = y/z, r = (a^2+b^2)^(1/2)
+ th = atan(r)
+ cosPhi = a/r, sinPhi = b/r
+ [x_r] = (th+ k0 * th^3 + k1* th^5 + ...) [cosPhi]
+ [y_r] [sinPhi]
+ the number of terms in the series is determined by the template parameter numK.
+ tangentialDistortion = [(2 x_r^2 + rd^2)*p_0 + 2*x_r*y_r*p_1]
+ [(2 y_r^2 + rd^2)*p_1 + 2*x_r*y_r*p_0]
+ where rd^2 = x_r^2 + y_r^2
+ thinPrismDistortion = [s0 * rd^2 + s1 rd^4]
+ [s2 * rd^2 + s3 rd^4]
+ Author: Daniel DeTone (ddetone@meta.com)
+ """
+
+ assert xyz.ndim == 3
+ assert params.ndim == 2
+ assert params.shape[-1] == 16 or params.shape[-1] == 15, "This model allows fx != fy"
+ eps = 1e-9
+ B, N = xyz.shape[0], xyz.shape[1]
+
+ # Radial correction.
+ z = xyz[:, :, 2].reshape(B, N, 1)
+ z = torch.where(torch.abs(z) < eps, eps * torch.sign(z), z)
+ ab = xyz[:, :, :2] / z
+ r = torch.norm(ab, dim=-1, p=2, keepdim=True)
+ th = torch.atan(r)
+ th_divr = torch.where(r < eps, torch.ones_like(ab), ab / r)
+ th_k = th.reshape(B, N, 1).clone()
+ for i in range(6):
+ th_k = th_k + params[:, -12 + i].reshape(B, 1, 1) * torch.pow(th, 3 + i * 2)
+ xr_yr = th_k * th_divr
+ uv_dist = xr_yr
+
+ # Tangential correction.
+ p0 = params[:, -6].reshape(B, 1)
+ p1 = params[:, -5].reshape(B, 1)
+ xr = xr_yr[:, :, 0].reshape(B, N)
+ yr = xr_yr[:, :, 1].reshape(B, N)
+ xr_yr_sq = torch.square(xr_yr)
+ xr_sq = xr_yr_sq[:, :, 0].reshape(B, N)
+ yr_sq = xr_yr_sq[:, :, 1].reshape(B, N)
+ rd_sq = xr_sq + yr_sq
+ uv_dist_tu = uv_dist[:, :, 0] + ((2.0 * xr_sq + rd_sq) * p0 + 2.0 * xr * yr * p1)
+ uv_dist_tv = uv_dist[:, :, 1] + ((2.0 * yr_sq + rd_sq) * p1 + 2.0 * xr * yr * p0)
+ uv_dist = torch.stack([uv_dist_tu, uv_dist_tv], dim=-1) # Avoids in-place complaint.
+
+ # Thin Prism correction.
+ s0 = params[:, -4].reshape(B, 1)
+ s1 = params[:, -3].reshape(B, 1)
+ s2 = params[:, -2].reshape(B, 1)
+ s3 = params[:, -1].reshape(B, 1)
+ rd_4 = torch.square(rd_sq)
+ uv_dist[:, :, 0] = uv_dist[:, :, 0] + (s0 * rd_sq + s1 * rd_4)
+ uv_dist[:, :, 1] = uv_dist[:, :, 1] + (s2 * rd_sq + s3 * rd_4)
+
+ # Finally, apply standard terms: focal length and camera centers.
+ if params.shape[-1] == 15:
+ fx_fy = params[:, 0].reshape(B, 1, 1)
+ cx_cy = params[:, 1:3].reshape(B, 1, 2)
+ else:
+ fx_fy = params[:, 0:2].reshape(B, 1, 2)
+ cx_cy = params[:, 2:4].reshape(B, 1, 2)
+ result = uv_dist * fx_fy + cx_cy
+
+ return result
+
+
+# Core implementation of fisheye 624 unprojection. More details are documented here:
+# https://facebookresearch.github.io/projectaria_tools/docs/tech_insights/camera_intrinsic_models#the-fisheye62-model
+@torch.jit.script
+def fisheye624_unproject_helper(uv, params, max_iters: int = 5):
+ """
+ Batched implementation of the FisheyeRadTanThinPrism (aka Fisheye624) camera
+ model. There is no analytical solution for the inverse of the project()
+ function so this solves an optimization problem using Newton's method to get
+ the inverse.
+ Inputs:
+      uv: BxNx2 tensor of 2D pixels to be unprojected
+ params: Bx16 tensor of Fisheye624 parameters formatted like this:
+ [f_u f_v c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+ or Bx15 tensor of Fisheye624 parameters formatted like this:
+ [f c_u c_v {k_0 ... k_5} {p_0 p_1} {s_0 s_1 s_2 s_3}]
+ Outputs:
+ xyz: BxNx3 tensor of 3D rays of uv points with z = 1.
+ Model for fisheye cameras with radial, tangential, and thin-prism distortion.
+    This model allows fu != fv. This unproject function holds that:
+ X = unproject(project(X)) [for X=(x,y,z) in R^3, z>0]
+ and
+ x = project(unproject(s*x)) [for s!=0 and x=(u,v) in R^2]
+ Author: Daniel DeTone (ddetone@meta.com)
+ """
+
+    assert uv.ndim == 3, "Expected batched input shaped BxNx2"
+ assert params.ndim == 2
+ assert params.shape[-1] == 16 or params.shape[-1] == 15, "This model allows fx != fy"
+ eps = 1e-6
+ B, N = uv.shape[0], uv.shape[1]
+
+ if params.shape[-1] == 15:
+ fx_fy = params[:, 0].reshape(B, 1, 1)
+ cx_cy = params[:, 1:3].reshape(B, 1, 2)
+ else:
+ fx_fy = params[:, 0:2].reshape(B, 1, 2)
+ cx_cy = params[:, 2:4].reshape(B, 1, 2)
+
+ uv_dist = (uv - cx_cy) / fx_fy
+
+ # Compute xr_yr using Newton's method.
+ xr_yr = uv_dist.clone() # Initial guess.
+ for _ in range(max_iters):
+ uv_dist_est = xr_yr.clone()
+ # Tangential terms.
+ p0 = params[:, -6].reshape(B, 1)
+ p1 = params[:, -5].reshape(B, 1)
+ xr = xr_yr[:, :, 0].reshape(B, N)
+ yr = xr_yr[:, :, 1].reshape(B, N)
+ xr_yr_sq = torch.square(xr_yr)
+ xr_sq = xr_yr_sq[:, :, 0].reshape(B, N)
+ yr_sq = xr_yr_sq[:, :, 1].reshape(B, N)
+ rd_sq = xr_sq + yr_sq
+ uv_dist_est[:, :, 0] = uv_dist_est[:, :, 0] + ((2.0 * xr_sq + rd_sq) * p0 + 2.0 * xr * yr * p1)
+ uv_dist_est[:, :, 1] = uv_dist_est[:, :, 1] + ((2.0 * yr_sq + rd_sq) * p1 + 2.0 * xr * yr * p0)
+ # Thin Prism terms.
+ s0 = params[:, -4].reshape(B, 1)
+ s1 = params[:, -3].reshape(B, 1)
+ s2 = params[:, -2].reshape(B, 1)
+ s3 = params[:, -1].reshape(B, 1)
+ rd_4 = torch.square(rd_sq)
+ uv_dist_est[:, :, 0] = uv_dist_est[:, :, 0] + (s0 * rd_sq + s1 * rd_4)
+ uv_dist_est[:, :, 1] = uv_dist_est[:, :, 1] + (s2 * rd_sq + s3 * rd_4)
+ # Compute the derivative of uv_dist w.r.t. xr_yr.
+ duv_dist_dxr_yr = uv.new_ones(B, N, 2, 2)
+ duv_dist_dxr_yr[:, :, 0, 0] = 1.0 + 6.0 * xr_yr[:, :, 0] * p0 + 2.0 * xr_yr[:, :, 1] * p1
+ offdiag = 2.0 * (xr_yr[:, :, 0] * p1 + xr_yr[:, :, 1] * p0)
+ duv_dist_dxr_yr[:, :, 0, 1] = offdiag
+ duv_dist_dxr_yr[:, :, 1, 0] = offdiag
+ duv_dist_dxr_yr[:, :, 1, 1] = 1.0 + 6.0 * xr_yr[:, :, 1] * p1 + 2.0 * xr_yr[:, :, 0] * p0
+ xr_yr_sq_norm = xr_yr_sq[:, :, 0] + xr_yr_sq[:, :, 1]
+ temp1 = 2.0 * (s0 + 2.0 * s1 * xr_yr_sq_norm)
+ duv_dist_dxr_yr[:, :, 0, 0] = duv_dist_dxr_yr[:, :, 0, 0] + (xr_yr[:, :, 0] * temp1)
+ duv_dist_dxr_yr[:, :, 0, 1] = duv_dist_dxr_yr[:, :, 0, 1] + (xr_yr[:, :, 1] * temp1)
+ temp2 = 2.0 * (s2 + 2.0 * s3 * xr_yr_sq_norm)
+ duv_dist_dxr_yr[:, :, 1, 0] = duv_dist_dxr_yr[:, :, 1, 0] + (xr_yr[:, :, 0] * temp2)
+ duv_dist_dxr_yr[:, :, 1, 1] = duv_dist_dxr_yr[:, :, 1, 1] + (xr_yr[:, :, 1] * temp2)
+ # Compute 2x2 inverse manually here since torch.inverse() is very slow.
+ # Because this is slow: inv = duv_dist_dxr_yr.inverse()
+ # About a 10x reduction in speed with above line.
+ mat = duv_dist_dxr_yr.reshape(-1, 2, 2)
+ a = mat[:, 0, 0].reshape(-1, 1, 1)
+ b = mat[:, 0, 1].reshape(-1, 1, 1)
+ c = mat[:, 1, 0].reshape(-1, 1, 1)
+ d = mat[:, 1, 1].reshape(-1, 1, 1)
+ det = 1.0 / ((a * d) - (b * c))
+ top = torch.cat([d, -b], dim=2)
+ bot = torch.cat([-c, a], dim=2)
+ inv = det * torch.cat([top, bot], dim=1)
+ inv = inv.reshape(B, N, 2, 2)
+ # Manually compute 2x2 @ 2x1 matrix multiply.
+ # Because this is slow: step = (inv @ (uv_dist - uv_dist_est)[..., None])[..., 0]
+ diff = uv_dist - uv_dist_est
+ a = inv[:, :, 0, 0]
+ b = inv[:, :, 0, 1]
+ c = inv[:, :, 1, 0]
+ d = inv[:, :, 1, 1]
+ e = diff[:, :, 0]
+ f = diff[:, :, 1]
+ step = torch.stack([a * e + b * f, c * e + d * f], dim=-1)
+ # Newton step.
+ xr_yr = xr_yr + step
+
+ # Compute theta using Newton's method.
+ xr_yr_norm = xr_yr.norm(p=2, dim=2).reshape(B, N, 1)
+ th = xr_yr_norm.clone()
+ for _ in range(max_iters):
+ th_radial = uv.new_ones(B, N, 1)
+ dthd_th = uv.new_ones(B, N, 1)
+ for k in range(6):
+ r_k = params[:, -12 + k].reshape(B, 1, 1)
+ th_radial = th_radial + (r_k * torch.pow(th, 2 + k * 2))
+ dthd_th = dthd_th + ((3.0 + 2.0 * k) * r_k * torch.pow(th, 2 + k * 2))
+ th_radial = th_radial * th
+ step = (xr_yr_norm - th_radial) / dthd_th
+ # handle dthd_th close to 0.
+ step = torch.where(dthd_th.abs() > eps, step, torch.sign(step) * eps * 10.0)
+ th = th + step
+ # Compute the ray direction using theta and xr_yr.
+ close_to_zero = torch.logical_and(th.abs() < eps, xr_yr_norm.abs() < eps)
+ ray_dir = torch.where(close_to_zero, xr_yr, torch.tan(th) / xr_yr_norm * xr_yr)
+ ray = torch.cat([ray_dir, uv.new_ones(B, N, 1)], dim=2)
+ return ray
+
+
+# unproject 2D point to 3D with fisheye624 model
+def fisheye624_unproject(coords: torch.Tensor, distortion_params: torch.Tensor) -> torch.Tensor:
+ dirs = fisheye624_unproject_helper(coords.unsqueeze(0), distortion_params[0].unsqueeze(0))
+ # correct for camera space differences:
+ dirs[..., 1] = -dirs[..., 1]
+ dirs[..., 2] = -dirs[..., 2]
+ return dirs
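A quick sanity check for the two functions above is a project/unproject round trip with all distortion coefficients set to zero; a minimal sketch with made-up intrinsics:

```python
import torch
from nerfstudio.cameras.camera_utils import fisheye624_project, fisheye624_unproject_helper

# Made-up intrinsics: fx, fy, cx, cy followed by 12 zero distortion coefficients.
params = torch.tensor([[400.0, 400.0, 320.0, 240.0] + [0.0] * 12])  # (1, 16)
xyz = torch.tensor([[[0.1, -0.2, 1.0], [0.0, 0.0, 2.0], [0.3, 0.1, 1.5]]])  # (1, 3, 3)

uv = fisheye624_project(xyz, params)            # (1, 3, 2) pixel coordinates
rays = fisheye624_unproject_helper(uv, params)  # (1, 3, 3) rays with z = 1
assert torch.allclose(rays, xyz / xyz[..., 2:3], atol=1e-4)
```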
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
index 467c8fcc01..46ec42bdc2 100644
--- a/nerfstudio/cameras/cameras.py
+++ b/nerfstudio/cameras/cameras.py
@@ -32,7 +32,7 @@
import nerfstudio.utils.poses as pose_utils
from nerfstudio.cameras import camera_utils
from nerfstudio.cameras.rays import RayBundle
-from nerfstudio.data.scene_box import SceneBox, OrientedBox
+from nerfstudio.data.scene_box import OrientedBox, SceneBox
from nerfstudio.utils.tensor_dataclass import TensorDataclass
TORCH_DEVICE = Union[torch.device, str]
@@ -48,6 +48,7 @@ class CameraType(Enum):
OMNIDIRECTIONALSTEREO_R = auto()
VR180_L = auto()
VR180_R = auto()
+ FISHEYE624 = auto()
CAMERA_MODEL_TO_TYPE = {
@@ -62,6 +63,7 @@ class CameraType(Enum):
"OMNIDIRECTIONALSTEREO_R": CameraType.OMNIDIRECTIONALSTEREO_R,
"VR180_L": CameraType.VR180_L,
"VR180_R": CameraType.VR180_R,
+ "FISHEYE624": CameraType.FISHEYE624,
}
@@ -79,7 +81,7 @@ class Cameras(TensorDataclass):
cy: Principal point y
width: Image width
height: Image height
- distortion_params: OpenCV 6 radial distortion coefficients
+ distortion_params: distortion coefficients (OpenCV 6 radial or 6-2-4 radial, tangential, thin-prism for Fisheye624)
camera_type: Type of camera model. This will be an int corresponding to the CameraType enum.
times: Timestamps for each camera
metadata: Additional metadata or data needed for interpolation, will mimic shape of the cameras
@@ -629,8 +631,8 @@ def _generate_rays_from_coords(
assert coord_stack.shape == (3,) + num_rays_shape + (2,)
# Undistorts our images according to our distortion parameters
+ distortion_params = None
if not disable_distortion:
- distortion_params = None
if self.distortion_params is not None:
distortion_params = self.distortion_params[true_indices]
if distortion_params_delta is not None:
@@ -832,6 +834,34 @@ def _compute_rays_for_vr180(
# assign final camera origins
c2w[..., :3, 3] = vr180_origins
+ elif CameraType.FISHEYE624.value in cam_types:
+ mask = (self.camera_type[true_indices] == CameraType.FISHEYE624.value).squeeze(-1) # (num_rays)
+ coord_mask = torch.stack([mask, mask, mask], dim=0)
+
+            # fisheye624 requires pixel coordinates to unproject, so we need to recompute the offsets in pixel coords.
+ pcoord = torch.stack([x, y], -1) # (num_rays, 2)
+ pcoord_x_offset = torch.stack([x + 1, y], -1) # (num_rays, 2)
+ pcoord_y_offset = torch.stack([x, y + 1], -1) # (num_rays, 2)
+
+ # Stack image coordinates and image coordinates offset by 1, check shapes too
+ pcoord_stack = torch.stack([pcoord, pcoord_x_offset, pcoord_y_offset], dim=0) # (3, num_rays, 2)
+
+ assert distortion_params is not None
+ masked_coords = pcoord_stack[coord_mask, :]
+            # The fisheye unprojection does not rely on planar/pinhole unprojection, thus the method needs
+            # to access the focal length and principal points directly.
+ camera_params = torch.cat(
+ [
+ fx[mask].unsqueeze(1),
+ fy[mask].unsqueeze(1),
+ cx[mask].unsqueeze(1),
+ cy[mask].unsqueeze(1),
+ distortion_params[mask, :],
+ ],
+ dim=1,
+ )
+ directions_stack[coord_mask] = camera_utils.fisheye624_unproject(masked_coords, camera_params)
+
else:
raise ValueError(f"Camera type {cam} not supported.")
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 7971f46e56..47144c00e2 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -475,8 +475,14 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe
is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all()
if is_equirectangular.any():
CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.")
+
+ fisheye_crop_radius = (
+ None if dataset.cameras.metadata is None else dataset.cameras.metadata["fisheye_crop_radius"]
+ )
return self.config.pixel_sampler.setup(
- is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch
+ is_equirectangular=is_equirectangular,
+ num_rays_per_batch=num_rays_per_batch,
+ fisheye_crop_radius=fisheye_crop_radius,
)
def setup_train(self):
diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
index eaeda0bce6..9b4c2951ac 100644
--- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
+++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
@@ -104,10 +104,11 @@ def _generate_dataparser_outputs(self, split="train"):
height_fixed = "h" in meta
width_fixed = "w" in meta
distort_fixed = False
- for distort_key in ["k1", "k2", "k3", "p1", "p2"]:
+ for distort_key in ["k1", "k2", "k3", "p1", "p2", "distortion_params"]:
if distort_key in meta:
distort_fixed = True
break
+ fisheye_crop_radius = meta.get("fisheye_crop_radius", None)
fx = []
fy = []
cx = []
@@ -149,7 +150,9 @@ def _generate_dataparser_outputs(self, split="train"):
width.append(int(frame["w"]))
if not distort_fixed:
distort.append(
- camera_utils.get_distortion_params(
+ torch.tensor(frame["distortion_params"], dtype=torch.float32)
+ if "distortion_params" in frame
+ else camera_utils.get_distortion_params(
k1=float(frame["k1"]) if "k1" in frame else 0.0,
k2=float(frame["k2"]) if "k2" in frame else 0.0,
k3=float(frame["k3"]) if "k3" in frame else 0.0,
@@ -274,17 +277,22 @@ def _generate_dataparser_outputs(self, split="train"):
height = int(meta["h"]) if height_fixed else torch.tensor(height, dtype=torch.int32)[idx_tensor]
width = int(meta["w"]) if width_fixed else torch.tensor(width, dtype=torch.int32)[idx_tensor]
if distort_fixed:
- distortion_params = camera_utils.get_distortion_params(
- k1=float(meta["k1"]) if "k1" in meta else 0.0,
- k2=float(meta["k2"]) if "k2" in meta else 0.0,
- k3=float(meta["k3"]) if "k3" in meta else 0.0,
- k4=float(meta["k4"]) if "k4" in meta else 0.0,
- p1=float(meta["p1"]) if "p1" in meta else 0.0,
- p2=float(meta["p2"]) if "p2" in meta else 0.0,
+ distortion_params = (
+ torch.tensor(meta["distortion_params"], dtype=torch.float32)
+ if "distortion_params" in meta
+ else camera_utils.get_distortion_params(
+ k1=float(meta["k1"]) if "k1" in meta else 0.0,
+ k2=float(meta["k2"]) if "k2" in meta else 0.0,
+ k3=float(meta["k3"]) if "k3" in meta else 0.0,
+ k4=float(meta["k4"]) if "k4" in meta else 0.0,
+ p1=float(meta["p1"]) if "p1" in meta else 0.0,
+ p2=float(meta["p2"]) if "p2" in meta else 0.0,
+ )
)
else:
distortion_params = torch.stack(distort, dim=0)[idx_tensor]
+ metadata = {"fisheye_crop_radius": fisheye_crop_radius} if fisheye_crop_radius is not None else None
cameras = Cameras(
fx=fx,
fy=fy,
@@ -295,6 +303,7 @@ def _generate_dataparser_outputs(self, split="train"):
width=width,
camera_to_worlds=poses[:, :3, :4],
camera_type=camera_type,
+ metadata=metadata,
)
assert self.downscale_factor is not None
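For illustration, a frame carrying raw `distortion_params` might look like the fragment below, expressed as a Python dict; all values are made up, and the 12-coefficient layout k0..k5, p0, p1, s0..s3 follows the Fisheye624 convention that `cameras.py` concatenates with fx, fy, cx, cy:

```python
# Hypothetical transforms.json fragment for a Fisheye624 capture.
meta = {
    "camera_model": "FISHEYE624",
    "fisheye_crop_radius": 700.0,
    "frames": [
        {
            "file_path": "images/frame_00001.jpg",
            "fl_x": 610.0, "fl_y": 610.0, "cx": 704.0, "cy": 704.0,
            "w": 1408, "h": 1408,
            "distortion_params": [0.4, -0.01, 0.0, 0.0, 0.0, 0.0,  # k0..k5
                                  0.0, 0.0,                        # p0, p1
                                  0.0, 0.0, 0.0, 0.0],             # s0..s3
            "transform_matrix": [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
        }
    ],
}
```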
diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py
index 07bab5d826..c7ce9e073e 100644
--- a/nerfstudio/data/pixel_samplers.py
+++ b/nerfstudio/data/pixel_samplers.py
@@ -47,6 +47,8 @@ class PixelSamplerConfig(InstantiateConfig):
"""Whether or not to include a reference to the full image in returned batch."""
is_equirectangular: bool = False
"""List of whether or not camera i is equirectangular."""
+ fisheye_crop_radius: Optional[float] = None
+ """Set to the radius (in pixels) for fisheye cameras."""
class PixelSampler:
@@ -65,6 +67,7 @@ def __init__(self, config: PixelSamplerConfig, **kwargs) -> None:
self.config.num_rays_per_batch = self.kwargs.get("num_rays_per_batch", self.config.num_rays_per_batch)
self.config.keep_full_image = self.kwargs.get("keep_full_image", self.config.keep_full_image)
self.config.is_equirectangular = self.kwargs.get("is_equirectangular", self.config.is_equirectangular)
+ self.config.fisheye_crop_radius = self.kwargs.get("fisheye_crop_radius", self.config.fisheye_crop_radius)
self.set_num_rays_per_batch(self.config.num_rays_per_batch)
def set_num_rays_per_batch(self, num_rays_per_batch: int):
@@ -134,6 +137,36 @@ def sample_method_equirectangular(
return indices
+ def sample_method_fisheye(
+ self,
+ batch_size: int,
+ num_images: int,
+ image_height: int,
+ image_width: int,
+ mask: Optional[Tensor] = None,
+ device: Union[torch.device, str] = "cpu",
+ ) -> Int[Tensor, "batch_size 3"]:
+ if isinstance(mask, torch.Tensor):
+ indices = self.sample_method(batch_size, num_images, image_height, image_width, mask=mask, device=device)
+ else:
+ rand_samples = torch.rand((batch_size, 3), device=device)
+            # convert random samples to radius and theta
+ radii = self.config.fisheye_crop_radius * torch.sqrt(rand_samples[:, 1])
+ theta = 2.0 * torch.pi * rand_samples[:, 2]
+
+ # convert radius and theta to x and y between -radii and radii
+ x = radii * torch.cos(theta)
+ y = radii * torch.sin(theta)
+
+ # Multiply by the batch size and height/width to get pixel indices.
+ indices = torch.floor(
+ torch.stack([rand_samples[:, 0], y, x], dim=1)
+ * torch.tensor([num_images, image_height // 2, image_width // 2], device=device)
+ + torch.tensor([0, image_height // 2, image_width // 2], device=device)
+ ).long()
+
+ return indices
+
def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep_full_image: bool = False):
"""
Operates on a batch of images and samples pixels to use for generating rays.
@@ -154,6 +187,10 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep
indices = self.sample_method_equirectangular(
num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device
)
+ elif self.config.fisheye_crop_radius is not None:
+ indices = self.sample_method_fisheye(
+ num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device
+ )
else:
indices = self.sample_method(
num_rays_per_batch, num_images, image_height, image_width, mask=batch["mask"], device=device
@@ -163,6 +200,10 @@ def collate_image_dataset_batch(self, batch: Dict, num_rays_per_batch: int, keep
indices = self.sample_method_equirectangular(
num_rays_per_batch, num_images, image_height, image_width, device=device
)
+ elif self.config.fisheye_crop_radius is not None:
+ indices = self.sample_method_fisheye(
+ num_rays_per_batch, num_images, image_height, image_width, device=device
+ )
else:
indices = self.sample_method(num_rays_per_batch, num_images, image_height, image_width, device=device)
diff --git a/nerfstudio/scripts/datasets/process_project_aria.py b/nerfstudio/scripts/datasets/process_project_aria.py
new file mode 100644
index 0000000000..f2fdcc94c6
--- /dev/null
+++ b/nerfstudio/scripts/datasets/process_project_aria.py
@@ -0,0 +1,231 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import sys
+import threading
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List
+
+import numpy as np
+import tyro
+from PIL import Image
+
+try:
+ from projectaria_tools.core import mps
+ from projectaria_tools.core.data_provider import VrsDataProvider, create_vrs_data_provider
+ from projectaria_tools.core.sophus import SE3
+except ImportError:
+ print("projectaria_tools import failed, please install with pip3 install projectaria-tools'[all]'")
+ sys.exit(1)
+
+ARIA_CAMERA_MODEL = "FISHEYE624"
+
+# The Aria coordinate system is different from the Blender/Nerfstudio coordinate system.
+# Blender / Nerfstudio: +Z = back, +Y = up, +X = right
+# Aria: +Z = forward, +Y = down, +X = right
+T_ARIA_NERFSTUDIO = SE3.from_matrix(
+ np.array(
+ [
+ [1.0, 0.0, 0.0, 0.0],
+ [0.0, -1.0, 0.0, 0.0],
+ [0.0, 0.0, -1.0, 0.0],
+ [0.0, 0.0, 0.0, 1.0],
+ ]
+ )
+)
+
+
+@dataclass
+class AriaCameraCalibration:
+ fx: float
+ fy: float
+ cx: float
+ cy: float
+ distortion_params: np.ndarray
+ width: int
+ height: int
+ t_device_camera: SE3
+
+
+@dataclass
+class AriaImageFrame:
+ camera: AriaCameraCalibration
+ file_path: str
+ t_world_camera: SE3
+ timestamp_ns: float
+
+
+@dataclass
+class TimedPoses:
+ timestamps_ns: np.ndarray
+ t_world_devices: List[SE3]
+
+
+def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibration]:
+ """Retrieve the per-camera factory calibration from within the VRS."""
+
+ factory_calib = {}
+ name = "camera-rgb"
+ device_calib = provider.get_device_calibration()
+ assert device_calib is not None, "Could not find device calibration"
+ sensor_calib = device_calib.get_camera_calib(name)
+ assert sensor_calib is not None, f"Could not find sensor calibration for {name}"
+
+ width = sensor_calib.get_image_size()[0].item()
+ height = sensor_calib.get_image_size()[1].item()
+ intrinsics = sensor_calib.projection_params()
+
+ factory_calib[name] = AriaCameraCalibration(
+ fx=intrinsics[0],
+ fy=intrinsics[0],
+ cx=intrinsics[1],
+ cy=intrinsics[2],
+ distortion_params=intrinsics[3:15],
+ width=width,
+ height=height,
+ t_device_camera=sensor_calib.get_transform_device_camera(),
+ )
+
+ return factory_calib
+
+
+def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses:
+ closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv)
+
+ timestamps_secs, poses = zip(
+ *[(it.tracking_timestamp.total_seconds(), it.transform_world_device) for it in closed_loop_traj]
+ )
+
+ SEC_TO_NANOSEC = 1e9
+ return TimedPoses(
+ timestamps_ns=(np.array(timestamps_secs) * SEC_TO_NANOSEC).astype(int),
+ t_world_devices=poses,
+ )
+
+
+def to_aria_image_frame(
+ provider: VrsDataProvider,
+ index: int,
+ name_to_camera: Dict[str, AriaCameraCalibration],
+ t_world_devices: TimedPoses,
+ output_dir: Path,
+) -> AriaImageFrame:
+ name = "camera-rgb"
+
+ camera_calibration = name_to_camera[name]
+ stream_id = provider.get_stream_id_from_label(name)
+ assert stream_id is not None, f"Could not find stream {name}"
+
+ # Get the image corresponding to this index
+ image_data = provider.get_image_data_by_index(stream_id, index)
+ img = Image.fromarray(image_data[0].to_numpy_array())
+ capture_time_ns = image_data[1].capture_timestamp_ns
+
+ file_path = f"{output_dir}/{name}_{capture_time_ns}.jpg"
+ threading.Thread(target=lambda: img.save(file_path)).start()
+
+    # Find the pose whose timestamp is closest to the capture time.
+ nearest_pose_idx = np.searchsorted(t_world_devices.timestamps_ns, capture_time_ns)
+ nearest_pose_idx = np.minimum(nearest_pose_idx, len(t_world_devices.timestamps_ns) - 1)
+ assert nearest_pose_idx != -1, f"Could not find pose for {capture_time_ns}"
+ t_world_device = t_world_devices.t_world_devices[nearest_pose_idx]
+
+    # Compute the camera-to-world transform.
+ t_world_camera = t_world_device @ camera_calibration.t_device_camera @ T_ARIA_NERFSTUDIO
+
+ return AriaImageFrame(
+ camera=camera_calibration,
+ file_path=file_path,
+ t_world_camera=t_world_camera,
+ timestamp_ns=capture_time_ns,
+ )
+
+
+def to_nerfstudio_frame(frame: AriaImageFrame) -> Dict:
+ return {
+ "fl_x": frame.camera.fx,
+ "fl_y": frame.camera.fy,
+ "cx": frame.camera.cx,
+ "cy": frame.camera.cy,
+ "distortion_params": frame.camera.distortion_params.tolist(),
+ "w": frame.camera.width,
+ "h": frame.camera.height,
+ "file_path": frame.file_path,
+ "transform_matrix": frame.t_world_camera.to_matrix().tolist(),
+ "timestamp": frame.timestamp_ns,
+ }
+
+
+@dataclass
+class ProcessProjectAria:
+ """Processes Project Aria data i.e. a VRS of the raw recording streams and the MPS attachments
+ that provide poses, calibration, and 3d points. More information on MPS data can be found at:
+ https://facebookresearch.github.io/projectaria_tools/docs/ARK/mps.
+ """
+
+ vrs_file: Path
+ """Path to the VRS file."""
+ mps_data_dir: Path
+ """Path to Project Aria Machine Perception Services (MPS) attachments."""
+ output_dir: Path
+ """Path to the output directory."""
+
+ def main(self) -> None:
+ """Generate a nerfstudio dataset from ProjectAria data (VRS) and MPS attachments."""
+ # Create output directory if it doesn't exist.
+ self.output_dir = self.output_dir.absolute()
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+
+ provider = create_vrs_data_provider(str(self.vrs_file.absolute()))
+ assert provider is not None, "Cannot open file"
+
+ name_to_camera = get_camera_calibs(provider)
+
+ print("Getting poses from closed loop trajectory CSV...")
+ trajectory_csv = self.mps_data_dir / "closed_loop_trajectory.csv"
+ t_world_devices = read_trajectory_csv_to_dict(str(trajectory_csv.absolute()))
+
+ name = "camera-rgb"
+ stream_id = provider.get_stream_id_from_label(name)
+
+ # create an AriaImageFrame for each image in the VRS.
+ print("Creating Aria frames...")
+ aria_frames = [
+ to_aria_image_frame(provider, index, name_to_camera, t_world_devices, self.output_dir)
+ for index in range(0, provider.get_num_data(stream_id))
+ ]
+
+ # create the NerfStudio frames from the AriaImageFrames.
+ print("Creating NerfStudio frames...")
+ CANONICAL_RGB_VALID_RADIUS = 707.5
+ CANONICAL_RGB_WIDTH = 1408
+ rgb_valid_radius = CANONICAL_RGB_VALID_RADIUS * (aria_frames[0].camera.width / CANONICAL_RGB_WIDTH)
+ nerfstudio_frames = {
+ "camera_model": ARIA_CAMERA_MODEL,
+ "frames": [to_nerfstudio_frame(frame) for frame in aria_frames],
+ "fisheye_crop_radius": rgb_valid_radius,
+ }
+
+ # write the json out to disk as transforms.json
+ print("Writing transforms.json")
+ transform_file = self.output_dir / "transforms.json"
+        with open(transform_file, "w", encoding="UTF-8") as f:
+            f.write(json.dumps(nerfstudio_frames))
+
+
+if __name__ == "__main__":
+ tyro.extras.set_accent_color("bright_yellow")
+ tyro.cli(ProcessProjectAria).main()
diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py
index 83cdcca473..2c2cd7a381 100644
--- a/nerfstudio/scripts/process_data.py
+++ b/nerfstudio/scripts/process_data.py
@@ -475,6 +475,12 @@ def main(self) -> None:
CONSOLE.rule()
+@dataclass
+class NotInstalled:
+ def main(self) -> None:
+ ...
+
+
Commands = Union[
Annotated[ImagesToNerfstudioDataset, tyro.conf.subcommand(name="images")],
Annotated[VideoToNerfstudioDataset, tyro.conf.subcommand(name="video")],
@@ -485,6 +491,29 @@ def main(self) -> None:
Annotated[ProcessODM, tyro.conf.subcommand(name="odm")],
]
+# Add aria subcommand if projectaria_tools is installed.
+try:
+ import projectaria_tools
+except ImportError:
+ projectaria_tools = None
+
+if projectaria_tools is not None:
+ from nerfstudio.scripts.datasets.process_project_aria import ProcessProjectAria
+
+ # Note that Union[A, Union[B, C]] == Union[A, B, C].
+ Commands = Union[Commands, Annotated[ProcessProjectAria, tyro.conf.subcommand(name="aria")]]
+else:
+ Commands = Union[
+ Commands,
+ Annotated[
+ NotInstalled,
+ tyro.conf.subcommand(
+ name="aria",
+ description="**Not installed.** Processing Project Aria data requires `pip install projectaria_tools'[all]'`.",
+ ),
+ ],
+ ]
+
def entrypoint():
"""Entrypoint for use with pyproject scripts."""
diff --git a/pyproject.toml b/pyproject.toml
index b6d11c60c3..3b6d69a84f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ dev = [
"opencv-stubs==0.0.7",
"transformers==4.29.2",
"pyright==1.1.331",
+ "projectaria_tools[all]>=1.2.0",
]
# Documentation related packages
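
The sampler added above draws pixels uniformly over the valid fisheye circle rather than the full
image rectangle: taking the square root of a uniform variable makes the radius area-uniform, and the
angle is uniform on [0, 2*pi). A rough standalone sketch of the same idea (function and argument
names are illustrative, not part of the patch):

    import torch

    def sample_fisheye_pixels(batch_size: int, num_images: int, height: int,
                              width: int, crop_radius: float) -> torch.Tensor:
        """Return (image, row, col) indices distributed uniformly inside the valid circle."""
        rand = torch.rand((batch_size, 3))
        radii = crop_radius * torch.sqrt(rand[:, 1])  # sqrt -> area-uniform radius, in pixels
        theta = 2.0 * torch.pi * rand[:, 2]           # uniform angle
        y = radii * torch.sin(theta)                  # pixel offsets from the image centre
        x = radii * torch.cos(theta)
        image_idx = torch.floor(rand[:, 0] * num_images)
        rows = torch.floor(y) + height // 2
        cols = torch.floor(x) + width // 2
        return torch.stack([image_idx, rows, cols], dim=1).long()

    # e.g. 4096 samples from 10 images of 1408x1408 px with a 707.5 px valid radius
    indices = sample_fisheye_pixels(4096, 10, 1408, 1408, 707.5)

Sampling the radius directly (without the square root) would concentrate samples near the image
centre instead of spreading them uniformly over the disk.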
From ae00061cf5506ee42e6fbc5f34340ffaabf574cd Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Tue, 12 Dec 2023 22:31:12 +0000
Subject: [PATCH 090/101] Fix custom data links in README (#2671)
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 1ea4ca8fad..b6c5bb6ff2 100644
--- a/README.md
+++ b/README.md
@@ -243,8 +243,8 @@ Using an existing dataset is great, but likely you want to use your own data! We
| π± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
| π₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
-| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#ODM) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
-| π [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#Aria) | Aria glasses | [Project Aria](https://projectaria.com/) | π |
+| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
+| π [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#aria) | Aria glasses | [Project Aria](https://projectaria.com/) | π |
| π [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | π |
## 5. Advanced Options
From 17f785758b8bb9e30c5dec0dcb4c085b28c85401 Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Wed, 13 Dec 2023 11:17:36 -0800
Subject: [PATCH 091/101] Parity of layout between viewer beta and legacy
viewer (#2639)
* alter layout of beta viewer to match old viewer
---------
Co-authored-by: Brent Yi
---
nerfstudio/viewer_beta/control_panel.py | 51 ++------------
nerfstudio/viewer_beta/render_panel.py | 2 +-
.../viewer_beta/render_state_machine.py | 2 +
nerfstudio/viewer_beta/viewer.py | 68 +++++++++++++++----
pyproject.toml | 2 +-
5 files changed, 66 insertions(+), 59 deletions(-)
diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py
index d118836849..c94b4745f6 100644
--- a/nerfstudio/viewer_beta/control_panel.py
+++ b/nerfstudio/viewer_beta/control_panel.py
@@ -20,11 +20,10 @@
import torch
import viser.transforms as vtf
from viser import ViserServer
-
+import viser
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.utils.colormaps import ColormapOptions, Colormaps
from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup,
- ViewerButton,
ViewerButtonGroup,
ViewerCheckbox,
ViewerDropdown,
@@ -57,8 +56,6 @@ def __init__(
crop_update_cb: Callable,
update_output_cb: Callable,
update_split_output_cb: Callable,
- toggle_training_state_cb: Callable,
- camera_vis: Callable,
default_composite_depth: bool = True,
):
self.viser_scale_ratio = scale_ratio
@@ -73,10 +70,6 @@ def __init__(
options=["Slow", "Mid", "Fast"],
cb_hook=lambda han: self._train_speed_cb(),
)
- self._reset_camera = ViewerButton(
- name="Reset Up Direction",
- cb_hook=lambda han: self._reset_camera_cb(),
- )
self._output_render = ViewerDropdown(
"Output Render",
"not set",
@@ -189,28 +182,9 @@ def _update_crop_handle(han):
self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Time to render")
self._time_enabled = time_enabled
- self.stat_folder = self.viser_server.add_gui_folder("Stats")
- with self.stat_folder:
- self.markdown = self.viser_server.add_gui_markdown("Step: 0")
- self.pause_train = viser_server.add_gui_button(label="Pause Training", disabled=False)
- self.pause_train.on_click(lambda _: self.toggle_pause_button())
- self.pause_train.on_click(lambda han: toggle_training_state_cb(han))
- self.resume_train = viser_server.add_gui_button(label="Resume Training", disabled=False)
- self.resume_train.on_click(lambda _: self.toggle_pause_button())
- self.resume_train.on_click(lambda han: toggle_training_state_cb(han))
- self.resume_train.visible = False
- # Add buttons to toggle training image visibility
- self.hide_images = viser_server.add_gui_button(label="Hide Train Cams", disabled=False)
- self.hide_images.on_click(lambda _: camera_vis(False))
- self.hide_images.on_click(lambda _: self.toggle_cameravis_button())
- self.show_images = viser_server.add_gui_button(label="Show Train Cams", disabled=False)
- self.show_images.on_click(lambda _: camera_vis(True))
- self.show_images.on_click(lambda _: self.toggle_cameravis_button())
- self.show_images.visible = False
-
self.add_element(self._train_speed)
self.add_element(self._train_util)
- self.add_element(self._reset_camera)
+
with self.viser_server.add_gui_folder("Render Options"):
self.add_element(self._max_res)
self.add_element(self._output_render)
@@ -244,6 +218,10 @@ def _update_crop_handle(han):
self.add_element(self._crop_rot, additional_tags=("crop",))
self.add_element(self._time, additional_tags=("time",))
+ self._reset_camera = viser_server.add_gui_button(
+ label="Reset Up Dir", disabled=False, icon=viser.Icon.ARROW_BIG_UP_LINES, color="gray"
+ )
+ self._reset_camera.on_click(self._reset_camera_cb)
def _train_speed_cb(self) -> None:
pass
@@ -259,25 +237,10 @@ def _train_speed_cb(self) -> None:
self._train_util.value = 0.5
self._max_res.value = 1024
- def _reset_camera_cb(self) -> None:
+ def _reset_camera_cb(self, _) -> None:
for client in self.viser_server.get_clients().values():
client.camera.up_direction = vtf.SO3(client.camera.wxyz) @ np.array([0.0, -1.0, 0.0])
- def toggle_pause_button(self) -> None:
- self.pause_train.visible = not self.pause_train.visible
- self.resume_train.visible = not self.resume_train.visible
-
- def toggle_cameravis_button(self) -> None:
- self.hide_images.visible = not self.hide_images.visible
- self.show_images.visible = not self.show_images.visible
-
- def update_step(self, step):
- """
- Args:
- step: the train step to set the model to
- """
- self.markdown.content = f"Step: {step}"
-
def update_output_options(self, new_options: List[str]):
"""
Args:
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index b5f4e1bfb2..0e666038ef 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -485,7 +485,7 @@ def _(event: viser.GuiEvent) -> None:
reset_up_button = server.add_gui_button(
"Reset up direction",
- icon=viser.Icon.ARROW_AUTOFIT_UP,
+ icon=viser.Icon.ARROW_BIG_UP_LINES,
hint="Reset the orbit up direction.",
)
diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py
index 46ceeec39c..53c73c34e8 100644
--- a/nerfstudio/viewer_beta/render_state_machine.py
+++ b/nerfstudio/viewer_beta/render_state_machine.py
@@ -246,6 +246,8 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any]):
jpeg_quality=self.viewer.config.jpeg_quality,
depth=depth,
)
+ res = f"{selected_output.shape[0]}x{selected_output.shape[1]}px"
+ self.viewer.stats_markdown.content = self.viewer.make_stats_markdown(None, res)
def _calculate_image_res(self, aspect_ratio: float) -> Tuple[int, int]:
"""Calculate the maximum image height that can be rendered in the time budget
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index b01c35f4b5..92e609f076 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -103,18 +103,13 @@ def __init__(
self._prev_train_state: Literal["training", "paused", "completed"] = "training"
self.last_move_time = 0
- self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share)
+ self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port)
# Set the name of the URL either to the share link if available, or the localhost
- # TODO: we should revisit this once a public API for share URL status is exposed in viser.
- # https://github.com/nerfstudio-project/viser/issues/124
if share:
- assert self.viser_server._share_tunnel is not None
- while self.viser_server._share_tunnel._shared_state["status"] == "connecting":
- # wait for connection before grabbing URL
- time.sleep(0.01)
- url_maybe = self.viser_server._share_tunnel.get_url()
- if url_maybe is not None:
- self.viewer_url = url_maybe
+ url = self.viser_server.request_share_url()
+            if url is None:
+                print("Couldn't make share URL")
+ self.viewer_url = url
else:
self.viewer_url = f"http://{config.websocket_host}:{websocket_port}"
else:
@@ -154,6 +149,32 @@ def __init__(
self.viser_server.on_client_disconnect(self.handle_disconnect)
self.viser_server.on_client_connect(self.handle_new_client)
+ # Populate the header, which includes the pause button, train cam button, and stats
+ self.pause_train = self.viser_server.add_gui_button(
+ label="Pause Training", disabled=False, icon=viser.Icon.PLAYER_PAUSE_FILLED
+ )
+ self.pause_train.on_click(lambda _: self.toggle_pause_button())
+ self.pause_train.on_click(lambda han: self._toggle_training_state(han))
+ self.resume_train = self.viser_server.add_gui_button(
+ label="Resume Training", disabled=False, icon=viser.Icon.PLAYER_PLAY_FILLED
+ )
+ self.resume_train.on_click(lambda _: self.toggle_pause_button())
+ self.resume_train.on_click(lambda han: self._toggle_training_state(han))
+ self.resume_train.visible = False
+ # Add buttons to toggle training image visibility
+ self.hide_images = self.viser_server.add_gui_button(
+ label="Hide Train Cams", disabled=False, icon=viser.Icon.EYE_OFF, color=None
+ )
+ self.hide_images.on_click(lambda _: self.set_camera_visibility(False))
+ self.hide_images.on_click(lambda _: self.toggle_cameravis_button())
+ self.show_images = self.viser_server.add_gui_button(
+ label="Show Train Cams", disabled=False, icon=viser.Icon.EYE, color=None
+ )
+ self.show_images.on_click(lambda _: self.set_camera_visibility(True))
+ self.show_images.on_click(lambda _: self.toggle_cameravis_button())
+ self.show_images.visible = False
+ mkdown = self.make_stats_markdown(0, "0x0px")
+ self.stats_markdown = self.viser_server.add_gui_markdown(mkdown)
tabs = self.viser_server.add_gui_tab_group()
control_tab = tabs.add_tab("Control", viser.Icon.SETTINGS)
with control_tab:
@@ -165,8 +186,6 @@ def __init__(
self._crop_params_update,
self._output_type_change,
self._output_split_type_change,
- self._toggle_training_state,
- self.set_camera_visibility,
default_composite_depth=self.config.default_composite_depth,
)
config_path = self.log_filename.parents[0] / "config.yml"
@@ -218,6 +237,29 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem
for c in self.viewer_controls:
c._setup(self)
+ def toggle_pause_button(self) -> None:
+ self.pause_train.visible = not self.pause_train.visible
+ self.resume_train.visible = not self.resume_train.visible
+
+ def toggle_cameravis_button(self) -> None:
+ self.hide_images.visible = not self.hide_images.visible
+ self.show_images.visible = not self.show_images.visible
+
+ def make_stats_markdown(self, step: Optional[int], res: Optional[str]) -> str:
+ # if either are None, read it from the current stats_markdown content
+ if step is None:
+ step = int(self.stats_markdown.content.split("\n")[0].split(": ")[1])
+ if res is None:
+ res = (self.stats_markdown.content.split("\n")[1].split(": ")[1]).strip()
+ return f"Step: {step} \nResolution: {res}"
+
+ def update_step(self, step):
+ """
+ Args:
+ step: the train step to set the model to
+ """
+ self.stats_markdown.content = self.make_stats_markdown(step, None)
+
def get_camera_state(self, client: viser.ClientHandle) -> CameraState:
R = vtf.SO3(wxyz=client.camera.wxyz)
R = R @ vtf.SO3.from_x_radians(np.pi)
@@ -403,7 +445,7 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N
if camera_state is not None:
self.render_statemachines[id].action(RenderAction("step", camera_state))
self.update_camera_poses()
- self.control_panel.update_step(step)
+ self.update_step(step)
def update_colormap_options(self, dimensions: int, dtype: type) -> None:
"""update the colormap options based on the current render
diff --git a/pyproject.toml b/pyproject.toml
index 3b6d69a84f..019ef6b6a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.12",
+ "viser==0.1.13",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
From a894c6d9383340de1642debea46e4e79dd66bb5c Mon Sep 17 00:00:00 2001
From: Boris Feld
Date: Thu, 14 Dec 2023 17:47:51 +0100
Subject: [PATCH 092/101] Update first_nerf.md (#2677)
Update documentation to mention that the Comet visualizer can be used without the viewer
---
docs/quickstart/first_nerf.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/quickstart/first_nerf.md b/docs/quickstart/first_nerf.md
index 137bd0b235..67a84783fb 100644
--- a/docs/quickstart/first_nerf.md
+++ b/docs/quickstart/first_nerf.md
@@ -98,7 +98,7 @@ Each script will have some other minor quirks (like the training script datapars
## Comet / Tensorboard / WandB / Viewer
-We support four different methods to track training progress, using the viewer [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers.
+We support four different methods to track training progress, using the viewer [tensorboard](https://www.tensorflow.org/tensorboard), [Weights and Biases](https://wandb.ai/site), and [Comet](https://comet.com/?utm_source=nerf&utm_medium=referral&utm_content=nerf_docs). You can specify which visualizer to use by appending `--vis {viewer, tensorboard, wandb, comet, viewer+wandb, viewer+tensorboard, viewer+comet}` to the training command. Simultaneously utilizing the viewer alongside wandb or tensorboard may cause stuttering issues during evaluation steps. The viewer only works for methods that are fast (ie. nerfacto, instant-ngp), for slower methods like NeRF, use the other loggers.
## Evaluating Runs
From 622e26b511545bc583ee5122bf4a9dc6bfb63034 Mon Sep 17 00:00:00 2001
From: Tobias Fischer <36965290+tobiasfshr@users.noreply.github.com>
Date: Thu, 14 Dec 2023 19:07:41 +0100
Subject: [PATCH 093/101] [Fix] Pixel Sampler setup in VanillaDataManager for
Cameras incl. metadata (#2678)
fix pixel sampler setup for cameras with metadata other than fisheye crop radius
Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com>
---
nerfstudio/data/datamanagers/base_datamanager.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 47144c00e2..bf12bd6041 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -476,9 +476,10 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe
if is_equirectangular.any():
CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.")
- fisheye_crop_radius = (
- None if dataset.cameras.metadata is None else dataset.cameras.metadata["fisheye_crop_radius"]
- )
+ fisheye_crop_radius = None
+ if dataset.cameras.metadata is not None and "fisheye_crop_radius" in dataset.cameras.metadata:
+ fisheye_crop_radius = dataset.cameras.metadata["fisheye_crop_radius"]
+
return self.config.pixel_sampler.setup(
is_equirectangular=is_equirectangular,
num_rays_per_batch=num_rays_per_batch,
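
The failure this fixes: cameras.metadata can be a populated dict that simply lacks the fisheye entry
(for example when another feature stores only a cam_idx there), and the old expression indexed it
unconditionally. A toy illustration of the lookup pattern (the metadata contents below are made up):

    # Hypothetical metadata populated by another feature; no fisheye entry present.
    metadata = {"cam_idx": 3}

    # Old pattern: raises KeyError whenever metadata exists without the key.
    # fisheye_crop_radius = None if metadata is None else metadata["fisheye_crop_radius"]

    # Fixed pattern: fall back to None unless the key is actually present.
    fisheye_crop_radius = None
    if metadata is not None and "fisheye_crop_radius" in metadata:
        fisheye_crop_radius = metadata["fisheye_crop_radius"]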
From 88873ef3abac208d0fe6e90d06c044900762b01a Mon Sep 17 00:00:00 2001
From: Rohan Mathur
Date: Thu, 14 Dec 2023 17:22:20 -0800
Subject: [PATCH 094/101] Added support for .tif and .tiff files in equirect
utils (#2613)
* added tiff to equirect utils
* added tif files
---
nerfstudio/process_data/equirect_utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/nerfstudio/process_data/equirect_utils.py b/nerfstudio/process_data/equirect_utils.py
index 497cb32331..232d967332 100644
--- a/nerfstudio/process_data/equirect_utils.py
+++ b/nerfstudio/process_data/equirect_utils.py
@@ -304,7 +304,7 @@ def generate_planar_projections_from_equirectangular(
with progress:
for i in progress.track(os.listdir(frame_dir), description="", total=num_ims):
- if i.lower().endswith((".jpg", ".png", ".jpeg")):
+ if i.lower().endswith((".jpg", ".png", ".jpeg", ".tiff", ".tif")):
im = np.array(cv2.imread(os.path.join(frame_dir, i)))
im = torch.tensor(im, dtype=torch.float32, device=device)
im = torch.permute(im, (2, 0, 1)).unsqueeze(0) / 255.0
@@ -331,7 +331,7 @@ def compute_resolution_from_equirect(image_dir: Path, num_images: int) -> Tuple[
"""
for i in os.listdir(image_dir):
- if i.lower().endswith((".jpg", ".png", ".jpeg")):
+ if i.lower().endswith((".jpg", ".png", ".jpeg", ".tiff", ".tif")):
im = np.array(cv2.imread(os.path.join(image_dir, i)))
res_squared = (im.shape[0] * im.shape[1]) / num_images
return (int(np.sqrt(res_squared)), int(np.sqrt(res_squared)))
From ddedb920dc7c2d5cd4be4a042f9718f5bff22981 Mon Sep 17 00:00:00 2001
From: Justin Kerr
Date: Thu, 14 Dec 2023 22:14:09 -0800
Subject: [PATCH 095/101] Gaussian splatting (#2521)
* Adds Gaussian Splatting integration
---------
Co-authored-by: Jake Austin jake-austin@berkeley.edu
Co-authored-by: Brent Yi
Co-authored-by: Zhuoyang
Co-authored-by: Vickie Ye
Co-authored-by: adamrashid96
Co-authored-by: Paul Wais
Co-authored-by: Gina Wu
Co-authored-by: Alexander Kristoffersen
Co-authored-by: machenmusik
Co-authored-by: Riley
Co-authored-by: Gina Wu <42229107+ginazhouhuiwu@users.noreply.github.com>
Co-authored-by: maturk
Co-authored-by: Guangyun-Xu <38279883+Guangyun-Xu@users.noreply.github.com>
Co-authored-by: Zhuoyang Pan <102644383+Zhuoyang-Pan@users.noreply.github.com>
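
Much of the new data path lives in full_images_datamanager.py below: images are undistorted and
cached up front so the model receives whole cameras rather than ray bundles. A rough sketch of the
per-image perspective undistortion step it performs (the helper name and the toy camera below are
assumptions; the real code also remaps masks and depth images and writes the new intrinsics back
into the dataset's cameras):

    import cv2
    import numpy as np

    def undistort_perspective(image: np.ndarray, K: np.ndarray, dist: np.ndarray):
        """Undistort one image, crop to the valid region, and return the new intrinsics."""
        h, w = image.shape[:2]
        new_K, roi = cv2.getOptimalNewCameraMatrix(K, dist, (w, h), 0)
        undistorted = cv2.undistort(image, K, dist, None, new_K)
        x, y, rw, rh = roi
        return undistorted[y : y + rh, x : x + rw], new_K  # caller updates fx, fy, cx, cy, w, h

    # Toy example: a synthetic frame with a pinhole K and mild radial distortion.
    img = np.zeros((100, 120, 3), dtype=np.uint8)
    K = np.array([[100.0, 0.0, 60.0], [0.0, 100.0, 50.0], [0.0, 0.0, 1.0]])
    dist = np.array([0.01, -0.002, 0.0, 0.0])  # k1, k2, p1, p2 in OpenCV order
    undistorted, new_K = undistort_perspective(img, K, dist)

The fisheye branch of the same caching loop does the analogous thing with
cv2.fisheye.initUndistortRectifyMap followed by cv2.remap.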
---
nerfstudio/cameras/camera_optimizers.py | 11 +
nerfstudio/configs/base_config.py | 2 +-
nerfstudio/configs/method_configs.py | 54 +-
.../data/datamanagers/base_datamanager.py | 36 +-
.../datamanagers/full_images_datamanager.py | 410 ++++++++++
.../data/datamanagers/parallel_datamanager.py | 11 +-
.../random_cameras_datamanager.py | 9 +-
.../data/dataparsers/colmap_dataparser.py | 2 +-
.../data/dataparsers/nerfstudio_dataparser.py | 31 +
nerfstudio/data/utils/dataloaders.py | 16 +-
nerfstudio/engine/optimizers.py | 21 +
nerfstudio/engine/trainer.py | 37 +-
nerfstudio/exporter/exporter_utils.py | 2 +
nerfstudio/model_components/losses.py | 4 +
nerfstudio/models/base_model.py | 21 +-
nerfstudio/models/gaussian_splatting.py | 764 ++++++++++++++++++
nerfstudio/pipelines/base_pipeline.py | 53 +-
nerfstudio/scripts/exporter.py | 78 +-
nerfstudio/scripts/render.py | 16 +-
nerfstudio/scripts/viewer/run_viewer.py | 2 +-
.../viewer/server/render_state_machine.py | 25 +-
nerfstudio/viewer_beta/export_panel.py | 6 +
.../viewer_beta/render_state_machine.py | 41 +-
nerfstudio/viewer_beta/utils.py | 2 +-
nerfstudio/viewer_beta/viewer.py | 10 +-
pyproject.toml | 6 +-
tests/test_train.py | 1 +
27 files changed, 1544 insertions(+), 127 deletions(-)
create mode 100644 nerfstudio/data/datamanagers/full_images_datamanager.py
create mode 100644 nerfstudio/models/gaussian_splatting.py
diff --git a/nerfstudio/cameras/camera_optimizers.py b/nerfstudio/cameras/camera_optimizers.py
index 7b76488842..4aa69a69b6 100644
--- a/nerfstudio/cameras/camera_optimizers.py
+++ b/nerfstudio/cameras/camera_optimizers.py
@@ -33,6 +33,7 @@
from nerfstudio.utils import poses as pose_utils
from nerfstudio.engine.optimizers import OptimizerConfig
from nerfstudio.engine.schedulers import SchedulerConfig
+from nerfstudio.cameras.cameras import Cameras
@dataclass
@@ -146,6 +147,16 @@ def apply_to_raybundle(self, raybundle: RayBundle) -> None:
raybundle.origins = raybundle.origins + correction_matrices[:, :3, 3]
raybundle.directions = torch.bmm(correction_matrices[:, :3, :3], raybundle.directions[..., None]).squeeze()
+ def apply_to_camera(self, camera: Cameras) -> None:
+ """Apply the pose correction to the raybundle"""
+ if self.config.mode != "off":
+ assert camera.metadata is not None, "Must provide id of camera in its metadata"
+ assert "cam_idx" in camera.metadata, "Must provide id of camera in its metadata"
+ camera_idx = camera.metadata["cam_idx"]
+ adj = self([camera_idx]) # type: ignore
+ adj = torch.cat([adj, torch.Tensor([0, 0, 0, 1])[None, None].to(adj)], dim=1)
+ camera.camera_to_worlds = torch.bmm(camera.camera_to_worlds, adj)
+
def get_loss_dict(self, loss_dict: dict) -> None:
"""Add regularization"""
if self.config.mode != "off":
diff --git a/nerfstudio/configs/base_config.py b/nerfstudio/configs/base_config.py
index c04c11aaac..85c205c5c5 100644
--- a/nerfstudio/configs/base_config.py
+++ b/nerfstudio/configs/base_config.py
@@ -144,7 +144,7 @@ class ViewerConfig(PrintableConfig):
"""Whether to kill the training job when it has completed. Note this will stop rendering in the viewer."""
image_format: Literal["jpeg", "png"] = "jpeg"
"""Image format viewer should use; jpeg is lossy compression, while png is lossless."""
- jpeg_quality: int = 90
+ jpeg_quality: int = 75
"""Quality tradeoff to use for jpeg compression."""
make_share_url: bool = False
"""Viewer beta feature: print a shareable URL. `vis` must be set to viewer_beta; this flag is otherwise ignored."""
diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index 74cabbafcb..2c3a8aac74 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -22,7 +22,6 @@
from typing import Dict
import tyro
-from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
from nerfstudio.configs.base_config import ViewerConfig
@@ -49,6 +48,7 @@
from nerfstudio.data.datasets.depth_dataset import DepthDataset
from nerfstudio.data.datasets.sdf_dataset import SDFDataset
from nerfstudio.data.datasets.semantic_dataset import SemanticDataset
+from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
from nerfstudio.engine.schedulers import (
CosineDecaySchedulerConfig,
@@ -59,8 +59,10 @@
from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind
from nerfstudio.fields.sdf_field import SDFFieldConfig
from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig
from nerfstudio.models.generfacto import GenerfactoModelConfig
from nerfstudio.models.instant_ngp import InstantNGPModelConfig
+from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig
from nerfstudio.models.mipnerf import MipNerfModel
from nerfstudio.models.nerfacto import NerfactoModelConfig
from nerfstudio.models.neus import NeuSModelConfig
@@ -69,6 +71,7 @@
from nerfstudio.models.tensorf import TensoRFModelConfig
from nerfstudio.models.vanilla_nerf import NeRFModel, VanillaModelConfig
from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig
+from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig
from nerfstudio.pipelines.dynamic_batch import DynamicBatchPipelineConfig
from nerfstudio.plugins.registry import discover_methods
@@ -87,6 +90,7 @@
"generfacto": "Generative Text to NeRF model",
"neus": "Implementation of NeuS. (slow)",
"neus-facto": "Implementation of NeuS-Facto. (slow)",
+ "gaussian-splatting": "Gaussian Splatting model",
}
method_configs["nerfacto"] = TrainerConfig(
@@ -594,6 +598,54 @@
vis="viewer",
)
+method_configs["gaussian-splatting"] = TrainerConfig(
+ method_name="gaussian-splatting",
+ steps_per_eval_image=100,
+ steps_per_eval_batch=100,
+ steps_per_save=2000,
+ steps_per_eval_all_images=1000,
+ max_num_iterations=30000,
+ mixed_precision=False,
+ gradient_accumulation_steps={"camera_opt": 100, "color": 2},
+ pipeline=VanillaPipelineConfig(
+ datamanager=FullImageDatamanagerConfig(
+ dataparser=ColmapDataParserConfig(load_3D_points=True),
+ ),
+ model=GaussianSplattingModelConfig(),
+ ),
+ optimizers={
+ "xyz": {
+ "optimizer": AdamOptimizerConfig(lr=1.6e-4, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(
+ lr_final=1.6e-6,
+ max_steps=30000,
+ ),
+ },
+ "color": {
+ "optimizer": AdamOptimizerConfig(lr=5e-4, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(
+ lr_final=1e-4,
+ max_steps=30000,
+ ),
+ },
+ "opacity": {
+ "optimizer": AdamOptimizerConfig(lr=0.05, eps=1e-15),
+ "scheduler": None,
+ },
+ "scaling": {
+ "optimizer": AdamOptimizerConfig(lr=0.005, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-3, max_steps=30000),
+ },
+ "rotation": {"optimizer": AdamOptimizerConfig(lr=0.001, eps=1e-15), "scheduler": None},
+ "camera_opt": {
+ "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+ "scheduler": ExponentialDecaySchedulerConfig(lr_final=5e-5, max_steps=30000),
+ },
+ },
+ viewer=ViewerConfig(num_rays_per_chunk=1 << 15),
+ vis="viewer_beta",
+)
+
def merge_methods(methods, method_descriptions, new_methods, new_descriptions, overwrite=True):
"""Merge new methods and descriptions into existing methods and descriptions.
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index bf12bd6041..67180bea60 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -21,12 +21,13 @@
from abc import abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field
-from pathlib import Path
from functools import cached_property
+from pathlib import Path
from typing import (
Any,
Callable,
Dict,
+ ForwardRef,
Generic,
List,
Literal,
@@ -35,9 +36,8 @@
Type,
Union,
cast,
- ForwardRef,
- get_origin,
get_args,
+ get_origin,
)
import torch
@@ -47,7 +47,7 @@
from typing_extensions import TypeVar
from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
-from nerfstudio.cameras.cameras import CameraType
+from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.cameras.rays import RayBundle
from nerfstudio.configs.base_config import InstantiateConfig
from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion
@@ -55,9 +55,9 @@
from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
from nerfstudio.data.datasets.base_dataset import InputDataset
from nerfstudio.data.pixel_samplers import (
+ PatchPixelSamplerConfig,
PixelSampler,
PixelSamplerConfig,
- PatchPixelSamplerConfig,
)
from nerfstudio.data.utils.dataloaders import (
CacheDataloader,
@@ -67,9 +67,8 @@
from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
from nerfstudio.model_components.ray_generators import RayGenerator
-from nerfstudio.utils.misc import IterableWrapper
+from nerfstudio.utils.misc import IterableWrapper, get_orig_class
from nerfstudio.utils.rich_utils import CONSOLE
-from nerfstudio.utils.misc import get_orig_class
def variable_res_collate(batch: List[Dict]) -> Dict:
@@ -131,7 +130,7 @@ class DataManager(nn.Module):
To get data, use the next_train and next_eval functions.
This data manager's next_train and next_eval methods will return 2 things:
- 1. A Raybundle: This will contain the rays we are sampling, with latents and
+ 1. 'rays': This will contain the rays or camera we are sampling, with latents and
conditionals attached (everything needed at inference)
2. A "batch" of auxiliary information: This will contain the mask, the ground truth
pixels, etc needed to actually train, score, etc the model
@@ -246,7 +245,7 @@ def setup_eval(self):
"""Sets up the data manager for evaluation"""
@abstractmethod
- def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
+ def next_train(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]:
"""Returns the next batch of data from the train data manager.
Args:
@@ -258,25 +257,25 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
raise NotImplementedError
@abstractmethod
- def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
+ def next_eval(self, step: int) -> Tuple[Union[RayBundle, Cameras], Dict]:
"""Returns the next batch of data from the eval data manager.
Args:
step: the step number of the eval image to retrieve
Returns:
- A tuple of the ray bundle for the image, and a dictionary of additional batch information
+ A tuple of the ray/camera for the image, and a dictionary of additional batch information
such as the groundtruth image.
"""
raise NotImplementedError
@abstractmethod
- def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
+ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
"""Retrieve the next eval image.
Args:
step: the step number of the eval image to retrieve
Returns:
- A tuple of the step number, the ray bundle for the image, and a dictionary of
+            A tuple of the camera for the image, and a dictionary of
additional batch information such as the groundtruth image.
"""
raise NotImplementedError
@@ -313,7 +312,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
@dataclass
class VanillaDataManagerConfig(DataManagerConfig):
- """A basic data manager"""
+ """A basic data manager for a ray-based model"""
_target: Type = field(default_factory=lambda: VanillaDataManager)
"""Target class to instantiate."""
@@ -555,11 +554,10 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
ray_bundle = self.eval_ray_generator(ray_indices)
return ray_bundle, batch
- def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
- for camera_ray_bundle, batch in self.eval_dataloader:
- assert camera_ray_bundle.camera_indices is not None
- image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0])
- return image_idx, camera_ray_bundle, batch
+ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
+ for camera, batch in self.eval_dataloader:
+ assert camera.shape[0] == 1
+ return camera, batch
raise ValueError("No more eval images")
def get_train_rays_per_batch(self) -> int:
diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py
new file mode 100644
index 0000000000..35837f05a1
--- /dev/null
+++ b/nerfstudio/data/datamanagers/full_images_datamanager.py
@@ -0,0 +1,410 @@
+# Copyright 2022 The Nerfstudio Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Data manager that outputs cameras / images instead of raybundles
+
+Good for things like gaussian splatting which require full cameras instead of the standard ray
+paradigm
+"""
+
+from __future__ import annotations
+
+import random
+from dataclasses import dataclass, field
+from functools import cached_property
+from pathlib import Path
+from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin
+
+import cv2
+import numpy as np
+import torch
+from copy import deepcopy
+from torch.nn import Parameter
+from tqdm import tqdm
+
+from nerfstudio.cameras.cameras import Cameras, CameraType
+from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion
+from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset
+from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
+from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
+from nerfstudio.data.datasets.base_dataset import InputDataset
+from nerfstudio.utils.misc import get_orig_class
+from nerfstudio.utils.rich_utils import CONSOLE
+
+
+@dataclass
+class FullImageDatamanagerConfig(DataManagerConfig):
+ _target: Type = field(default_factory=lambda: FullImageDatamanager)
+ dataparser: AnnotatedDataParserUnion = NerfstudioDataParserConfig()
+ camera_res_scale_factor: float = 1.0
+ """The scale factor for scaling spatial data such as images, mask, semantics
+ along with relevant information about camera intrinsics
+ """
+ eval_num_images_to_sample_from: int = -1
+ """Number of images to sample during eval iteration."""
+ eval_num_times_to_repeat_images: int = -1
+ """When not evaluating on all images, number of iterations before picking
+ new images. If -1, never pick new images."""
+ eval_image_indices: Optional[Tuple[int, ...]] = (0,)
+ """Specifies the image indices to use during eval; if None, uses all."""
+ cache_images: Literal["no-cache", "cpu", "gpu"] = "cpu"
+ """Whether to cache images in memory. If "numpy", caches as numpy arrays, if "torch", caches as torch tensors."""
+
+
+class FullImageDatamanager(DataManager, Generic[TDataset]):
+ """
+ A datamanager that outputs full images and cameras instead of raybundles. This makes the
+    datamanager more lightweight since we don't have to generate rays. Useful for full-image
+    training, e.g. rasterization pipelines.
+ """
+
+ config: FullImageDatamanagerConfig
+ train_dataset: TDataset
+ eval_dataset: TDataset
+
+ def __init__(
+ self,
+ config: FullImageDatamanagerConfig,
+ device: Union[torch.device, str] = "cpu",
+ test_mode: Literal["test", "val", "inference"] = "val",
+ world_size: int = 1,
+ local_rank: int = 0,
+ **kwargs,
+ ):
+ self.config = config
+ self.device = device
+ self.world_size = world_size
+ self.local_rank = local_rank
+ self.sampler = None
+ self.test_mode = test_mode
+ self.test_split = "test" if test_mode in ["test", "inference"] else "val"
+ self.dataparser_config = self.config.dataparser
+ if self.config.data is not None:
+ self.config.dataparser.data = Path(self.config.data)
+ else:
+ self.config.data = self.config.dataparser.data
+ self.dataparser = self.dataparser_config.setup()
+ if test_mode == "inference":
+ self.dataparser.downscale_factor = 1 # Avoid opening images
+ self.includes_time = self.dataparser.includes_time
+
+ self.train_dataparser_outputs: DataparserOutputs = self.dataparser.get_dataparser_outputs(split="train")
+ self.train_dataset = self.create_train_dataset()
+ self.eval_dataset = self.create_eval_dataset()
+ if len(self.train_dataset) > 500 and self.config.cache_images == "gpu":
+ CONSOLE.print("Train dataset has over 500 images, overriding cach_images to cpu", style="bold yellow")
+ self.config.cache_images = "cpu"
+ self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images)
+ self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device
+ if self.config.masks_on_gpu is True:
+ self.exclude_batch_keys_from_device.remove("mask")
+ if self.config.images_on_gpu is True:
+ self.exclude_batch_keys_from_device.remove("image")
+
+ # Some logic to make sure we sample every camera in equal amounts
+ self.train_unseen_cameras = [i for i in range(len(self.train_dataset))]
+ self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))]
+ assert len(self.train_unseen_cameras) > 0, "No data found in dataset"
+
+ super().__init__()
+
+ def cache_images(self, cache_images_option):
+ cached_train = []
+ CONSOLE.log("Caching / undistorting train images")
+ for i in tqdm(range(len(self.train_dataset)), leave=False):
+ # cv2.undistort the images / cameras
+ data = self.train_dataset.get_data(i)
+ camera = self.train_dataset.cameras[i].reshape(())
+ K = camera.get_intrinsics_matrices().numpy()
+ if camera.distortion_params is None:
+ continue
+ distortion_params = camera.distortion_params.numpy()
+ image = data["image"].numpy()
+
+ if camera.camera_type.item() == CameraType.PERSPECTIVE.value:
+ distortion_params = np.array(
+ [
+ distortion_params[0],
+ distortion_params[1],
+ distortion_params[4],
+ distortion_params[5],
+ distortion_params[2],
+ distortion_params[3],
+ 0,
+ 0,
+ ]
+ )
+ newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0)
+ image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore
+ # crop the image and update the intrinsics accordingly
+ x, y, w, h = roi
+ image = image[y : y + h, x : x + w]
+ if "mask" in data:
+ data["mask"] = data["mask"][y : y + h, x : x + w]
+ if "depth_image" in data:
+ data["depth_image"] = data["depth_image"][y : y + h, x : x + w]
+ # update the width, height
+ self.train_dataset.cameras.width[i] = w
+ self.train_dataset.cameras.height[i] = h
+ if "mask" in data:
+ mask = data["mask"].numpy()
+ mask = mask.astype(np.uint8) * 255
+ mask = cv2.undistort(mask, K, distortion_params, None, newK) # type: ignore
+ mask = mask[y : y + h, x : x + w]
+ data["mask"] = torch.from_numpy(mask).bool()
+ K = newK
+
+ elif camera.camera_type.item() == CameraType.FISHEYE.value:
+ distortion_params = np.array(
+ [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]]
+ )
+ newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
+ K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0
+ )
+ map1, map2 = cv2.fisheye.initUndistortRectifyMap(
+ K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1
+ )
+ # and then remap:
+ image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
+ if "mask" in data:
+ mask = data["mask"].numpy()
+ mask = mask.astype(np.uint8) * 255
+ mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK)
+ data["mask"] = torch.from_numpy(mask).bool()
+ K = newK
+ else:
+ raise NotImplementedError("Only perspective and fisheye cameras are supported")
+ data["image"] = torch.from_numpy(image)
+
+ cached_train.append(data)
+
+ self.train_dataset.cameras.fx[i] = float(K[0, 0])
+ self.train_dataset.cameras.fy[i] = float(K[1, 1])
+ self.train_dataset.cameras.cx[i] = float(K[0, 2])
+ self.train_dataset.cameras.cy[i] = float(K[1, 2])
+
+ cached_eval = []
+ CONSOLE.log("Caching / undistorting eval images")
+ for i in tqdm(range(len(self.eval_dataset)), leave=False):
+ # cv2.undistort the images / cameras
+ data = self.eval_dataset.get_data(i)
+ camera = self.eval_dataset.cameras[i].reshape(())
+ K = camera.get_intrinsics_matrices().numpy()
+ if camera.distortion_params is None:
+ continue
+ distortion_params = camera.distortion_params.numpy()
+ image = data["image"].numpy()
+
+ if camera.camera_type.item() == CameraType.PERSPECTIVE.value:
+ distortion_params = np.array(
+ [
+ distortion_params[0],
+ distortion_params[1],
+ distortion_params[4],
+ distortion_params[5],
+ distortion_params[2],
+ distortion_params[3],
+ 0,
+ 0,
+ ]
+ )
+ newK, roi = cv2.getOptimalNewCameraMatrix(K, distortion_params, (image.shape[1], image.shape[0]), 0)
+ image = cv2.undistort(image, K, distortion_params, None, newK) # type: ignore
+ # crop the image and update the intrinsics accordingly
+ x, y, w, h = roi
+ image = image[y : y + h, x : x + w]
+ # update the width, height
+ self.eval_dataset.cameras.width[i] = w
+ self.eval_dataset.cameras.height[i] = h
+ if "mask" in data:
+ mask = data["mask"].numpy()
+ mask = mask.astype(np.uint8) * 255
+ mask = cv2.undistort(mask, K, distortion_params, None, newK) # type: ignore
+ mask = mask[y : y + h, x : x + w]
+ data["mask"] = torch.from_numpy(mask).bool()
+ K = newK
+
+ elif camera.camera_type.item() == CameraType.FISHEYE.value:
+ distortion_params = np.array(
+ [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]]
+ )
+ newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
+ K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0
+ )
+ map1, map2 = cv2.fisheye.initUndistortRectifyMap(
+ K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1
+ )
+ # and then remap:
+ image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
+ if "mask" in data:
+ mask = data["mask"].numpy()
+ mask = mask.astype(np.uint8) * 255
+ mask = cv2.fisheye.undistortImage(mask, K, distortion_params, None, newK)
+ data["mask"] = torch.from_numpy(mask).bool()
+ K = newK
+ else:
+ raise NotImplementedError("Only perspective and fisheye cameras are supported")
+ data["image"] = torch.from_numpy(image)
+
+ cached_eval.append(data)
+
+ self.eval_dataset.cameras.fx[i] = float(K[0, 0])
+ self.eval_dataset.cameras.fy[i] = float(K[1, 1])
+ self.eval_dataset.cameras.cx[i] = float(K[0, 2])
+ self.eval_dataset.cameras.cy[i] = float(K[1, 2])
+
+ if cache_images_option == "gpu":
+ for cache in cached_train:
+ cache["image"] = cache["image"].to(self.device)
+ if "mask" in cache:
+ cache["mask"] = cache["mask"].to(self.device)
+ for cache in cached_eval:
+ cache["image"] = cache["image"].to(self.device)
+ if "mask" in cache:
+ cache["mask"] = cache["mask"].to(self.device)
+ else:
+ for cache in cached_train:
+ cache["image"] = cache["image"].pin_memory()
+ if "mask" in cache:
+ cache["mask"] = cache["mask"].pin_memory()
+ for cache in cached_eval:
+ cache["image"] = cache["image"].pin_memory()
+ if "mask" in cache:
+ cache["mask"] = cache["mask"].pin_memory()
+
+ return cached_train, cached_eval
+
+ def create_train_dataset(self) -> TDataset:
+ """Sets up the data loaders for training"""
+ return self.dataset_type(
+ dataparser_outputs=self.train_dataparser_outputs,
+ scale_factor=self.config.camera_res_scale_factor,
+ )
+
+ def create_eval_dataset(self) -> TDataset:
+ """Sets up the data loaders for evaluation"""
+ return self.dataset_type(
+ dataparser_outputs=self.dataparser.get_dataparser_outputs(split=self.test_split),
+ scale_factor=self.config.camera_res_scale_factor,
+ )
+
+ @cached_property
+ def dataset_type(self) -> Type[TDataset]:
+ """Returns the dataset type passed as the generic argument"""
+ default: Type[TDataset] = cast(TDataset, TDataset.__default__) # type: ignore
+ orig_class: Type[FullImageDatamanager] = get_orig_class(self, default=None) # type: ignore
+ if type(self) is FullImageDatamanager and orig_class is None:
+ return default
+ if orig_class is not None and get_origin(orig_class) is FullImageDatamanager:
+ return get_args(orig_class)[0]
+
+ # For inherited classes, we need to find the correct type to instantiate
+ for base in getattr(self, "__orig_bases__", []):
+ if get_origin(base) is FullImageDatamanager:
+ for value in get_args(base):
+ if isinstance(value, ForwardRef):
+ if value.__forward_evaluated__:
+ value = value.__forward_value__
+ elif value.__forward_module__ is None:
+ value.__forward_module__ = type(self).__module__
+ value = getattr(value, "_evaluate")(None, None, set())
+ assert isinstance(value, type)
+ if issubclass(value, InputDataset):
+ return cast(Type[TDataset], value)
+ return default
+
+ def get_datapath(self) -> Path:
+ return self.config.dataparser.data
+
+ def setup_train(self):
+ """Sets up the data loaders for training"""
+
+ def setup_eval(self):
+ """Sets up the data loader for evaluation"""
+
+ @property
+ def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]:
+ """
+        Pretends to be the dataloader for evaluation; it returns a list of (camera, data) tuples
+ """
+ image_indices = list(range(len(self.eval_unseen_cameras)))
+ data = deepcopy(self.cached_eval)
+ _cameras = deepcopy(self.eval_dataset.cameras).to(self.device)
+ cameras = []
+ for i in image_indices:
+ data[i]["image"] = data[i]["image"].to(self.device)
+ cameras.append(_cameras[i : i + 1])
+ assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension"
+ return list(zip(cameras, data))
+
+ def get_param_groups(self) -> Dict[str, List[Parameter]]:
+ """Get the param groups for the data manager.
+ Returns:
+ A list of dictionaries containing the data manager's param groups.
+ """
+ return {}
+
+ def get_train_rays_per_batch(self):
+ # TODO: fix this to be the resolution of the last image rendered
+ return 800 * 800
+
+ def next_train(self, step: int) -> Tuple[Cameras, Dict]:
+ """Returns the next training batch
+
+ Returns a Camera instead of raybundle"""
+ image_idx = self.train_unseen_cameras.pop(random.randint(0, len(self.train_unseen_cameras) - 1))
+ # Make sure to re-populate the unseen cameras list if we have exhausted it
+ if len(self.train_unseen_cameras) == 0:
+ self.train_unseen_cameras = [i for i in range(len(self.train_dataset))]
+
+ data = deepcopy(self.cached_train[image_idx])
+ data["image"] = data["image"].to(self.device)
+
+ assert len(self.train_dataset.cameras.shape) == 1, "Assumes single batch dimension"
+ camera = self.train_dataset.cameras[image_idx : image_idx + 1].to(self.device)
+ if camera.metadata is None:
+ camera.metadata = {}
+ camera.metadata["cam_idx"] = image_idx
+ return camera, data
+
+ def next_eval(self, step: int) -> Tuple[Cameras, Dict]:
+ """Returns the next evaluation batch
+
+ Returns a Camera instead of raybundle"""
+ image_idx = self.eval_unseen_cameras.pop(random.randint(0, len(self.eval_unseen_cameras) - 1))
+ # Make sure to re-populate the unseen cameras list if we have exhausted it
+ if len(self.eval_unseen_cameras) == 0:
+ self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))]
+ data = deepcopy(self.cached_eval[image_idx])
+ data["image"] = data["image"].to(self.device)
+ assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension"
+ camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device)
+ return camera, data
+
+ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
+ """Returns the next evaluation batch
+
+ Returns a Camera instead of raybundle
+
+ TODO: Make sure this logic is consistent with the vanilladatamanager"""
+ image_idx = self.eval_unseen_cameras.pop(random.randint(0, len(self.eval_unseen_cameras) - 1))
+ # Make sure to re-populate the unseen cameras list if we have exhausted it
+ if len(self.eval_unseen_cameras) == 0:
+ self.eval_unseen_cameras = [i for i in range(len(self.eval_dataset))]
+ data = deepcopy(self.cached_eval[image_idx])
+ data["image"] = data["image"].to(self.device)
+ assert len(self.eval_dataset.cameras.shape) == 1, "Assumes single batch dimension"
+ camera = self.eval_dataset.cameras[image_idx : image_idx + 1].to(self.device)
+ return camera, data
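+
+    # Illustrative usage (a sketch, not part of the API): a training loop consumes this manager as
+    #   camera, data = datamanager.next_train(step)   # Cameras of shape (1,), data["image"] on device
+    #   outputs = model.get_outputs_for_camera(camera)
+    # i.e. the model receives a full Cameras object rather than a RayBundle, so rasterization-based
+    # models can render the whole image in a single call.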
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
index 5c4d5c4a27..4e22bee15b 100644
--- a/nerfstudio/data/datamanagers/parallel_datamanager.py
+++ b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -38,7 +38,7 @@
from rich.progress import track
from torch.nn import Parameter
-from nerfstudio.cameras.cameras import CameraType
+from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.cameras.rays import RayBundle
from nerfstudio.data.datamanagers.base_datamanager import (
DataManager,
@@ -293,12 +293,11 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
ray_bundle = self.eval_ray_generator(ray_indices)
return ray_bundle, batch
- def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
+ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
"""Retrieve the next eval image."""
- for camera_ray_bundle, batch in self.eval_dataloader:
- assert camera_ray_bundle.camera_indices is not None
- image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0])
- return image_idx, camera_ray_bundle, batch
+ for camera, batch in self.eval_dataloader:
+ assert camera.shape[0] == 1
+ return camera, batch
raise ValueError("No more eval images")
def get_train_rays_per_batch(self) -> int:
diff --git a/nerfstudio/data/datamanagers/random_cameras_datamanager.py b/nerfstudio/data/datamanagers/random_cameras_datamanager.py
index 4269b428ab..7704f2f161 100644
--- a/nerfstudio/data/datamanagers/random_cameras_datamanager.py
+++ b/nerfstudio/data/datamanagers/random_cameras_datamanager.py
@@ -290,11 +290,10 @@ def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
return ray_bundle, {"vertical": vertical_rotation, "central": central_rotation}
- def next_eval_image(self, step: int) -> Tuple[int, RayBundle, Dict]:
- for camera_ray_bundle, batch in self.eval_dataloader:
- assert camera_ray_bundle.camera_indices is not None
- image_idx = int(camera_ray_bundle.camera_indices[0, 0, 0])
- return image_idx, camera_ray_bundle, batch
+ def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]:
+ for camera, batch in self.eval_dataloader:
+ assert camera.shape[0] == 1
+ return camera, batch
raise ValueError("No more eval images")
def get_train_rays_per_batch(self) -> int:
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
index 071a11e1ea..02ddca2934 100644
--- a/nerfstudio/data/dataparsers/colmap_dataparser.py
+++ b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -70,7 +70,7 @@ class ColmapDataParserConfig(DataParserConfig):
"""Path to masks directory. If not set, masks are not loaded."""
depths_path: Optional[Path] = None
"""Path to depth maps directory. If not set, depths are not loaded."""
- colmap_path: Path = Path("sparse/0")
+ colmap_path: Path = Path("colmap/sparse/0")
"""Path to the colmap reconstruction directory relative to the data path."""
load_3D_points: bool = False
"""Whether to load the 3D points from the colmap reconstruction."""
diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
index 9b4c2951ac..7f6f21b885 100644
--- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
+++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
@@ -20,6 +20,7 @@
from typing import Literal, Optional, Type
import numpy as np
+import open3d as o3d
import torch
from PIL import Image
@@ -318,6 +319,12 @@ def _generate_dataparser_outputs(self, split="train"):
applied_scale = float(meta["applied_scale"])
scale_factor *= applied_scale
+ # Load 3D points
+ metadata = {}
+ if "ply_file_path" in meta:
+ ply_file_path = data_dir / meta["ply_file_path"]
+ metadata.update(self._load_3D_points(ply_file_path, transform_matrix, scale_factor))
+
dataparser_outputs = DataparserOutputs(
image_filenames=image_filenames,
cameras=cameras,
@@ -328,10 +335,34 @@ def _generate_dataparser_outputs(self, split="train"):
metadata={
"depth_filenames": depth_filenames if len(depth_filenames) > 0 else None,
"depth_unit_scale_factor": self.config.depth_unit_scale_factor,
+ **metadata,
},
)
return dataparser_outputs
+ def _load_3D_points(self, ply_file_path: Path, transform_matrix: torch.Tensor, scale_factor: float):
+ pcd = o3d.io.read_point_cloud(str(ply_file_path))
+
+ points3D = torch.from_numpy(np.asarray(pcd.points, dtype=np.float32))
+ points3D = (
+ torch.cat(
+ (
+ points3D,
+ torch.ones_like(points3D[..., :1]),
+ ),
+ -1,
+ )
+ @ transform_matrix.T
+ )
+ points3D *= scale_factor
+ points3D_rgb = torch.from_numpy((np.asarray(pcd.colors) * 255).astype(np.uint8))
+
+ out = {
+ "points3D_xyz": points3D,
+ "points3D_rgb": points3D_rgb,
+ }
+ return out
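+        # The returned keys end up in DataparserOutputs.metadata; downstream, the pipeline reads
+        # "points3D_xyz" / "points3D_rgb" as seed points for point-based models (see base_pipeline.py).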
+
def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="images_") -> Path:
"""Get the filename of the image file.
downsample_folder_prefix can be used to point to auxiliary image data, e.g. masks
diff --git a/nerfstudio/data/utils/dataloaders.py b/nerfstudio/data/utils/dataloaders.py
index e546a130b6..6a64ba738b 100644
--- a/nerfstudio/data/utils/dataloaders.py
+++ b/nerfstudio/data/utils/dataloaders.py
@@ -175,13 +175,17 @@ def __iter__(self):
def __next__(self) -> Tuple[RayBundle, Dict]:
"""Returns the next batch of data"""
- def get_camera(self, image_idx: int = 0) -> Cameras:
+ def get_camera(self, image_idx: int = 0) -> Tuple[Cameras, Dict]:
"""Get camera for the given image index
Args:
image_idx: Camera image index
"""
- return self.cameras[image_idx]
+ camera = self.cameras[image_idx : image_idx + 1]
+ batch = self.input_dataset[image_idx]
+ batch = get_dict_to_torch(batch, device=self.device, exclude=["image"])
+ assert isinstance(batch, dict)
+ return camera, batch
def get_data_from_image_idx(self, image_idx: int) -> Tuple[RayBundle, Dict]:
"""Returns the data for a specific image index.
@@ -226,9 +230,9 @@ def __iter__(self):
def __next__(self):
if self.count < len(self.image_indices):
image_idx = self.image_indices[self.count]
- ray_bundle, batch = self.get_data_from_image_idx(image_idx)
+ camera, batch = self.get_camera(image_idx)
self.count += 1
- return ray_bundle, batch
+ return camera, batch
raise StopIteration
@@ -245,5 +249,5 @@ def __iter__(self):
def __next__(self):
# choose a random image index
image_idx = random.randint(0, len(self.cameras) - 1)
- ray_bundle, batch = self.get_data_from_image_idx(image_idx)
- return ray_bundle, batch
+ camera, batch = self.get_camera(image_idx)
+ return camera, batch
diff --git a/nerfstudio/engine/optimizers.py b/nerfstudio/engine/optimizers.py
index 9b8d48dba3..0a87947863 100644
--- a/nerfstudio/engine/optimizers.py
+++ b/nerfstudio/engine/optimizers.py
@@ -135,6 +135,12 @@ def zero_grad_all(self) -> None:
for _, optimizer in self.optimizers.items():
optimizer.zero_grad()
+ def zero_grad_some(self, param_groups: List[str]) -> None:
+ """Zero the gradients for the given parameter groups."""
+ for param_group in param_groups:
+ optimizer = self.optimizers[param_group]
+ optimizer.zero_grad()
+
def optimizer_scaler_step_all(self, grad_scaler: GradScaler) -> None:
"""Take an optimizer step using a grad scaler.
@@ -149,6 +155,21 @@ def optimizer_scaler_step_all(self, grad_scaler: GradScaler) -> None:
if any(any(p.grad is not None for p in g["params"]) for g in optimizer.param_groups):
grad_scaler.step(optimizer)
+ def optimizer_scaler_step_some(self, grad_scaler: GradScaler, param_groups: List[str]) -> None:
+ """Take an optimizer step using a grad scaler ONLY on the specified param groups.
+
+ Args:
+ grad_scaler: GradScaler to use
+ """
+ for param_group in param_groups:
+ optimizer = self.optimizers[param_group]
+ max_norm = self.config[param_group]["optimizer"].max_norm
+ if max_norm is not None:
+ grad_scaler.unscale_(optimizer)
+ torch.nn.utils.clip_grad_norm_(self.parameters[param_group], max_norm)
+ if any(any(p.grad is not None for p in g["params"]) for g in optimizer.param_groups):
+ grad_scaler.step(optimizer)
+
def optimizer_step_all(self) -> None:
"""Run step for all optimizers."""
for param_group, optimizer in self.optimizers.items():
diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py
index 2930db892e..c262887d8d 100644
--- a/nerfstudio/engine/trainer.py
+++ b/nerfstudio/engine/trainer.py
@@ -24,8 +24,8 @@
from dataclasses import dataclass, field
from pathlib import Path
from threading import Lock
-from typing import Dict, List, Literal, Optional, Tuple, Type, cast
-
+from typing import Dict, List, Literal, Optional, Tuple, Type, cast, DefaultDict
+from collections import defaultdict
import torch
from nerfstudio.configs.experiment_config import ExperimentConfig
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
@@ -80,8 +80,8 @@ class TrainerConfig(ExperimentConfig):
"""Path to checkpoint file."""
log_gradients: bool = False
"""Optionally log gradients during training"""
- gradient_accumulation_steps: int = 1
- """Number of steps to accumulate gradients over."""
+ gradient_accumulation_steps: Dict = field(default_factory=lambda: {})
+ """Number of steps to accumulate gradients over. Contains a mapping of {param_group:num}"""
class Trainer:
@@ -118,7 +118,8 @@ def __init__(self, config: TrainerConfig, local_rank: int = 0, world_size: int =
self.mixed_precision: bool = self.config.mixed_precision
self.use_grad_scaler: bool = self.mixed_precision or self.config.use_grad_scaler
self.training_state: Literal["training", "paused", "completed"] = "training"
- self.gradient_accumulation_steps: int = self.config.gradient_accumulation_steps
+ self.gradient_accumulation_steps: DefaultDict = defaultdict(lambda: 1)
+ self.gradient_accumulation_steps.update(self.config.gradient_accumulation_steps)
if self.device == "cpu":
self.mixed_precision = False
@@ -459,19 +460,23 @@ def train_iteration(self, step: int) -> TRAIN_INTERATION_OUTPUT:
step: Current training step.
"""
- self.optimizers.zero_grad_all()
+ needs_zero = [
+ group for group in self.optimizers.parameters.keys() if step % self.gradient_accumulation_steps[group] == 0
+ ]
+ self.optimizers.zero_grad_some(needs_zero)
cpu_or_cuda_str: str = self.device.split(":")[0]
cpu_or_cuda_str = "cpu" if cpu_or_cuda_str == "mps" else cpu_or_cuda_str
- assert (
- self.gradient_accumulation_steps > 0
- ), f"gradient_accumulation_steps must be > 0, not {self.gradient_accumulation_steps}"
- for _ in range(self.gradient_accumulation_steps):
- with torch.autocast(device_type=cpu_or_cuda_str, enabled=self.mixed_precision):
- _, loss_dict, metrics_dict = self.pipeline.get_train_loss_dict(step=step)
- loss = functools.reduce(torch.add, loss_dict.values())
- loss /= self.gradient_accumulation_steps
- self.grad_scaler.scale(loss).backward() # type: ignore
- self.optimizers.optimizer_scaler_step_all(self.grad_scaler)
+
+ with torch.autocast(device_type=cpu_or_cuda_str, enabled=self.mixed_precision):
+ _, loss_dict, metrics_dict = self.pipeline.get_train_loss_dict(step=step)
+ loss = functools.reduce(torch.add, loss_dict.values())
+ self.grad_scaler.scale(loss).backward() # type: ignore
+ needs_step = [
+ group
+ for group in self.optimizers.parameters.keys()
+ if step % self.gradient_accumulation_steps[group] == self.gradient_accumulation_steps[group] - 1
+ ]
+ self.optimizers.optimizer_scaler_step_some(self.grad_scaler, needs_step)
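+        # Example (a sketch): with gradient_accumulation_steps={"camera_opt": 4}, the "camera_opt"
+        # group is zeroed at steps 0, 4, 8, ... and stepped at steps 3, 7, 11, ..., while every other
+        # group falls back to the defaultdict value of 1 and is zeroed and stepped every iteration.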
if self.config.log_gradients:
total_grad = 0
diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py
index 66d6586901..56b4b64456 100644
--- a/nerfstudio/exporter/exporter_utils.py
+++ b/nerfstudio/exporter/exporter_utils.py
@@ -28,6 +28,7 @@
import pymeshlab
import torch
from jaxtyping import Float
+from nerfstudio.cameras.rays import RayBundle
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
from torch import Tensor
@@ -130,6 +131,7 @@ def generate_point_cloud(
with torch.no_grad():
ray_bundle, _ = pipeline.datamanager.next_train(0)
+ assert isinstance(ray_bundle, RayBundle)
outputs = pipeline.model(ray_bundle)
if rgb_output_name not in outputs:
CONSOLE.rule("Error", style="red")
diff --git a/nerfstudio/model_components/losses.py b/nerfstudio/model_components/losses.py
index e90ec8e613..d464669893 100644
--- a/nerfstudio/model_components/losses.py
+++ b/nerfstudio/model_components/losses.py
@@ -575,6 +575,10 @@ def depth_ranking_loss(rendered_depth, gt_depth):
and rendered_depth are from pixels with a radius of each other
"""
m = 1e-4
+ if rendered_depth.shape[0] % 2 != 0:
+ # chop off one index
+ rendered_depth = rendered_depth[:-1, :]
+ gt_depth = gt_depth[:-1, :]
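+        # e.g. a batch of 7 rows becomes 6, so rows (0,1), (2,3), (4,5) form the ranking pairs below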
dpt_diff = gt_depth[::2, :] - gt_depth[1::2, :]
out_diff = rendered_depth[::2, :] - rendered_depth[1::2, :] + m
differing_signs = torch.sign(dpt_diff) != torch.sign(out_diff)
diff --git a/nerfstudio/models/base_model.py b/nerfstudio/models/base_model.py
index d7a2217443..2a434a85e8 100644
--- a/nerfstudio/models/base_model.py
+++ b/nerfstudio/models/base_model.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""
-Base Model implementation which takes in RayBundles
+Base Model implementation which takes in RayBundles or Cameras
"""
from __future__ import annotations
@@ -27,10 +27,11 @@
from torch import nn
from torch.nn import Parameter
+from nerfstudio.cameras.cameras import Cameras
from nerfstudio.cameras.rays import RayBundle
from nerfstudio.configs.base_config import InstantiateConfig
from nerfstudio.configs.config_utils import to_immutable_dict
-from nerfstudio.data.scene_box import SceneBox
+from nerfstudio.data.scene_box import SceneBox, OrientedBox
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
from nerfstudio.model_components.scene_colliders import NearFarCollider
@@ -117,7 +118,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]:
"""
@abstractmethod
- def get_outputs(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, List]]:
+ def get_outputs(self, ray_bundle: Union[RayBundle, Cameras]) -> Dict[str, Union[torch.Tensor, List]]:
"""Takes in a Ray Bundle and returns a dictionary of outputs.
Args:
@@ -128,7 +129,7 @@ def get_outputs(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, Li
Outputs of model. (ie. rendered colors)
"""
- def forward(self, ray_bundle: RayBundle) -> Dict[str, Union[torch.Tensor, List]]:
+ def forward(self, ray_bundle: Union[RayBundle, Cameras]) -> Dict[str, Union[torch.Tensor, List]]:
"""Run forward starting with a ray bundle. This outputs different things depending on the configuration
of the model and whether or not the batch is provided (whether or not we are training basically)
@@ -161,6 +162,18 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
metrics_dict: dictionary of metrics, some of which we can use for loss
"""
+ @torch.no_grad()
+ def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]:
+ """Takes in a camera, generates the raybundle, and computes the output of the model.
+ Assumes a ray-based model.
+
+ Args:
+            camera: camera to render from; a ray bundle is generated from it
+ """
+ return self.get_outputs_for_camera_ray_bundle(
+ camera.generate_rays(camera_indices=0, keep_shape=True, obb_box=obb_box)
+ )
+
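+    # Note: get_outputs_for_camera above is a thin wrapper; for ray-based models it generates the
+    # camera's rays and delegates to get_outputs_for_camera_ray_bundle below, while rasterization-based
+    # models (e.g. gaussian splatting) override it directly.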
@torch.no_grad()
def get_outputs_for_camera_ray_bundle(self, camera_ray_bundle: RayBundle) -> Dict[str, torch.Tensor]:
"""Takes in camera parameters and computes the output of the model.
diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
new file mode 100644
index 0000000000..fe081734ec
--- /dev/null
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -0,0 +1,764 @@
+# ruff: noqa: E741
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Gaussian Splatting model implementation.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple, Type, Union
+from nerfstudio.data.scene_box import OrientedBox
+
+import torch
+from torch.nn import Parameter
+from torchmetrics.image import PeakSignalNoiseRatio
+from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
+import torchvision.transforms.functional as TF
+
+from nerfstudio.cameras.cameras import Cameras
+from gsplat._torch_impl import quat_to_rotmat
+from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
+from nerfstudio.engine.optimizers import Optimizers
+from nerfstudio.models.base_model import Model, ModelConfig
+import math
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+from nerfstudio.cameras.camera_optimizers import CameraOptimizer, CameraOptimizerConfig
+
+from gsplat.rasterize import RasterizeGaussians
+from gsplat.project_gaussians import ProjectGaussians
+from gsplat.sh import SphericalHarmonics, num_sh_bases
+from pytorch_msssim import SSIM
+
+# need following import for background color override
+from nerfstudio.model_components import renderers
+
+
+def random_quat_tensor(N):
+ """
+ Defines a random quaternion tensor of shape (N, 4)
+ """
+ u = torch.rand(N)
+ v = torch.rand(N)
+ w = torch.rand(N)
+ return torch.stack(
+ [
+ torch.sqrt(1 - u) * torch.sin(2 * math.pi * v),
+ torch.sqrt(1 - u) * torch.cos(2 * math.pi * v),
+ torch.sqrt(u) * torch.sin(2 * math.pi * w),
+            torch.sqrt(u) * torch.cos(2 * math.pi * w),
+ ],
+ dim=-1,
+ )
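+# With the sin/cos pairing above, each row is an exact unit quaternion drawn uniformly over SO(3)
+# (Shoemake's method), e.g. random_quat_tensor(8).norm(dim=-1) is all ones up to float error.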
+
+
+def RGB2SH(rgb):
+ """
+ Converts from RGB values [0,1] to the 0th spherical harmonic coefficient
+ """
+ C0 = 0.28209479177387814
+ return (rgb - 0.5) / C0
+
+
+def SH2RGB(sh):
+ """
+ Converts from the 0th spherical harmonic coefficient to RGB values [0,1]
+ """
+ C0 = 0.28209479177387814
+ return sh * C0 + 0.5
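+# Example: C0 is the constant degree-0 SH basis value 1 / (2 * sqrt(pi)) ~= 0.2821, so
+# RGB2SH(torch.full((3,), 0.5)) is all zeros and SH2RGB(RGB2SH(x)) == x for any x.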
+
+
+def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"):
+ """
+ Constructs an OpenGL-style perspective projection matrix.
+ """
+ t = znear * math.tan(0.5 * fovy)
+ b = -t
+ r = znear * math.tan(0.5 * fovx)
+ l = -r
+ n = znear
+ f = zfar
+ return torch.tensor(
+ [
+ [2 * n / (r - l), 0.0, (r + l) / (r - l), 0.0],
+ [0.0, 2 * n / (t - b), (t + b) / (t - b), 0.0],
+ [0.0, 0.0, (f + n) / (f - n), -1.0 * f * n / (f - n)],
+ [0.0, 0.0, 1.0, 0.0],
+ ],
+ device=device,
+ )
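+# Example (a sketch): for a camera with fx = fy = 1000 px and a 1920x1080 image,
+#   fovx = 2 * math.atan(1920 / (2 * 1000.0))
+#   fovy = 2 * math.atan(1080 / (2 * 1000.0))
+#   P = projection_matrix(0.001, 1000, fovx, fovy)  # the same near/far planes used in get_outputs below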
+
+
+@dataclass
+class GaussianSplattingModelConfig(ModelConfig):
+ """Gaussian Splatting Model Config"""
+
+ _target: Type = field(default_factory=lambda: GaussianSplattingModel)
+ warmup_length: int = 500
+ """period of steps where refinement is turned off"""
+ refine_every: int = 100
+ """period of steps where gaussians are culled and densified"""
+ resolution_schedule: int = 250
+ """training starts at 1/d resolution, every n steps this is doubled"""
+ num_downscales: int = 2
+ """at the beginning, resolution is 1/2^d, where d is this number"""
+ cull_alpha_thresh: float = 0.1
+ """threshold of opacity for culling gaussians"""
+ cull_scale_thresh: float = 0.5
+ """threshold of scale for culling gaussians"""
+ reset_alpha_every: int = 30
+ """Every this many refinement steps, reset the alpha"""
+ densify_grad_thresh: float = 0.0002
+ """threshold of positional gradient norm for densifying gaussians"""
+ densify_size_thresh: float = 0.01
+ """below this size, gaussians are *duplicated*, otherwise split"""
+ n_split_samples: int = 2
+ """number of samples to split gaussians into"""
+ sh_degree_interval: int = 1000
+ """every n intervals turn on another sh degree"""
+ cull_screen_size: float = 0.15
+ """if a gaussian is more than this percent of screen space, cull it"""
+ split_screen_size: float = 0.05
+ """if a gaussian is more than this percent of screen space, split it"""
+ stop_screen_size_at: int = 4000
+ """stop culling/splitting at this step WRT screen size of gaussians"""
+ random_init: bool = False
+ """whether to initialize the positions uniformly randomly (not SFM points)"""
+ ssim_lambda: float = 0.2
+ """weight of ssim loss"""
+ stop_split_at: int = 15000
+ """stop splitting at this step"""
+ sh_degree: int = 4
+ """maximum degree of spherical harmonics to use"""
+ camera_optimizer: CameraOptimizerConfig = CameraOptimizerConfig(mode="off")
+ """camera optimizer config"""
+ max_gauss_ratio: float = 10.0
+ """threshold of ratio of gaussian max to min scale before applying regularization
+ loss from the PhysGaussian paper
+ """
+
+
+class GaussianSplattingModel(Model):
+ """Gaussian Splatting model
+
+ Args:
+ config: Gaussian Splatting configuration to instantiate model
+ """
+
+ config: GaussianSplattingModelConfig
+
+ def __init__(self, *args, **kwargs):
+ if "seed_points" in kwargs:
+ self.seed_pts = kwargs["seed_points"]
+ else:
+ self.seed_pts = None
+ super().__init__(*args, **kwargs)
+
+ def populate_modules(self):
+ if self.seed_pts is not None and not self.config.random_init:
+ self.means = torch.nn.Parameter(self.seed_pts[0]) # (Location, Color)
+ else:
+ self.means = torch.nn.Parameter((torch.rand((500000, 3)) - 0.5) * 10)
+ self.xys_grad_norm = None
+ self.max_2Dsize = None
+ distances, _ = self.k_nearest_sklearn(self.means.data, 3)
+ distances = torch.from_numpy(distances)
+ # find the average of the three nearest neighbors for each point and use that as the scale
+ avg_dist = distances.mean(dim=-1, keepdim=True)
+ self.scales = torch.nn.Parameter(torch.log(avg_dist.repeat(1, 3)))
+ self.quats = torch.nn.Parameter(random_quat_tensor(self.num_points))
+ dim_sh = num_sh_bases(self.config.sh_degree)
+
+ if self.seed_pts is not None and not self.config.random_init:
+ fused_color = RGB2SH(self.seed_pts[1] / 255)
+ shs = torch.zeros((fused_color.shape[0], dim_sh, 3)).float().cuda()
+ shs[:, 0, :3] = fused_color
+            shs[:, 1:, :] = 0.0
+ self.colors_all = torch.nn.Parameter(shs)
+ else:
+ colors = torch.nn.Parameter(torch.rand(self.num_points, 1, 3))
+ shs_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3)))
+ self.colors_all = torch.nn.Parameter(torch.cat([colors, shs_rest], dim=1))
+
+ self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1)))
+
+ # metrics
+ self.psnr = PeakSignalNoiseRatio(data_range=1.0)
+ self.ssim = SSIM(data_range=1.0, size_average=True, channel=3)
+ self.lpips = LearnedPerceptualImagePatchSimilarity(normalize=True)
+ self.step = 0
+
+ self.crop_box: Optional[OrientedBox] = None
+ self.back_color = torch.zeros(3)
+
+ self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup(
+ num_cameras=self.num_train_data, device="cpu"
+ )
+
+ @property
+ def colors(self):
+ return SH2RGB(self.colors_all[:, 0, :])
+
+ @property
+ def shs_rest(self):
+ return self.colors_all[:, 1:, :]
+
+ def load_state_dict(self, dict, **kwargs): # type: ignore
+ # resize the parameters to match the new number of points
+ self.step = 30000
+ newp = dict["means"].shape[0]
+ self.means = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device))
+ self.scales = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device))
+ self.quats = torch.nn.Parameter(torch.zeros(newp, 4, device=self.device))
+ self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device))
+ self.colors_all = torch.nn.Parameter(
+ torch.zeros(newp, num_sh_bases(self.config.sh_degree), 3, device=self.device)
+ )
+ super().load_state_dict(dict, **kwargs)
+
+ def k_nearest_sklearn(self, x: torch.Tensor, k: int):
+ """
+ Find k-nearest neighbors using sklearn's NearestNeighbors.
+ x: The data tensor of shape [num_samples, num_features]
+ k: The number of neighbors to retrieve
+ """
+ # Convert tensor to numpy array
+ x_np = x.cpu().numpy()
+
+ # Build the nearest neighbors model
+ nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np)
+
+ # Find the k-nearest neighbors
+ distances, indices = nn_model.kneighbors(x_np)
+
+ # Exclude the point itself from the result and return
+ return distances[:, 1:].astype(np.float32), indices[:, 1:].astype(np.float32)
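+        # Example (a sketch): this is how populate_modules seeds the per-gaussian scales:
+        #   distances, _ = self.k_nearest_sklearn(self.means.data, 3)
+        #   avg_dist = torch.from_numpy(distances).mean(dim=-1, keepdim=True)  # (N, 1)
+        #   scales = torch.log(avg_dist.repeat(1, 3))                          # isotropic log-scale init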
+
+ def remove_from_optim(self, optimizer, deleted_mask, new_params):
+ """removes the deleted_mask from the optimizer provided"""
+ assert len(new_params) == 1
+ # assert isinstance(optimizer, torch.optim.Adam), "Only works with Adam"
+
+ param = optimizer.param_groups[0]["params"][0]
+ param_state = optimizer.state[param]
+ del optimizer.state[param]
+
+ # Modify the state directly without deleting and reassigning.
+ param_state["exp_avg"] = param_state["exp_avg"][~deleted_mask]
+ param_state["exp_avg_sq"] = param_state["exp_avg_sq"][~deleted_mask]
+
+ # Update the parameter in the optimizer's param group.
+ del optimizer.param_groups[0]["params"][0]
+ del optimizer.param_groups[0]["params"]
+ optimizer.param_groups[0]["params"] = new_params
+ optimizer.state[new_params[0]] = param_state
+
+ def dup_in_optim(self, optimizer, dup_mask, new_params, n=2):
+ """adds the parameters to the optimizer"""
+ param = optimizer.param_groups[0]["params"][0]
+ param_state = optimizer.state[param]
+ repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1))
+ param_state["exp_avg"] = torch.cat(
+ [param_state["exp_avg"], torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims)],
+ dim=0,
+ )
+ param_state["exp_avg_sq"] = torch.cat(
+ [
+ param_state["exp_avg_sq"],
+ torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims),
+ ],
+ dim=0,
+ )
+ del optimizer.state[param]
+ optimizer.state[new_params[0]] = param_state
+ optimizer.param_groups[0]["params"] = new_params
+ del param
+
+ def after_train(self, step: int):
+ with torch.no_grad():
+ # keep track of a moving average of grad norms
+ visible_mask = (self.radii > 0).flatten()
+ grads = self.xys.grad.detach().norm(dim=-1) # TODO fill in
+ # print(f"grad norm min {grads.min().item()} max {grads.max().item()} mean {grads.mean().item()} size {grads.shape}")
+ if self.xys_grad_norm is None:
+ self.xys_grad_norm = grads
+ self.vis_counts = torch.ones_like(self.xys_grad_norm)
+ else:
+ assert self.vis_counts is not None
+ self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1
+ self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask]
+
+ # update the max screen size, as a ratio of number of pixels
+ if self.max_2Dsize is None:
+ self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32)
+ newradii = self.radii.detach()[visible_mask]
+ self.max_2Dsize[visible_mask] = torch.maximum(
+ self.max_2Dsize[visible_mask], newradii / float(max(self.last_size[0], self.last_size[1]))
+ )
+
+ def set_crop(self, crop_box: Optional[OrientedBox]):
+ self.crop_box = crop_box
+
+ def set_background(self, back_color: torch.Tensor):
+ assert back_color.shape == (3,)
+ self.back_color = back_color
+
+ def refinement_after(self, optimizers: Optimizers, step):
+ if self.step >= self.config.warmup_length:
+ with torch.no_grad():
+ # only split/cull if we've seen every image since opacity reset
+ reset_interval = self.config.reset_alpha_every * self.config.refine_every
+ if (
+ self.step < self.config.stop_split_at
+ and self.step % reset_interval > self.num_train_data + self.config.refine_every
+ ):
+ # then we densify
+ assert (
+ self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None
+ )
+ avg_grad_norm = (
+ (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1])
+ )
+ high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze()
+ splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze()
+ if self.step < self.config.stop_screen_size_at:
+ splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze()
+ splits &= high_grads
+ nsamps = self.config.n_split_samples
+ (
+ split_means,
+ split_colors,
+ split_opacities,
+ split_scales,
+ split_quats,
+ ) = self.split_gaussians(splits, nsamps)
+
+ dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze()
+ dups &= high_grads
+ dup_means, dup_colors, dup_opacities, dup_scales, dup_quats = self.dup_gaussians(dups)
+ self.means = Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0))
+ self.colors_all = Parameter(torch.cat([self.colors_all.detach(), split_colors, dup_colors], dim=0))
+
+ self.opacities = Parameter(
+ torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0)
+ )
+ self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0))
+ self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0))
+ # append zeros to the max_2Dsize tensor
+ self.max_2Dsize = torch.cat(
+ [self.max_2Dsize, torch.zeros_like(split_scales[:, 0]), torch.zeros_like(dup_scales[:, 0])],
+ dim=0,
+ )
+ split_idcs = torch.where(splits)[0]
+ param_groups = self.get_gaussian_param_groups()
+ for group, param in param_groups.items():
+ self.dup_in_optim(optimizers.optimizers[group], split_idcs, param, n=nsamps)
+ dup_idcs = torch.where(dups)[0]
+
+ param_groups = self.get_gaussian_param_groups()
+ for group, param in param_groups.items():
+ self.dup_in_optim(optimizers.optimizers[group], dup_idcs, param, 1)
+
+ # Offset all the opacity reset logic by refine_every so that we don't
+ # save checkpoints right when the opacity is reset (saves every 2k)
+ if self.step % reset_interval > self.num_train_data + self.config.refine_every:
+ # then cull
+ deleted_mask = self.cull_gaussians()
+ param_groups = self.get_gaussian_param_groups()
+ for group, param in param_groups.items():
+ self.remove_from_optim(optimizers.optimizers[group], deleted_mask, param)
+
+ if self.step % reset_interval == self.config.refine_every:
+ reset_value = self.config.cull_alpha_thresh * 0.8
+ self.opacities.data = torch.full_like(
+ self.opacities.data, torch.logit(torch.tensor(reset_value)).item()
+ )
+ # reset the exp of optimizer
+ optim = optimizers.optimizers["opacity"]
+ param = optim.param_groups[0]["params"][0]
+ param_state = optim.state[param]
+ param_state["exp_avg"] = torch.zeros_like(param_state["exp_avg"])
+ param_state["exp_avg_sq"] = torch.zeros_like(param_state["exp_avg_sq"])
+ self.xys_grad_norm = None
+ self.vis_counts = None
+ self.max_2Dsize = None
+
+ def cull_gaussians(self):
+ """
+        This function deletes gaussians whose opacity is under a certain threshold
+ """
+ n_bef = self.num_points
+ # cull transparent ones
+ culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze()
+ if self.step > self.config.refine_every * self.config.reset_alpha_every:
+ # cull huge ones
+ toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze()
+ culls = culls | toobigs
+ if self.step < self.config.stop_screen_size_at:
+ # cull big screen space
+ assert self.max_2Dsize is not None
+ culls = culls | (self.max_2Dsize > self.config.cull_screen_size).squeeze()
+ self.means = Parameter(self.means[~culls].detach())
+ self.scales = Parameter(self.scales[~culls].detach())
+ self.quats = Parameter(self.quats[~culls].detach())
+ self.colors_all = Parameter(self.colors_all[~culls].detach())
+ self.opacities = Parameter(self.opacities[~culls].detach())
+
+ print(f"Culled {n_bef - self.num_points} gaussians")
+ return culls
+
+ def split_gaussians(self, split_mask, samps):
+ """
+ This function splits gaussians that are too large
+ """
+
+ n_splits = split_mask.sum().item()
+ print(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}")
+ centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales
+ scaled_samples = (
+ torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples
+ ) # how these scales are rotated
+ quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first
+ rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated
+ rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze()
+ new_means = rotated_samples + self.means[split_mask].repeat(samps, 1)
+ # step 2, sample new colors
+ new_colors_all = self.colors_all[split_mask].repeat(samps, 1, 1)
+ # step 3, sample new opacities
+ new_opacities = self.opacities[split_mask].repeat(samps, 1)
+ # step 4, sample new scales
+ size_fac = 1.6
+ new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1)
+ self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac)
+ # step 5, sample new quats
+ new_quats = self.quats[split_mask].repeat(samps, 1)
+ return new_means, new_colors_all, new_opacities, new_scales, new_quats
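+        # In short: each selected gaussian spawns `samps` children whose centres are sampled from the
+        # parent gaussian itself (an axis-aligned N(0, scale) draw rotated by the parent's quaternion),
+        # and both parent and children get their scales shrunk by size_fac = 1.6.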
+
+ def dup_gaussians(self, dup_mask):
+ """
+ This function duplicates gaussians that are too small
+ """
+ n_dups = dup_mask.sum().item()
+ print(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}")
+ dup_means = self.means[dup_mask]
+ dup_colors = self.colors_all[dup_mask]
+ dup_opacities = self.opacities[dup_mask]
+ dup_scales = self.scales[dup_mask]
+ dup_quats = self.quats[dup_mask]
+ return dup_means, dup_colors, dup_opacities, dup_scales, dup_quats
+
+ @property
+ def num_points(self):
+ return self.means.shape[0]
+
+ def get_training_callbacks(
+ self, training_callback_attributes: TrainingCallbackAttributes
+ ) -> List[TrainingCallback]:
+ cbs = []
+ cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb))
+ # The order of these matters
+ cbs.append(
+ TrainingCallback(
+ [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
+ self.after_train,
+ )
+ )
+ cbs.append(
+ TrainingCallback(
+ [TrainingCallbackLocation.AFTER_TRAIN_ITERATION],
+ self.refinement_after,
+ update_every_num_iters=self.config.refine_every,
+ args=[training_callback_attributes.optimizers],
+ )
+ )
+ return cbs
+
+ def step_cb(self, step):
+ self.step = step
+
+ def get_gaussian_param_groups(self) -> Dict[str, List[Parameter]]:
+ return {
+ "xyz": [self.means],
+ "color": [self.colors_all],
+ "opacity": [self.opacities],
+ "scaling": [self.scales],
+ "rotation": [self.quats],
+ }
+
+ def get_param_groups(self) -> Dict[str, List[Parameter]]:
+ """Obtain the parameter groups for the optimizers
+
+ Returns:
+ Mapping of different parameter groups
+ """
+ gps = self.get_gaussian_param_groups()
+ # add camera optimizer param groups
+ self.camera_optimizer.get_param_groups(gps)
+ return gps
+
+ def _get_downscale_factor(self):
+ if self.training:
+ return 2 ** max((self.config.num_downscales - self.step // self.config.resolution_schedule), 0)
+ else:
+ return 1
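+        # Example: with num_downscales=2 and resolution_schedule=250, training renders at 1/4
+        # resolution for steps 0-249, 1/2 for steps 250-499, and full resolution from step 500 on;
+        # evaluation always renders at full resolution.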
+
+ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
+ """Takes in a Ray Bundle and returns a dictionary of outputs.
+
+ Args:
+ ray_bundle: Input bundle of rays. This raybundle should have all the
+ needed information to compute the outputs.
+
+ Returns:
+ Outputs of model. (ie. rendered colors)
+ """
+ if not isinstance(camera, Cameras):
+ print("Called get_outputs with not a camera")
+ return {}
+ assert camera.shape[0] == 1, "Only one camera at a time"
+ if self.training:
+ # currently relies on the branch vickie/camera-grads
+ self.camera_optimizer.apply_to_camera(camera)
+ if self.training:
+ background = torch.rand(3, device=self.device)
+ else:
+ # logic for setting the background of the scene
+ if renderers.BACKGROUND_COLOR_OVERRIDE is not None:
+ background = renderers.BACKGROUND_COLOR_OVERRIDE
+ else:
+ background = self.back_color.to(self.device)
+ if self.crop_box is not None and not self.training:
+ crop_ids = self.crop_box.within(self.means).squeeze()
+ if crop_ids.sum() == 0:
+ return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)}
+ else:
+ crop_ids = None
+ camera_downscale = self._get_downscale_factor()
+ camera.rescale_output_resolution(1 / camera_downscale)
+ # shift the camera to center of scene looking at center
+ R = camera.camera_to_worlds[0, :3, :3] # 3 x 3
+ T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1
+ # flip the z and y axes to align with gsplat conventions
+ R_edit = torch.diag(torch.tensor([1, -1, -1], device="cuda", dtype=R.dtype))
+ R = R @ R_edit
+ # analytic matrix inverse to get world2camera matrix
+ R_inv = R.T
+ T_inv = -R_inv @ T
+ viewmat = torch.eye(4, device=R.device, dtype=R.dtype)
+ viewmat[:3, :3] = R_inv
+ viewmat[:3, 3:4] = T_inv
+ # calculate the FOV of the camera given fx and fy, width and height
+ cx = camera.cx.item()
+ cy = camera.cy.item()
+ fovx = 2 * math.atan(camera.width / (2 * camera.fx))
+ fovy = 2 * math.atan(camera.height / (2 * camera.fy))
+ W, H = camera.width.item(), camera.height.item()
+ self.last_size = (H, W)
+ projmat = projection_matrix(0.001, 1000, fovx, fovy, device=self.device)
+ BLOCK_X, BLOCK_Y = 16, 16
+ tile_bounds = (
+ (W + BLOCK_X - 1) // BLOCK_X,
+ (H + BLOCK_Y - 1) // BLOCK_Y,
+ 1,
+ )
+
+ if crop_ids is not None:
+ opacities_crop = self.opacities[crop_ids]
+ means_crop = self.means[crop_ids]
+ colors_crop = self.colors_all[crop_ids]
+ scales_crop = self.scales[crop_ids]
+ quats_crop = self.quats[crop_ids]
+ else:
+ opacities_crop = self.opacities
+ means_crop = self.means
+ colors_crop = self.colors_all
+ scales_crop = self.scales
+ quats_crop = self.quats
+ self.xys, depths, self.radii, conics, num_tiles_hit, _ = ProjectGaussians.apply( # type: ignore
+ means_crop,
+ torch.exp(scales_crop),
+ 1,
+ quats_crop / quats_crop.norm(dim=-1, keepdim=True),
+ viewmat.squeeze()[:3, :],
+ projmat.squeeze() @ viewmat.squeeze(),
+ camera.fx.item(),
+ camera.fy.item(),
+ cx,
+ cy,
+ H,
+ W,
+ tile_bounds,
+ )
+ if (self.radii).sum() == 0:
+ return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)}
+
+ # Important to allow xys grads to populate properly
+ if self.training:
+ self.xys.retain_grad()
+ if self.config.sh_degree > 0:
+ viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3)
+ viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True)
+ n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree)
+ rgbs = SphericalHarmonics.apply(n, viewdirs, colors_crop)
+ rgbs = torch.clamp(rgbs + 0.5, 0.0, 1.0) # type: ignore
+ else:
+            # sh_degree == 0: convert the DC coefficients directly to RGB (consistent with the SH branch above)
+            rgbs = SH2RGB(colors_crop[:, 0, :])  # (N, 3)
+            rgbs = torch.clamp(rgbs, 0.0, 1.0)
+ rgb = RasterizeGaussians.apply(
+ self.xys,
+ depths,
+ self.radii,
+ conics,
+ num_tiles_hit,
+ rgbs,
+ torch.sigmoid(opacities_crop),
+ H,
+ W,
+ background,
+ )
+ depth_im = None
+ if not self.training:
+ depth_im = RasterizeGaussians.apply( # type: ignore
+ self.xys,
+ depths,
+ self.radii,
+ conics,
+ num_tiles_hit,
+ depths[:, None].repeat(1, 3),
+ torch.sigmoid(opacities_crop),
+ H,
+ W,
+ torch.ones(3, device=self.device) * 10,
+ )[..., 0:1]
+ # rescale the camera back to original dimensions
+ camera.rescale_output_resolution(camera_downscale)
+ return {"rgb": rgb, "depth": depth_im} # type: ignore
+
+ def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]:
+ """Compute and returns metrics.
+
+ Args:
+            outputs: the model outputs to compute metrics for
+ batch: ground truth batch corresponding to outputs
+ """
+ d = self._get_downscale_factor()
+ if d > 1:
+ newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d]
+ gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+ else:
+ gt_img = batch["image"]
+ metrics_dict = {}
+ gt_rgb = gt_img.to(self.device) # RGB or RGBA image
+ predicted_rgb = outputs["rgb"]
+ metrics_dict["psnr"] = self.psnr(predicted_rgb, gt_rgb)
+
+ self.camera_optimizer.get_metrics_dict(metrics_dict)
+ metrics_dict["gaussian_count"] = self.num_points
+ return metrics_dict
+
+ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]:
+ """Computes and returns the losses dict.
+
+ Args:
+            outputs: the model outputs to compute the loss for
+ batch: ground truth batch corresponding to outputs
+ metrics_dict: dictionary of metrics, some of which we can use for loss
+ """
+ d = self._get_downscale_factor()
+ if d > 1:
+ newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d]
+ gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+ else:
+ gt_img = batch["image"]
+ Ll1 = torch.abs(gt_img - outputs["rgb"]).mean()
+ simloss = 1 - self.ssim(gt_img.permute(2, 0, 1)[None, ...], outputs["rgb"].permute(2, 0, 1)[None, ...])
+ if self.step % 10 == 0:
+            # Previously, the SH coefficients and the base colors lived in separate optimizers, with the
+            # SHs given a low learning rate. That is slow; instead we apply a regularization to the
+            # higher-order SH coefficients every few steps.
+ sh_reg = self.colors_all[:, 1:, :].norm(dim=1).mean()
+ scale_exp = torch.exp(self.scales)
+ scale_reg = (
+ torch.maximum(
+ scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1), torch.tensor(self.config.max_gauss_ratio)
+ )
+ - self.config.max_gauss_ratio
+ )
+ scale_reg = 0.1 * scale_reg.mean()
+ else:
+ sh_reg = torch.tensor(0.0).to(self.device)
+ scale_reg = torch.tensor(0.0).to(self.device)
+ return {
+ "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss,
+ "sh_reg": sh_reg,
+ "scale_reg": scale_reg,
+ }
+
+ @torch.no_grad()
+ def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]:
+ """Takes in a camera, generates the raybundle, and computes the output of the model.
+ Overridden for a camera-based gaussian model.
+
+ Args:
+ camera: generates raybundle
+ """
+ assert camera is not None, "must provide camera to gaussian model"
+ self.set_crop(obb_box)
+ outs = self.get_outputs(camera.to(self.device))
+ return outs # type: ignore
+
+ def get_image_metrics_and_images(
+ self, outputs: Dict[str, torch.Tensor], batch: Dict[str, torch.Tensor]
+ ) -> Tuple[Dict[str, float], Dict[str, torch.Tensor]]:
+ """Writes the test image outputs.
+
+        Args:
+            outputs: Outputs of the model.
+            batch: Batch of ground truth data corresponding to outputs.
+
+ Returns:
+ A dictionary of metrics.
+ """
+ d = self._get_downscale_factor()
+ if d > 1:
+ newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d]
+ gt_img = TF.resize(batch["image"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+ predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+ else:
+ gt_img = batch["image"]
+ predicted_rgb = outputs["rgb"]
+
+ gt_rgb = gt_img.to(self.device)
+
+ combined_rgb = torch.cat([gt_rgb, predicted_rgb], dim=1)
+
+ # Switch images from [H, W, C] to [1, C, H, W] for metrics computations
+ gt_rgb = torch.moveaxis(gt_rgb, -1, 0)[None, ...]
+ predicted_rgb = torch.moveaxis(predicted_rgb, -1, 0)[None, ...]
+
+ psnr = self.psnr(gt_rgb, predicted_rgb)
+ ssim = self.ssim(gt_rgb, predicted_rgb)
+ lpips = self.lpips(gt_rgb, predicted_rgb)
+
+ # all of these metrics will be logged as scalars
+ metrics_dict = {"psnr": float(psnr.item()), "ssim": float(ssim)} # type: ignore
+ metrics_dict["lpips"] = float(lpips)
+
+ images_dict = {"img": combined_rgb}
+
+ return metrics_dict, images_dict
diff --git a/nerfstudio/pipelines/base_pipeline.py b/nerfstudio/pipelines/base_pipeline.py
index 345a39d4b6..b75adc6adb 100644
--- a/nerfstudio/pipelines/base_pipeline.py
+++ b/nerfstudio/pipelines/base_pipeline.py
@@ -26,18 +26,11 @@
import torch
import torch.distributed as dist
-from PIL import Image
-from rich.progress import (
- BarColumn,
- MofNCompleteColumn,
- Progress,
- TextColumn,
- TimeElapsedColumn,
-)
+from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn, TimeElapsedColumn
from torch import nn
+from torch.cuda.amp.grad_scaler import GradScaler
from torch.nn import Parameter
from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.cuda.amp.grad_scaler import GradScaler
from nerfstudio.configs import base_config as cfg
from nerfstudio.data.datamanagers.base_datamanager import (
@@ -46,6 +39,7 @@
VanillaDataManager,
)
from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager
+from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanager
from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
from nerfstudio.models.base_model import Model, ModelConfig
from nerfstudio.utils import profiler
@@ -264,6 +258,15 @@ def __init__(
self.datamanager: DataManager = config.datamanager.setup(
device=device, test_mode=test_mode, world_size=world_size, local_rank=local_rank
)
+ # TODO make cleaner
+ seed_pts = None
+ if (
+ hasattr(self.datamanager, "train_dataparser_outputs")
+ and "points3D_xyz" in self.datamanager.train_dataparser_outputs.metadata
+ ):
+ pts = self.datamanager.train_dataparser_outputs.metadata["points3D_xyz"]
+ pts_rgb = self.datamanager.train_dataparser_outputs.metadata["points3D_rgb"]
+ seed_pts = (pts, pts_rgb)
self.datamanager.to(device)
# TODO(ethan): get rid of scene_bounds from the model
assert self.datamanager.train_dataset is not None, "Missing input dataset"
@@ -274,6 +277,7 @@ def __init__(
metadata=self.datamanager.train_dataset.metadata,
device=device,
grad_scaler=grad_scaler,
+ seed_points=seed_pts,
)
self.model.to(device)
@@ -335,13 +339,11 @@ def get_eval_image_metrics_and_images(self, step: int):
step: current iteration step
"""
self.eval()
- image_idx, camera_ray_bundle, batch = self.datamanager.next_eval_image(step)
- outputs = self.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ camera, batch = self.datamanager.next_eval_image(step)
+ outputs = self.model.get_outputs_for_camera(camera)
metrics_dict, images_dict = self.model.get_image_metrics_and_images(outputs, batch)
- assert "image_idx" not in metrics_dict
- metrics_dict["image_idx"] = image_idx
assert "num_rays" not in metrics_dict
- metrics_dict["num_rays"] = len(camera_ray_bundle)
+ metrics_dict["num_rays"] = (camera.height * camera.width * camera.size).item()
self.train()
return metrics_dict, images_dict
@@ -361,7 +363,7 @@ def get_average_eval_image_metrics(
"""
self.eval()
metrics_dict_list = []
- assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager))
+ assert isinstance(self.datamanager, (VanillaDataManager, ParallelDataManager, FullImageDatamanager))
num_images = len(self.datamanager.fixed_indices_eval_dataloader)
with Progress(
TextColumn("[progress.description]{task.description}"),
@@ -371,26 +373,21 @@ def get_average_eval_image_metrics(
transient=True,
) as progress:
task = progress.add_task("[green]Evaluating all eval images...", total=num_images)
- for camera_ray_bundle, batch in self.datamanager.fixed_indices_eval_dataloader:
+ for camera, batch in self.datamanager.fixed_indices_eval_dataloader:
# time this the following line
inner_start = time()
- height, width = camera_ray_bundle.shape
+ outputs = self.model.get_outputs_for_camera(camera=camera)
+ height, width = camera.height, camera.width
num_rays = height * width
- outputs = self.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle)
- metrics_dict, images_dict = self.model.get_image_metrics_and_images(outputs, batch)
-
+ metrics_dict, _ = self.model.get_image_metrics_and_images(outputs, batch)
if output_path is not None:
- camera_indices = camera_ray_bundle.camera_indices
- assert camera_indices is not None
- for key, val in images_dict.items():
- Image.fromarray((val * 255).byte().cpu().numpy()).save(
- output_path / "{0:06d}-{1}.jpg".format(int(camera_indices[0, 0, 0]), key)
- )
+ raise NotImplementedError("Saving images is not implemented yet")
+
assert "num_rays_per_sec" not in metrics_dict
- metrics_dict["num_rays_per_sec"] = num_rays / (time() - inner_start)
+ metrics_dict["num_rays_per_sec"] = (num_rays / (time() - inner_start)).item()
fps_str = "fps"
assert fps_str not in metrics_dict
- metrics_dict[fps_str] = metrics_dict["num_rays_per_sec"] / (height * width)
+ metrics_dict[fps_str] = (metrics_dict["num_rays_per_sec"] / (height * width)).item()
metrics_dict_list.append(metrics_dict)
progress.advance(task)
# average the metrics list
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index b104597abd..9cebd896c2 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -37,15 +37,10 @@
from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.exporter import texture_utils, tsdf_utils
-from nerfstudio.exporter.exporter_utils import (
- collect_camera_poses,
- generate_point_cloud,
- get_mesh_from_filename,
-)
-from nerfstudio.exporter.marching_cubes import (
- generate_mesh_with_multires_marching_cubes,
-)
+from nerfstudio.exporter.exporter_utils import collect_camera_poses, generate_point_cloud, get_mesh_from_filename
+from nerfstudio.exporter.marching_cubes import generate_mesh_with_multires_marching_cubes
from nerfstudio.fields.sdf_field import SDFField
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModel
from nerfstudio.pipelines.base_pipeline import Pipeline, VanillaPipeline
from nerfstudio.utils.eval_utils import eval_setup
from nerfstudio.utils.rich_utils import CONSOLE
@@ -126,6 +121,9 @@ class ExportPointCloud(Exporter):
"""Number of rays to evaluate per batch. Decrease if you run out of memory."""
std_ratio: float = 10.0
"""Threshold based on STD of the average distances across the point cloud to remove outliers."""
+ save_world_frame: bool = True
+ """If true, saves in the frame of the transform.json file, if false saves in the frame of the scaled
+ dataparser transform"""
def main(self) -> None:
"""Export point cloud."""
@@ -162,6 +160,17 @@ def main(self) -> None:
crop_obb=crop_obb,
std_ratio=self.std_ratio,
)
+ if self.save_world_frame:
+ # apply the inverse dataparser transform to the point cloud
+ points = np.asarray(pcd.points)
+ poses = np.eye(4, dtype=np.float32)[None, ...].repeat(points.shape[0], axis=0)[:, :3, :]
+ poses[:, :3, 3] = points
+ poses = pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space(
+ torch.from_numpy(poses)
+ )
+ points = poses[:, :3, 3].numpy()
+ pcd.points = o3d.utility.Vector3dVector(points)
+
torch.cuda.empty_cache()
CONSOLE.print(f"[bold green]:white_check_mark: Generated {pcd}")
@@ -469,6 +478,58 @@ def main(self) -> None:
CONSOLE.print(f"[bold green]:white_check_mark: Saved poses to {output_file_path}")
+@dataclass
+class ExportGaussianSplat(Exporter):
+ """
+ Export 3D Gaussian Splatting model to a .ply
+ """
+
+ def main(self) -> None:
+ if not self.output_dir.exists():
+ self.output_dir.mkdir(parents=True)
+
+ _, pipeline, _, _ = eval_setup(self.load_config)
+
+ assert isinstance(pipeline.model, GaussianSplattingModel)
+
+ model: GaussianSplattingModel = pipeline.model
+
+ filename = self.output_dir / "point_cloud.ply"
+
+ map_to_tensors = {}
+
+ with torch.no_grad():
+ positions = model.means.cpu().numpy()
+ map_to_tensors["positions"] = o3d.core.Tensor(positions, o3d.core.float32)
+ map_to_tensors["normals"] = o3d.core.Tensor(np.zeros_like(positions), o3d.core.float32)
+
+ colors = model.colors.data.cpu().numpy()
+ map_to_tensors["colors"] = (colors * 255).astype(np.uint8)
+ for i in range(colors.shape[1]):
+ map_to_tensors[f"f_dc_{i}"] = colors[:, i : i + 1]
+
+ shs = model.shs_rest.data.cpu().numpy()
+        if model.config.sh_degree > 0:
+            # flatten (N, dim_sh - 1, 3) so every rest-of-SH coefficient is written as its own f_rest_i
+            shs = shs.reshape((colors.shape[0], -1))
+            for i in range(shs.shape[-1]):
+                map_to_tensors[f"f_rest_{i}"] = shs[:, i, None]
+
+ map_to_tensors["opacity"] = model.opacities.data.cpu().numpy()
+
+ scales = model.scales.data.cpu().unsqueeze(-1).numpy()
+ for i in range(3):
+ map_to_tensors[f"scale_{i}"] = scales[:, i]
+
+ quats = model.quats.data.cpu().unsqueeze(-1).numpy()
+
+ for i in range(4):
+ map_to_tensors[f"rot_{i}"] = quats[:, i]
+
+ pcd = o3d.t.geometry.PointCloud(map_to_tensors)
+
+ o3d.t.io.write_point_cloud(str(filename), pcd)
+
+
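+# Example invocation (a sketch; the subcommand name is registered in Commands below, and the
+# --load-config / --output-dir flags follow the existing Exporter options):
+#   ns-export gaussian-splat --load-config outputs/<experiment>/config.yml --output-dir exports/splat/
+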
Commands = tyro.conf.FlagConversionOff[
Union[
Annotated[ExportPointCloud, tyro.conf.subcommand(name="pointcloud")],
@@ -476,6 +537,7 @@ def main(self) -> None:
Annotated[ExportPoissonMesh, tyro.conf.subcommand(name="poisson")],
Annotated[ExportMarchingCubesMesh, tyro.conf.subcommand(name="marching-cubes")],
Annotated[ExportCameraPoses, tyro.conf.subcommand(name="cameras")],
+ Annotated[ExportGaussianSplat, tyro.conf.subcommand(name="gaussian-splat")],
]
]
diff --git a/nerfstudio/scripts/render.py b/nerfstudio/scripts/render.py
index 79cc6c7bd9..114e4195ec 100644
--- a/nerfstudio/scripts/render.py
+++ b/nerfstudio/scripts/render.py
@@ -156,7 +156,6 @@ def _render_trajectory_video(
obb_box = None
if crop_data is not None:
obb_box = crop_data.obb
- camera_ray_bundle = cameras.generate_rays(camera_indices=camera_idx, obb_box=obb_box)
max_dist, max_idx = -1, -1
true_max_dist, true_max_idx = -1, -1
@@ -206,10 +205,14 @@ def _render_trajectory_video(
with renderers.background_color_override_context(
crop_data.background_color.to(pipeline.device)
), torch.no_grad():
- outputs = pipeline.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ outputs = pipeline.model.get_outputs_for_camera(
+ cameras[camera_idx : camera_idx + 1], obb_box=obb_box
+ )
else:
with torch.no_grad():
- outputs = pipeline.model.get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ outputs = pipeline.model.get_outputs_for_camera(
+ cameras[camera_idx : camera_idx + 1], obb_box=obb_box
+ )
render_image = []
for rendered_output_name in rendered_output_names:
@@ -679,7 +682,7 @@ def main(self) -> None:
),
)
steps = int(self.frame_rate * self.seconds)
- camera_start = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0).flatten()
+ camera_start, _ = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0)
camera_path = get_spiral_path(camera_start, steps=steps, radius=self.radius)
_render_trajectory_video(
@@ -788,10 +791,9 @@ def update_config(config: TrainerConfig) -> TrainerConfig:
TimeRemainingColumn(elapsed_when_finished=False, compact=False),
TimeElapsedColumn(),
) as progress:
- for camera_idx, (ray_bundle, batch) in enumerate(progress.track(dataloader, total=len(dataset))):
- ray_bundle: RayBundle
+ for camera_idx, (camera, batch) in enumerate(progress.track(dataloader, total=len(dataset))):
with torch.no_grad():
- outputs = pipeline.model.get_outputs_for_camera_ray_bundle(ray_bundle)
+ outputs = pipeline.model.get_outputs_for_camera(camera)
gt_batch = batch.copy()
gt_batch["rgb"] = gt_batch.pop("image")
diff --git a/nerfstudio/scripts/viewer/run_viewer.py b/nerfstudio/scripts/viewer/run_viewer.py
index de210a30d8..b1b87c42c0 100644
--- a/nerfstudio/scripts/viewer/run_viewer.py
+++ b/nerfstudio/scripts/viewer/run_viewer.py
@@ -104,7 +104,7 @@ def _start_viewer(config: TrainerConfig, pipeline: Pipeline, step: int):
viewer_state = ViewerBetaState(
config.viewer,
log_filename=viewer_log_path,
- datapath=base_dir,
+ datapath=pipeline.datamanager.get_datapath(),
pipeline=pipeline,
share=config.viewer.make_share_url,
)
diff --git a/nerfstudio/viewer/server/render_state_machine.py b/nerfstudio/viewer/server/render_state_machine.py
index 390390d480..7533005214 100644
--- a/nerfstudio/viewer/server/render_state_machine.py
+++ b/nerfstudio/viewer/server/render_state_machine.py
@@ -24,6 +24,7 @@
from nerfstudio.cameras.cameras import Cameras
from nerfstudio.model_components.renderers import background_color_override_context
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModel
from nerfstudio.utils import colormaps, writer
from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter
from nerfstudio.viewer.server import viewer_utils
@@ -127,7 +128,14 @@ def _render_img(self, cam_msg: CameraMessage):
assert camera is not None, "render called before viewer connected"
with self.viewer.train_lock if self.viewer.train_lock is not None else contextlib.nullcontext():
- camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb)
+ # TODO jake-austin: Make this check whether the model inherits from a camera based model or a ray based model
+ # TODO Zhuoyang: First made some dummy judgements, need to be fixed later
+ isGaussianSplattingModel = isinstance(self.viewer.get_model(), GaussianSplattingModel)
+ if isGaussianSplattingModel:
+ # TODO fix me before ship
+ camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb)
+ else:
+ camera_ray_bundle = camera.generate_rays(camera_indices=0, aabb_box=self.viewer.get_model().render_aabb)
with TimeWriter(None, None, write=False) as vis_t:
self.viewer.get_model().eval()
@@ -142,12 +150,21 @@ def _render_img(self, cam_msg: CameraMessage):
device=self.viewer.get_model().device,
)
with background_color_override_context(background_color), torch.no_grad():
- outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ if isGaussianSplattingModel:
+ outputs = self.viewer.get_model().get_outputs_for_camera(camera)
+ else:
+ outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
else:
with torch.no_grad():
- outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ if isGaussianSplattingModel:
+ outputs = self.viewer.get_model().get_outputs_for_camera(camera)
+ else:
+ outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
self.viewer.get_model().train()
- num_rays = len(camera_ray_bundle)
+ if True:
+ num_rays = (camera.height * camera.width).item()
+ else:
+ num_rays = len(camera_ray_bundle)
render_time = vis_t.duration
if writer.is_initialized():
writer.put_time(
diff --git a/nerfstudio/viewer_beta/export_panel.py b/nerfstudio/viewer_beta/export_panel.py
index 425e10a19d..a5564327ae 100644
--- a/nerfstudio/viewer_beta/export_panel.py
+++ b/nerfstudio/viewer_beta/export_panel.py
@@ -81,6 +81,11 @@ def populate_point_cloud_tab(
) -> None:
server.add_gui_markdown("Render depth, project to an oriented point cloud, and filter. ")
num_points = server.add_gui_number("# Points", initial_value=1_000_000, min=1, max=None, step=1)
+ world_frame = server.add_gui_checkbox(
+ "Save in world frame",
+ True,
+ hint="Save the point cloud in the transforms.json frame, rather than scaled scene frame",
+ )
remove_outliers = server.add_gui_checkbox("Remove outliers", True)
normals = server.add_gui_dropdown(
"Normals",
@@ -104,6 +109,7 @@ def _(event: viser.GuiEvent) -> None:
f"--remove-outliers {remove_outliers.value}",
f"--normal-method {normals.value}",
f"--use_bounding_box {control_panel.crop_viewport}",
+ f"--save-world-frame {world_frame.value}",
get_crop_string(control_panel.crop_obb),
]
)
diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py
index 53c73c34e8..849cf3d4e7 100644
--- a/nerfstudio/viewer_beta/render_state_machine.py
+++ b/nerfstudio/viewer_beta/render_state_machine.py
@@ -27,6 +27,8 @@
from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter
from nerfstudio.viewer.server import viewer_utils
from nerfstudio.viewer_beta.utils import CameraState, get_camera
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModel
+from nerfstudio.cameras.cameras import Cameras
if TYPE_CHECKING:
from nerfstudio.viewer_beta.viewer import Viewer
@@ -125,11 +127,18 @@ def _render_img(self, camera_state: CameraState):
camera = get_camera(camera_state, image_height, image_width)
camera = camera.to(self.viewer.get_model().device)
+ assert isinstance(camera, Cameras)
assert camera is not None, "render called before viewer connected"
with TimeWriter(None, None, write=False) as vis_t:
with self.viewer.train_lock if self.viewer.train_lock is not None else contextlib.nullcontext():
- camera_ray_bundle = camera.generate_rays(camera_indices=0, obb_box=obb)
+ if isinstance(self.viewer.get_model(), GaussianSplattingModel):
+ color = self.viewer.control_panel.background_color
+ background_color = torch.tensor(
+ [color[0] / 255.0, color[1] / 255.0, color[2] / 255.0],
+ device=self.viewer.get_model().device,
+ )
+ self.viewer.get_model().set_background(background_color)
self.viewer.get_model().eval()
step = self.viewer.step
try:
@@ -145,21 +154,27 @@ def _render_img(self, camera_state: CameraState):
with background_color_override_context(
background_color
), torch.no_grad(), viewer_utils.SetTrace(self.check_interrupt):
- outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ outputs = self.viewer.get_model().get_outputs_for_camera(camera, obb_box=obb)
else:
with torch.no_grad(), viewer_utils.SetTrace(self.check_interrupt):
- outputs = self.viewer.get_model().get_outputs_for_camera_ray_bundle(camera_ray_bundle)
+ outputs = self.viewer.get_model().get_outputs_for_camera(camera, obb_box=obb)
except viewer_utils.IOChangeException:
self.viewer.get_model().train()
raise
self.viewer.get_model().train()
- num_rays = len(camera_ray_bundle)
+ num_rays = (camera.height * camera.width).item()
if self.viewer.control_panel.layer_depth:
- # convert to z_depth if depth compositing is enabled
- R = camera.camera_to_worlds[0:3, 0:3].T
- pts = camera_ray_bundle.directions * outputs["depth"]
- pts = (R @ (pts.view(-1, 3).T)).T.view(*camera_ray_bundle.directions.shape)
- outputs["gl_z_buf_depth"] = -pts[..., 2:3] # negative z axis is the coordinate convention
+ if isinstance(self.viewer.get_model(), GaussianSplattingModel):
+ # TODO: sending depth at high resolution lags the network a lot, figure out how to do this more efficiently
+ # outputs["gl_z_buf_depth"] = outputs["depth"]
+ pass
+ else:
+ # convert to z_depth if depth compositing is enabled
+ R = camera.camera_to_worlds[0, 0:3, 0:3].T
+ camera_ray_bundle = camera.generate_rays(camera_indices=0, obb_box=obb)
+ pts = camera_ray_bundle.directions * outputs["depth"]
+ pts = (R @ (pts.view(-1, 3).T)).T.view(*camera_ray_bundle.directions.shape)
+ outputs["gl_z_buf_depth"] = -pts[..., 2:3] # negative z axis is the coordinate convention
render_time = vis_t.duration
if writer.is_initialized() and render_time != 0:
writer.put_time(
@@ -187,7 +202,7 @@ def run(self):
except viewer_utils.IOChangeException:
# if we got interrupted, don't send the output to the viewer
continue
- self._send_output_to_viewer(outputs)
+ self._send_output_to_viewer(outputs, static_render=(action.action in ["static", "step"]))
def check_interrupt(self, frame, event, arg):
"""Raises interrupt when flag has been set and not already on lowest resolution.
@@ -199,7 +214,7 @@ def check_interrupt(self, frame, event, arg):
raise viewer_utils.IOChangeException
return self.check_interrupt
- def _send_output_to_viewer(self, outputs: Dict[str, Any]):
+ def _send_output_to_viewer(self, outputs: Dict[str, Any], static_render: bool = True):
"""Chooses the correct output and sends it to the viewer
Args:
@@ -239,11 +254,11 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any]):
depth = (
outputs["gl_z_buf_depth"].cpu().numpy() * self.viser_scale_ratio if "gl_z_buf_depth" in outputs else None
)
-
+ jpg_quality = self.viewer.config.jpeg_quality if static_render else 40
self.client.set_background_image(
selected_output.cpu().numpy(),
format=self.viewer.config.image_format,
- jpeg_quality=self.viewer.config.jpeg_quality,
+ jpeg_quality=jpg_quality,
depth=depth,
)
res = f"{selected_output.shape[0]}x{selected_output.shape[1]}px"
diff --git a/nerfstudio/viewer_beta/utils.py b/nerfstudio/viewer_beta/utils.py
index a3dd7b1c2c..3614fce419 100644
--- a/nerfstudio/viewer_beta/utils.py
+++ b/nerfstudio/viewer_beta/utils.py
@@ -65,7 +65,7 @@ def get_camera(
cx=pp_w,
cy=pp_h,
camera_type=camera_type,
- camera_to_worlds=camera_state.c2w.to(torch.float32),
+ camera_to_worlds=camera_state.c2w.to(torch.float32)[None, ...],
times=torch.tensor([0.0], dtype=torch.float32),
)
return camera
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index 92e609f076..2b6f4e140d 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -291,13 +291,16 @@ def set_camera_visibility(self, visible: bool) -> None:
self.camera_handles[idx].visible = visible
def update_camera_poses(self):
+ # TODO this fn accounts for like ~5% of total train time
# Update the train camera locations based on optimization
assert self.camera_handles is not None
- idxs = list(self.camera_handles.keys())
if hasattr(self.pipeline.datamanager, "train_camera_optimizer"):
camera_optimizer = self.pipeline.datamanager.train_camera_optimizer
- else:
+ elif hasattr(self.pipeline.model, "camera_optimizer"):
camera_optimizer = self.pipeline.model.camera_optimizer
+ else:
+ return
+ idxs = list(self.camera_handles.keys())
with torch.no_grad():
assert isinstance(camera_optimizer, CameraOptimizer)
c2ws_delta = camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)).cpu().numpy()
@@ -378,7 +381,7 @@ def init_scene(
camera = train_dataset.cameras[idx]
image_uint8 = (image * 255).detach().type(torch.uint8)
image_uint8 = image_uint8.permute(2, 0, 1)
- image_uint8 = torchvision.transforms.functional.resize(image_uint8, 100) # type: ignore
+ image_uint8 = torchvision.transforms.functional.resize(image_uint8, 100, antialias=None) # type: ignore
image_uint8 = image_uint8.permute(1, 2, 0)
image_uint8 = image_uint8.cpu().numpy()
c2w = camera.camera_to_worlds.cpu().numpy()
@@ -420,7 +423,6 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N
# this stops training while moving to make the response smoother
while time.time() - self.last_move_time < 0.1:
time.sleep(0.05)
- # self.render_statemachine.action(RenderAction("static", self.camera_state))
if self.trainer is not None and self.trainer.training_state == "training" and self.train_util != 1:
if (
EventName.TRAIN_RAYS_PER_SEC.value in GLOBAL_BUFFER["events"]
diff --git a/pyproject.toml b/pyproject.toml
index 019ef6b6a7..fd058f9e87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,12 +56,14 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.13",
+ "viser==0.1.14",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
"trimesh>=3.20.2",
- "timm==0.6.7"
+ "timm==0.6.7",
+ "gsplat==0.1.0",
+ "pytorch-msssim"
]
[project.urls]
diff --git a/tests/test_train.py b/tests/test_train.py
index a88283a592..45d8e348f0 100644
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -26,6 +26,7 @@
"neus",
"generfacto",
"neus-facto",
+ "gaussian-splatting",
]
From 626441e15e8e59970ba95229e40727458bbf65a3 Mon Sep 17 00:00:00 2001
From: Otto Seiskari
Date: Fri, 15 Dec 2023 18:37:27 +0200
Subject: [PATCH 096/101] Add instructions for data collection and processing
with Spectacular AI tools (#2668)
* Add instructions for using Spectacular AI tools
Adds easy support for new device types: OAK-D, RealSense and
Azure Kinect, as well as an alternative pipeline for iOS.
* Grammar fixes in docs/quickstart/custom_dataset.md (Spectacular AI section)
Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com>
* Fix section label in the Spectacular AI section of custom_dataset.md
* Update custom_dataset.md: typo fix.
"montions" -> "motions"
---------
Co-authored-by: Matias Turkulainen <30566358+maturk@users.noreply.github.com>
---
README.md | 2 ++
docs/quickstart/custom_dataset.md | 39 +++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)
diff --git a/README.md b/README.md
index b6c5bb6ff2..dc4a0093e3 100644
--- a/README.md
+++ b/README.md
@@ -241,12 +241,14 @@ Using an existing dataset is great, but likely you want to use your own data! We
| π± [Polycam](https://docs.nerf.studio/quickstart/custom_dataset.html#polycam-capture) | IOS with LiDAR | [Polycam App](https://poly.cam/) | π |
| π± [KIRI Engine](https://docs.nerf.studio/quickstart/custom_dataset.html#kiri-engine-capture) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | π |
| π± [Record3D](https://docs.nerf.studio/quickstart/custom_dataset.html#record3d-capture) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
+| π± [Spectacular AI](https://docs.nerf.studio/quickstart/custom_dataset.html#spectacularai) | IOS, OAK, [others](https://www.spectacularai.com/mapping#supported-devices) | [App](https://apps.apple.com/us/app/spectacular-rec/id6473188128) / [`sai-cli`](https://www.spectacularai.com/mapping) | π |
| π₯ [Metashape](https://docs.nerf.studio/quickstart/custom_dataset.html#metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](https://docs.nerf.studio/quickstart/custom_dataset.html#realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
| π₯ [ODM](https://docs.nerf.studio/quickstart/custom_dataset.html#odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
| π [Aria](https://docs.nerf.studio/quickstart/custom_dataset.html#aria) | Aria glasses | [Project Aria](https://projectaria.com/) | π |
| π [Custom](https://docs.nerf.studio/quickstart/data_conventions.html) | Any | Camera Poses | π |
+
## 5. Advanced Options
### Training models other than nerfacto
diff --git a/docs/quickstart/custom_dataset.md b/docs/quickstart/custom_dataset.md
index fddbe1d56b..6444ca4456 100644
--- a/docs/quickstart/custom_dataset.md
+++ b/docs/quickstart/custom_dataset.md
@@ -19,6 +19,7 @@ We currently support the following custom data types:
| π± [Polycam](polycam) | IOS with LiDAR | [Polycam App](https://poly.cam/) | π |
| π± [KIRI Engine](kiri) | IOS or Android | [KIRI Engine App](https://www.kiriengine.com/) | π |
| π± [Record3D](record3d) | IOS with LiDAR | [Record3D app](https://record3d.app/) | π |
+| π± [Spectacular AI](spectacularai) | IOS, OAK, others | [App](https://apps.apple.com/us/app/spectacular-rec/id6473188128) / [`sai-cli`](https://www.spectacularai.com/mapping) | π |
| π₯ [Metashape](metashape) | Any | [Metashape](https://www.agisoft.com/) | π |
| π₯ [RealityCapture](realitycapture) | Any | [RealityCapture](https://www.capturingreality.com/realitycapture) | π |
| π₯ [ODM](odm) | Any | [ODM](https://github.com/OpenDroneMap/ODM) | π |
@@ -267,6 +268,44 @@ ns-process-data record3d --data {data directory} --output-dir {output directory}
ns-train nerfacto --data {output directory}
```
+(spectacularai)=
+
+## Spectacular AI
+
+The Spectacular AI SDK and apps can be used to capture data from various devices:
+
+ * iPhones (with LiDAR)
+ * OAK-D cameras
+ * RealSense D455/D435i
+ * Azure Kinect DK
+
+The SDK also records IMU data, which is fused with camera and (if available) LiDAR/ToF data when computing the camera poses. This approach, VISLAM (visual-inertial SLAM), is more robust than purely image-based methods (e.g., COLMAP) and can work better and faster on difficult data (monotonous environments, fast motions, narrow FoV, etc.).
+
+Instructions:
+
+1. Installation. With the Nerfstudio Conda environment active, first install the Spectacular AI Python library:
+
+```bash
+pip install spectacularAI[full]
+```
+
+2. Install FFmpeg. Linux: `apt install ffmpeg` (or similar, if using another package manager). Windows: [see here](https://www.editframe.com/guides/how-to-install-and-start-using-ffmpeg-in-under-10-minutes). FFmpeg must be in your `PATH` so that `ffmpeg` works on the command line.
+
+3. Data capture. See [here for specific instructions for each supported device](https://github.com/SpectacularAI/sdk-examples/tree/main/python/mapping#recording-data).
+
+4. Process and export. Once you have recorded a dataset in Spectacular AI format and have it stored in `{data directory}`, it can be converted into a Nerfstudio-supported format with:
+
+```bash
+sai-cli process {data directory} --preview3d --key_frame_distance=0.05 {output directory}
+```
+The optional `--preview3d` flag shows a live 3D preview of the point cloud and estimated trajectory while VISLAM is running. The `--key_frame_distance` argument can be tuned based on the recorded scene size: 0.05 (5 cm) is good for small scans and 0.15 (15 cm) for room-sized scans. If the processing gets slow, you can also try adding a `--fast` flag to `sai-cli process` to trade off quality for speed (see the example after these steps).
+
+5. Train. No separate `ns-process-data` step is needed. The data in `{output directory}` can now be trained with Nerfstudio:
+
+```bash
+ns-train nerfacto --data {output directory}
+```
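+
+For example, for a larger room-sized scan where processing is slow, step 4 can be run with a bigger key frame distance and the optional `--fast` flag; the values below are illustrative, not prescriptive:
+
+```bash
+sai-cli process {data directory} --fast --key_frame_distance=0.15 {output directory}
+```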
+
(metashape)=
## Metashape
From f7a49563a0b93df1fac2ada0cb15b0ea9ec36155 Mon Sep 17 00:00:00 2001
From: Zhuoyang Pan <102644383+Zhuoyang-Pan@users.noreply.github.com>
Date: Mon, 18 Dec 2023 17:52:14 -0800
Subject: [PATCH 097/101] Fix a bug of random quaternions (#2687)
fix a bug for random quaternions
---
nerfstudio/models/gaussian_splatting.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index fe081734ec..78ad185a2c 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -60,7 +60,7 @@ def random_quat_tensor(N):
torch.sqrt(1 - u) * torch.sin(2 * math.pi * v),
torch.sqrt(1 - u) * torch.cos(2 * math.pi * v),
torch.sqrt(u) * torch.sin(2 * math.pi * w),
- torch.sqrt(u) * torch.sin(2 * math.pi * w),
+ torch.sqrt(u) * torch.cos(2 * math.pi * w),
],
dim=-1,
)
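For context, here is a minimal standalone sketch of the corrected sampling, which implements Shoemake's method for drawing unit quaternions uniformly over rotations. It mirrors the patched `random_quat_tensor`, but the docstring and comments are illustrative additions rather than part of the patch:

```python
import math

import torch


def random_quat_tensor(N: int) -> torch.Tensor:
    """Return an (N, 4) tensor of unit quaternions sampled uniformly over rotations."""
    u = torch.rand(N)
    v = torch.rand(N)
    w = torch.rand(N)
    return torch.stack(
        [
            torch.sqrt(1 - u) * torch.sin(2 * math.pi * v),
            torch.sqrt(1 - u) * torch.cos(2 * math.pi * v),
            torch.sqrt(u) * torch.sin(2 * math.pi * w),
            # cos, not sin: with the duplicated sin the result is not unit-norm
            # and the sampled rotations are no longer uniformly distributed.
            torch.sqrt(u) * torch.cos(2 * math.pi * w),
        ],
        dim=-1,
    )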
From 4a54763323041941f43140736d1b76c80a75af7f Mon Sep 17 00:00:00 2001
From: Hardik Dava <39372750+hardikdava@users.noreply.github.com>
Date: Tue, 19 Dec 2023 18:20:17 +0100
Subject: [PATCH 098/101] fixed color issue in ply file (#2693)
fixed color export
---
nerfstudio/models/gaussian_splatting.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index 78ad185a2c..91686b44a2 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -211,7 +211,7 @@ def populate_modules(self):
@property
def colors(self):
- return SH2RGB(self.colors_all[:, 0, :])
+ return self.colors_all[:, 0, :]
@property
def shs_rest(self):
From 7a8e31d84e424611db77f780e05f8af2e80394b4 Mon Sep 17 00:00:00 2001
From: Brent Yi
Date: Wed, 20 Dec 2023 17:20:41 -0800
Subject: [PATCH 099/101] Improved render panel for beta viewer (#2667)
* Continuous velocities for render splines
* Configure ruff line length
* Improved render preview
* No longer need to update camera FOV in render preview
* Details
* Polishing
* Cleanup
* Details
* Address pyright errors
* type: ignore for broken (?) Aria stub
---
.../scripts/datasets/process_project_aria.py | 2 +-
nerfstudio/viewer_beta/control_panel.py | 75 ++---
nerfstudio/viewer_beta/render_panel.py | 261 ++++++++++++------
.../viewer_beta/render_state_machine.py | 37 ++-
nerfstudio/viewer_beta/utils.py | 23 +-
nerfstudio/viewer_beta/viewer.py | 56 ++--
nerfstudio/viewer_beta/viewer_elements.py | 12 +-
pyproject.toml | 2 +-
8 files changed, 311 insertions(+), 157 deletions(-)
diff --git a/nerfstudio/scripts/datasets/process_project_aria.py b/nerfstudio/scripts/datasets/process_project_aria.py
index f2fdcc94c6..10f26653fd 100644
--- a/nerfstudio/scripts/datasets/process_project_aria.py
+++ b/nerfstudio/scripts/datasets/process_project_aria.py
@@ -103,7 +103,7 @@ def get_camera_calibs(provider: VrsDataProvider) -> Dict[str, AriaCameraCalibrat
def read_trajectory_csv_to_dict(file_iterable_csv: str) -> TimedPoses:
- closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv)
+ closed_loop_traj = mps.read_closed_loop_trajectory(file_iterable_csv) # type: ignore
timestamps_secs, poses = zip(
*[(it.tracking_timestamp.total_seconds(), it.transform_world_device) for it in closed_loop_traj]
diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py
index c94b4745f6..a937edce16 100644
--- a/nerfstudio/viewer_beta/control_panel.py
+++ b/nerfstudio/viewer_beta/control_panel.py
@@ -18,9 +18,8 @@
import numpy as np
import torch
-import viser.transforms as vtf
-from viser import ViserServer
import viser
+import viser.transforms as vtf
from nerfstudio.data.scene_box import OrientedBox
from nerfstudio.utils.colormaps import ColormapOptions, Colormaps
from nerfstudio.viewer_beta.viewer_elements import ( # ViewerButtonGroup,
@@ -33,6 +32,7 @@
ViewerSlider,
ViewerVec3,
)
+from viser import ViserServer
class ControlPanel:
@@ -42,7 +42,6 @@ class ControlPanel:
time_enabled: whether or not the time slider should be enabled
rerender_cb: a callback that will be called when the user changes a parameter that requires a rerender
(eg train speed, max res, etc)
- crop_update_cb: a callback that will be called when the user changes the crop parameters
update_output_cb: a callback that will be called when the user changes the output render
default_composite_depth: whether to default to compositing depth or not
"""
@@ -52,8 +51,7 @@ def __init__(
viser_server: ViserServer,
time_enabled: bool,
scale_ratio: float,
- rerender_cb: Callable,
- crop_update_cb: Callable,
+ rerender_cb: Callable[[], None],
update_output_cb: Callable,
update_split_output_cb: Callable,
default_composite_depth: bool = True,
@@ -71,51 +69,53 @@ def __init__(
cb_hook=lambda han: self._train_speed_cb(),
)
self._output_render = ViewerDropdown(
- "Output Render",
+ "Output type",
"not set",
["not set"],
- cb_hook=lambda han: [self.update_control_panel(), update_output_cb(han), rerender_cb(han)],
+ cb_hook=lambda han: [self.update_control_panel(), update_output_cb(han), rerender_cb()],
hint="The output to render",
)
self._colormap = ViewerDropdown[Colormaps](
- "Colormap", "default", ["default"], cb_hook=rerender_cb, hint="The colormap to use"
+ "Colormap", "default", ["default"], cb_hook=lambda _: rerender_cb(), hint="The colormap to use"
+ )
+ self._invert = ViewerCheckbox("Invert", False, cb_hook=lambda _: rerender_cb(), hint="Invert the colormap")
+ self._normalize = ViewerCheckbox(
+ "Normalize", True, cb_hook=lambda _: rerender_cb(), hint="Normalize the colormap"
)
- self._invert = ViewerCheckbox("Invert", False, cb_hook=rerender_cb, hint="Invert the colormap")
- self._normalize = ViewerCheckbox("Normalize", True, cb_hook=rerender_cb, hint="Normalize the colormap")
- self._min = ViewerNumber("Min", 0.0, cb_hook=rerender_cb, hint="Min value of the colormap")
- self._max = ViewerNumber("Max", 1.0, cb_hook=rerender_cb, hint="Max value of the colormap")
+ self._min = ViewerNumber("Min", 0.0, cb_hook=lambda _: rerender_cb(), hint="Min value of the colormap")
+ self._max = ViewerNumber("Max", 1.0, cb_hook=lambda _: rerender_cb(), hint="Max value of the colormap")
self._split = ViewerCheckbox(
"Enable",
False,
- cb_hook=lambda han: [self.update_control_panel(), rerender_cb(han)],
+ cb_hook=lambda han: [self.update_control_panel(), rerender_cb()],
hint="Render two outputs",
)
self._split_percentage = ViewerSlider(
- "Split Percentage", 0.5, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Where to split"
+ "Split percentage", 0.5, 0.0, 1.0, 0.01, cb_hook=lambda _: rerender_cb(), hint="Where to split"
)
self._split_output_render = ViewerDropdown(
- "Output Render Split",
+ "Output render split",
"not set",
["not set"],
- cb_hook=lambda han: [self.update_control_panel(), update_split_output_cb(han), rerender_cb(han)],
+ cb_hook=lambda han: [self.update_control_panel(), update_split_output_cb(han), rerender_cb()],
hint="The second output",
)
# Hack: spaces are after at the end of the names to make them unique
self._split_colormap = ViewerDropdown[Colormaps](
- "Colormap ", "default", ["default"], cb_hook=rerender_cb, hint="Colormap of the second output"
+ "Colormap ", "default", ["default"], cb_hook=lambda _: rerender_cb(), hint="Colormap of the second output"
)
self._split_invert = ViewerCheckbox(
- "Invert ", False, cb_hook=rerender_cb, hint="Invert the colormap of the second output"
+ "Invert ", False, cb_hook=lambda _: rerender_cb(), hint="Invert the colormap of the second output"
)
self._split_normalize = ViewerCheckbox(
- "Normalize ", True, cb_hook=rerender_cb, hint="Normalize the colormap of the second output"
+ "Normalize ", True, cb_hook=lambda _: rerender_cb(), hint="Normalize the colormap of the second output"
)
self._split_min = ViewerNumber(
- "Min ", 0.0, cb_hook=rerender_cb, hint="Min value of the colormap of the second output"
+ "Min ", 0.0, cb_hook=lambda _: rerender_cb(), hint="Min value of the colormap of the second output"
)
self._split_max = ViewerNumber(
- "Max ", 1.0, cb_hook=rerender_cb, hint="Max value of the colormap of the second output"
+ "Max ", 1.0, cb_hook=lambda _: rerender_cb(), hint="Max value of the colormap of the second output"
)
self._train_util = ViewerSlider(
@@ -127,22 +127,28 @@ def __init__(
hint="Target training utilization, 0.0 is slow, 1.0 is fast. Doesn't affect final render quality",
)
self._layer_depth = ViewerCheckbox(
- "Composite Depth",
+ "Composite depth",
self.default_composite_depth,
- cb_hook=rerender_cb,
+ cb_hook=lambda _: rerender_cb(),
hint="Allow NeRF to occlude 3D browser objects",
)
self._max_res = ViewerSlider(
- "Max Res", 512, 64, 2048, 100, cb_hook=rerender_cb, hint="Maximum resolution to render in viewport"
+ "Max res",
+ 512,
+ 64,
+ 2048,
+ 100,
+ cb_hook=lambda _: rerender_cb(),
+ hint="Maximum resolution to render in viewport",
)
self._crop_viewport = ViewerCheckbox(
"Enable ",
False,
- cb_hook=lambda han: [self.update_control_panel(), crop_update_cb(han), rerender_cb(han)],
+ cb_hook=lambda han: [self.update_control_panel(), rerender_cb()],
hint="Crop the scene to a specified box",
)
self._background_color = ViewerRGB(
- "Background color", (38, 42, 55), cb_hook=crop_update_cb, hint="Color of the background"
+ "Background color", (38, 42, 55), cb_hook=lambda _: rerender_cb(), hint="Color of the background"
)
self._crop_handle = self.viser_server.add_transform_controls("Crop", depth_test=False, line_width=4.0)
@@ -150,10 +156,10 @@ def update_center(han):
self._crop_handle.position = tuple(p * self.viser_scale_ratio for p in han.value) # type: ignore
self._crop_center = ViewerVec3(
- "Crop Center",
+ "Crop center",
(0.0, 0.0, 0.0),
step=0.01,
- cb_hook=lambda e: [crop_update_cb(e), update_center(e)],
+ cb_hook=lambda e: [rerender_cb(), update_center(e)],
hint="Center of the crop box",
)
@@ -161,15 +167,15 @@ def update_rot(han):
self._crop_handle.wxyz = vtf.SO3.from_rpy_radians(*han.value).wxyz
self._crop_rot = ViewerVec3(
- "Crop Rotation",
+ "Crop rotation",
(0.0, 0.0, 0.0),
step=0.01,
- cb_hook=lambda e: [crop_update_cb(e), update_rot(e)],
+ cb_hook=lambda e: [rerender_cb(), update_rot(e)],
hint="Rotation of the crop box",
)
self._crop_scale = ViewerVec3(
- "Crop Scale", (1.0, 1.0, 1.0), step=0.01, cb_hook=crop_update_cb, hint="Scale of the crop box"
+ "Crop scale", (1.0, 1.0, 1.0), step=0.01, cb_hook=lambda _: rerender_cb(), hint="Size of the crop box."
)
@self._crop_handle.on_update
@@ -179,7 +185,7 @@ def _update_crop_handle(han):
rpy = vtf.SO3(self._crop_handle.wxyz).as_rpy_radians()
self._crop_rot.value = (float(rpy.roll), float(rpy.pitch), float(rpy.yaw))
- self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=rerender_cb, hint="Time to render")
+ self._time = ViewerSlider("Time", 0.0, 0.0, 1.0, 0.01, cb_hook=lambda _: rerender_cb(), hint="Time to render")
self._time_enabled = time_enabled
self.add_element(self._train_speed)
@@ -219,7 +225,10 @@ def _update_crop_handle(han):
self.add_element(self._time, additional_tags=("time",))
self._reset_camera = viser_server.add_gui_button(
- label="Reset Up Dir", disabled=False, icon=viser.Icon.ARROW_BIG_UP_LINES, color="gray"
+ label="Reset Up Direction",
+ icon=viser.Icon.ARROW_BIG_UP_LINES,
+ color="gray",
+ hint="Set the up direction of the camera orbit controls to the camera's current up direction.",
)
self._reset_camera.on_click(self._reset_camera_cb)
diff --git a/nerfstudio/viewer_beta/render_panel.py b/nerfstudio/viewer_beta/render_panel.py
index 0e666038ef..6c31447bfb 100644
--- a/nerfstudio/viewer_beta/render_panel.py
+++ b/nerfstudio/viewer_beta/render_panel.py
@@ -21,14 +21,14 @@
import threading
import time
from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Literal, Optional, Tuple
import numpy as np
+import scipy.interpolate
import splines
import splines.quaternion
import viser
import viser.transforms as tf
-
from nerfstudio.viewer_beta.control_panel import ControlPanel
@@ -209,21 +209,50 @@ def update_aspect(self, aspect: float) -> None:
frame = dataclasses.replace(frame[0], aspect=aspect)
self.add_camera(frame, keyframe_index=keyframe_index)
+ def get_aspect(self) -> float:
+ """Get W/H aspect ratio, which is shared across all keyframes."""
+ assert len(self._keyframes) > 0
+ return next(iter(self._keyframes.values()))[0].aspect
+
def reset(self) -> None:
for frame in self._keyframes.values():
frame[1].remove()
self._keyframes.clear()
self.update_spline()
- def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]:
- if len(self._keyframes) < 2:
- return None
+ def spline_t_from_t_sec(self, time: np.ndarray) -> np.ndarray:
+ """From a time value in seconds, compute a t value for our geometric
+ spline interpolation. An increment of 1 for the latter will move the
+ camera forward by one keyframe.
+ We use a PCHIP spline here to guarantee monotonicity.
+ """
transition_times_cumsum = self.compute_transition_times_cumsum()
spline_indices = np.arange(transition_times_cumsum.shape[0])
- def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
- return np.interp(time, transition_times_cumsum, spline_indices)
+ if self.loop:
+ # In the case of a loop, we pad the spline to match the start/end
+ # slopes.
+ interpolator = scipy.interpolate.PchipInterpolator(
+ x=np.concatenate(
+ [
+ [-(transition_times_cumsum[-1] - transition_times_cumsum[-2])],
+ transition_times_cumsum,
+ transition_times_cumsum[-1:] + transition_times_cumsum[1:2],
+ ],
+ axis=0,
+ ),
+ y=np.concatenate([[-1], spline_indices, [spline_indices[-1] + 1]], axis=0),
+ )
+ else:
+ interpolator = scipy.interpolate.PchipInterpolator(x=transition_times_cumsum, y=spline_indices)
+
+ # Clip to account for floating point error.
+ return np.clip(interpolator(time), 0, spline_indices[-1])
+
+ def interpolate_pose_and_fov_rad(self, normalized_t: float) -> Optional[Tuple[tf.SE3, float]]:
+ if len(self._keyframes) < 2:
+ return None
self._fov_spline = splines.KochanekBartels(
[
@@ -239,32 +268,29 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
assert self._fov_spline is not None
max_t = self.compute_duration()
t = max_t * normalized_t
+ spline_t = float(self.spline_t_from_t_sec(np.array(t)))
- quat = self._orientation_spline.evaluate(spline_t_from_t_sec(t))
+ quat = self._orientation_spline.evaluate(spline_t)
assert isinstance(quat, splines.quaternion.UnitQuaternion)
return (
tf.SE3.from_rotation_and_translation(
tf.SO3(np.array([quat.scalar, *quat.vector])),
- self._position_spline.evaluate(spline_t_from_t_sec(t)),
+ self._position_spline.evaluate(spline_t),
),
- float(self._fov_spline.evaluate(spline_t_from_t_sec(t))),
+ float(self._fov_spline.evaluate(spline_t)),
)
def update_spline(self) -> None:
num_frames = int(self.compute_duration() * self.framerate)
- if num_frames <= 0 or not self.show_spline:
+ keyframes = list(self._keyframes.values())
+
+ if num_frames <= 0 or not self.show_spline or len(keyframes) < 2:
for node in self._spline_nodes:
node.remove()
self._spline_nodes.clear()
return
- # Update internal splines.
- keyframes = list(self._keyframes.values())
transition_times_cumsum = self.compute_transition_times_cumsum()
- spline_indices = np.arange(transition_times_cumsum.shape[0])
-
- def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
- return np.interp(time, transition_times_cumsum, spline_indices)
self._orientation_spline = splines.quaternion.KochanekBartels(
[
@@ -282,7 +308,7 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
# Update visualized spline.
points_array = self._position_spline.evaluate(
- spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames))
+ self.spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames))
)
colors_array = np.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))])
@@ -313,7 +339,11 @@ def spline_t_from_t_sec(time: Union[float, np.ndarray]) -> np.ndarray:
def make_transition_handle(i: int) -> None:
assert self._position_spline is not None
transition_pos = self._position_spline.evaluate(
- spline_t_from_t_sec((transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0)
+ float(
+ self.spline_t_from_t_sec(
+ (transition_times_cumsum[i] + transition_times_cumsum[i + 1]) / 2.0,
+ )
+ )
)
transition_sphere = self._server.add_icosphere(
f"/render_camera_spline/transition_{i}",
@@ -340,7 +370,8 @@ def _(_) -> None:
) as camera_edit_panel:
self._camera_edit_panel = camera_edit_panel
override_transition_enabled = server.add_gui_checkbox(
- "Override transition", initial_value=keyframe.override_transition_enabled
+ "Override transition",
+ initial_value=keyframe.override_transition_enabled,
)
override_transition_sec = server.add_gui_number(
"Override transition (sec)",
@@ -418,14 +449,31 @@ def compute_transition_times_cumsum(self) -> np.ndarray:
return np.array(out)
+@dataclasses.dataclass
+class RenderTabState:
+ """Useful GUI handles exposed by the render tab."""
+
+ preview_render: bool
+ preview_fov: float
+ preview_aspect: float
+ preview_camera_type: Literal["Perspective", "Fisheye", "Equirectangular"]
+
+
def populate_render_tab(
server: viser.ViserServer,
config_path: Path,
datapath: Path,
control_panel: Optional[ControlPanel] = None,
-) -> None:
+) -> RenderTabState:
from nerfstudio.viewer_beta.viewer import VISER_NERFSTUDIO_SCALE_RATIO
+ render_tab_state = RenderTabState(
+ preview_render=False,
+ preview_fov=0.0,
+ preview_aspect=1.0,
+ preview_camera_type="Perspective",
+ )
+
fov_degrees = server.add_gui_slider(
"Default FOV",
initial_value=75.0,
@@ -445,6 +493,7 @@ def _(_) -> None:
# Updating the aspect ratio will also re-render the camera frustums.
# Could rethink this.
camera_path.update_aspect(resolution.value[0] / resolution.value[1])
+ compute_and_update_preview_camera_state()
resolution = server.add_gui_vector2(
"Resolution",
@@ -454,16 +503,20 @@ def _(_) -> None:
step=1,
hint="Render output resolution in pixels.",
)
- resolution.on_update(lambda _: camera_path.update_aspect(resolution.value[0] / resolution.value[1]))
+
+ @resolution.on_update
+ def _(_) -> None:
+ camera_path.update_aspect(resolution.value[0] / resolution.value[1])
+ compute_and_update_preview_camera_state()
camera_type = server.add_gui_dropdown(
- "Camera Type",
+ "Camera type",
("Perspective", "Fisheye", "Equirectangular"),
initial_value="Perspective",
hint="Camera model to render with. This is applied to all keyframes.",
)
add_button = server.add_gui_button(
- "Add keyframe",
+ "Add Keyframe",
icon=viser.Icon.PLUS,
hint="Add a new keyframe at the current pose.",
)
@@ -483,19 +536,8 @@ def _(event: viser.GuiEvent) -> None:
duration_number.value = camera_path.compute_duration()
camera_path.update_spline()
- reset_up_button = server.add_gui_button(
- "Reset up direction",
- icon=viser.Icon.ARROW_BIG_UP_LINES,
- hint="Reset the orbit up direction.",
- )
-
- @reset_up_button.on_click
- def _(event: viser.GuiEvent) -> None:
- assert event.client is not None
- event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0])
-
clear_keyframes_button = server.add_gui_button(
- "Clear keyframes",
+ "Clear Keyframes",
icon=viser.Icon.TRASH,
hint="Remove all keyframes from the render path.",
)
@@ -535,7 +577,7 @@ def _(_) -> None:
duration_number.value = camera_path.compute_duration()
tension_slider = server.add_gui_slider(
- "Spline Tension",
+ "Spline tension",
min=0.0,
max=1.0,
initial_value=0.0,
@@ -554,6 +596,8 @@ def _(_) -> None:
hint="Toggle move handles for keyframes in the scene.",
)
+ transform_controls: List[viser.SceneNodeHandle] = []
+
@move_checkbox.on_update
def _(event: viser.GuiEvent) -> None:
# Clear move handles when toggled off.
@@ -614,7 +658,11 @@ def _(_) -> None:
with playback_folder:
play_button = server.add_gui_button("Play", icon=viser.Icon.PLAYER_PLAY)
pause_button = server.add_gui_button("Pause", icon=viser.Icon.PLAYER_PAUSE, visible=False)
- attach_viewport_checkbox = server.add_gui_checkbox("Attach viewport", initial_value=False)
+ preview_render_button = server.add_gui_button(
+ "Preview Render", hint="Show a preview of the render in the viewport."
+ )
+ preview_render_stop_button = server.add_gui_button("Exit Render Preview", color="red", visible=False)
+
transition_sec_number = server.add_gui_number(
"Transition (sec)",
min=0.001,
@@ -654,6 +702,24 @@ def remove_preview_camera() -> None:
preview_camera_handle.remove()
preview_camera_handle = None
+ def compute_and_update_preview_camera_state() -> Optional[Tuple[tf.SE3, float]]:
+ """Update the render tab state with the current preview camera pose.
+ Returns current camera pose + FOV if available."""
+
+ if preview_frame_slider is None:
+ return
+ maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad(
+ preview_frame_slider.value / get_max_frame_index()
+ )
+ if maybe_pose_and_fov_rad is None:
+ remove_preview_camera()
+ return
+ pose, fov_rad = maybe_pose_and_fov_rad
+ render_tab_state.preview_fov = fov_rad
+ render_tab_state.preview_aspect = camera_path.get_aspect()
+ render_tab_state.preview_camera_type = camera_type.value
+ return pose, fov_rad
+
def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]:
"""Helper for creating the current frame # slider. This is removed and
re-added anytime the `max` value changes."""
@@ -666,18 +732,17 @@ def add_preview_frame_slider() -> Optional[viser.GuiInputHandle[int]]:
step=1,
initial_value=0,
# Place right after the pause button.
- order=pause_button.order + 0.01,
+ order=preview_render_stop_button.order + 0.01,
+ disabled=get_max_frame_index() == 1,
)
+ play_button.disabled = preview_frame_slider.disabled
+ preview_render_button.disabled = preview_frame_slider.disabled
@preview_frame_slider.on_update
def _(_) -> None:
nonlocal preview_camera_handle
-
- maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad(
- preview_frame_slider.value / get_max_frame_index()
- )
+ maybe_pose_and_fov_rad = compute_and_update_preview_camera_state()
if maybe_pose_and_fov_rad is None:
- remove_preview_camera()
return
pose, fov_rad = maybe_pose_and_fov_rad
@@ -689,57 +754,61 @@ def _(_) -> None:
wxyz=pose.rotation().wxyz,
position=pose.translation(),
color=(10, 200, 30),
- # Hack: hide green frustum if the viewport is attached.
- # This is a waste of bandwidth, but will ensure that any old
- # frustums are removed/aren't rendered.
- #
- # Easy to fix with a global variable.
- visible=not attach_viewport_checkbox.value,
)
- if attach_viewport_checkbox.value:
+ if render_tab_state.preview_render:
for client in server.get_clients().values():
client.camera.wxyz = pose.rotation().wxyz
client.camera.position = pose.translation()
- client.camera.fov = fov_rad
return preview_frame_slider
- @attach_viewport_checkbox.on_update
+ # We back up the camera poses before and after we start previewing renders.
+ camera_pose_backup_from_id: Dict[int, tuple] = {}
+
+ @preview_render_button.on_click
def _(_) -> None:
- if preview_frame_slider is None:
- remove_preview_camera()
- return
- maybe_pose_and_fov_rad = camera_path.interpolate_pose_and_fov_rad(
- preview_frame_slider.value / get_max_frame_index()
- )
+ render_tab_state.preview_render = True
+ preview_render_button.visible = False
+ preview_render_stop_button.visible = True
+
+ maybe_pose_and_fov_rad = compute_and_update_preview_camera_state()
if maybe_pose_and_fov_rad is None:
remove_preview_camera()
return
pose, fov = maybe_pose_and_fov_rad
- server.add_camera_frustum(
- "/preview_camera",
- fov=fov,
- aspect=resolution.value[0] / resolution.value[1],
- scale=0.35,
- wxyz=pose.rotation().wxyz,
- position=pose.translation(),
- color=(10, 200, 30),
- # Hack: hide green frustum if the viewport is attached.
- # This is a waste of bandwidth, but will ensure that any old
- # frustums are removed/aren't rendered.
- #
- # Easy to fix with a global variable.
- visible=not attach_viewport_checkbox.value,
- )
- if not attach_viewport_checkbox.value:
- for client in server.get_clients().values():
- client.camera.fov = fov_degrees.value / 180 * np.pi
- else:
- if attach_viewport_checkbox.value:
- for client in server.get_clients().values():
- client.camera.wxyz = pose.rotation().wxyz
- client.camera.position = pose.translation()
- client.camera.fov = fov
+ del fov
+
+ # Hide all scene nodes when we're previewing the render.
+ server.set_global_scene_node_visibility(False)
+
+ # Back up and then set camera poses.
+ for client in server.get_clients().values():
+ camera_pose_backup_from_id[client.client_id] = (
+ client.camera.position,
+ client.camera.look_at,
+ client.camera.up_direction,
+ )
+ client.camera.wxyz = pose.rotation().wxyz
+ client.camera.position = pose.translation()
+
+ @preview_render_stop_button.on_click
+ def _(_) -> None:
+ render_tab_state.preview_render = False
+ preview_render_button.visible = True
+ preview_render_stop_button.visible = False
+
+ # Revert camera poses.
+ for client in server.get_clients().values():
+ if client.client_id not in camera_pose_backup_from_id:
+ continue
+ cam_position, cam_look_at, cam_up = camera_pose_backup_from_id.pop(client.client_id)
+ client.camera.position = cam_position
+ client.camera.look_at = cam_look_at
+ client.camera.up_direction = cam_up
+ client.flush()
+
+ # Un-hide scene nodes.
+ server.set_global_scene_node_visibility(True)
preview_frame_slider = add_preview_frame_slider()
@@ -823,13 +892,16 @@ def _(_) -> None:
pose = tf.SE3.from_matrix(np.array(frame["matrix"]).reshape(4, 4))
# apply the x rotation by 180 deg
pose = tf.SE3.from_rotation_and_translation(
- pose.rotation() @ tf.SO3.from_x_radians(np.pi), pose.translation()
+ pose.rotation() @ tf.SO3.from_x_radians(np.pi),
+ pose.translation(),
)
camera_path.add_camera(
Keyframe(
position=pose.translation() * VISER_NERFSTUDIO_SCALE_RATIO,
wxyz=pose.rotation().wxyz,
- override_fov_enabled=True,
+ # There are some floating point conversions between degrees and radians, so the fov and
+ # default_Fov values will not be exactly matched.
+ override_fov_enabled=abs(frame["fov"] - json_data.get("default_fov", 0.0)) < 1e-3,
override_fov_rad=frame["fov"] / 180.0 * np.pi,
aspect=frame["aspect"],
override_transition_enabled=frame.get("override_transition_enabled", None),
@@ -853,7 +925,9 @@ def _(_) -> None:
# set the initial value to the current date-time string
now = datetime.datetime.now()
render_name_text = server.add_gui_text(
- "Render Name", initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"), hint="Name of the render"
+ "Render name",
+ initial_value=now.strftime("%Y-%m-%d-%H-%M-%S"),
+ hint="Name of the render",
)
render_button = server.add_gui_button(
"Generate Command",
@@ -862,6 +936,18 @@ def _(_) -> None:
hint="Generate the ns-render command for rendering the camera path.",
)
+ reset_up_button = server.add_gui_button(
+ "Reset Up Direction",
+ icon=viser.Icon.ARROW_BIG_UP_LINES,
+ color="gray",
+ hint="Set the up direction of the camera orbit controls to the camera's current up direction.",
+ )
+
+ @reset_up_button.on_click
+ def _(event: viser.GuiEvent) -> None:
+ assert event.client is not None
+ event.client.camera.up_direction = tf.SO3(event.client.camera.wxyz) @ np.array([0.0, -1.0, 0.0])
+
@render_button.on_click
def _(event: viser.GuiEvent) -> None:
assert event.client is not None
@@ -901,6 +987,7 @@ def _(event: viser.GuiEvent) -> None:
"override_transition_sec": keyframe.override_transition_sec,
}
)
+ json_data["default_fov"] = fov_degrees.value
json_data["default_transition_sec"] = transition_sec_number.value
json_data["keyframes"] = keyframes
json_data["camera_type"] = camera_type.value.lower()
@@ -980,7 +1067,7 @@ def _(_) -> None:
camera_path.default_fov = fov_degrees.value / 180.0 * np.pi
camera_path.default_transition_sec = transition_sec_number.value
- transform_controls: List[viser.SceneNodeHandle] = []
+ return render_tab_state
if __name__ == "__main__":
diff --git a/nerfstudio/viewer_beta/render_state_machine.py b/nerfstudio/viewer_beta/render_state_machine.py
index 849cf3d4e7..54f8e69ae2 100644
--- a/nerfstudio/viewer_beta/render_state_machine.py
+++ b/nerfstudio/viewer_beta/render_state_machine.py
@@ -20,15 +20,16 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple, get_args
+import numpy as np
import torch
-from viser import ClientHandle
+from nerfstudio.cameras.cameras import Cameras
from nerfstudio.model_components.renderers import background_color_override_context
+from nerfstudio.models.gaussian_splatting import GaussianSplattingModel
from nerfstudio.utils import colormaps, writer
from nerfstudio.utils.writer import GLOBAL_BUFFER, EventName, TimeWriter
from nerfstudio.viewer.server import viewer_utils
from nerfstudio.viewer_beta.utils import CameraState, get_camera
-from nerfstudio.models.gaussian_splatting import GaussianSplattingModel
-from nerfstudio.cameras.cameras import Cameras
+from viser import ClientHandle
if TYPE_CHECKING:
from nerfstudio.viewer_beta.viewer import Viewer
@@ -96,7 +97,7 @@ def action(self, action: RenderAction):
# 1. we are in low_moving state
# 2. the current next_action is move, static, or rerender
return
- elif self.next_action == "rerender":
+ elif self.next_action.action == "rerender":
# never overwrite rerenders
pass
elif action.action == "static" and self.next_action.action == "move":
@@ -254,9 +255,33 @@ def _send_output_to_viewer(self, outputs: Dict[str, Any], static_render: bool =
depth = (
outputs["gl_z_buf_depth"].cpu().numpy() * self.viser_scale_ratio if "gl_z_buf_depth" in outputs else None
)
- jpg_quality = self.viewer.config.jpeg_quality if static_render else 40
+
+ # Convert to numpy.
+ selected_output = selected_output.cpu().numpy()
+ assert selected_output.shape[-1] == 3
+
+ # Pad image if the aspect ratio (W/H) doesn't match the client!
+ current_h, current_w = selected_output.shape[:2]
+ desired_aspect = self.client.camera.aspect
+ pad_width = int(max(0, (desired_aspect * current_h - current_w) // 2))
+ pad_height = int(max(0, (current_w / desired_aspect - current_h) // 2))
+ if pad_width > 5 or pad_height > 5:
+ selected_output = np.pad(
+ selected_output,
+ ((pad_height, pad_height), (pad_width, pad_width), (0, 0)),
+ mode="constant",
+ constant_values=0,
+ )
+
+ jpg_quality = (
+ self.viewer.config.jpeg_quality
+ if static_render
+ else 75
+ if self.viewer.render_tab_state.preview_render
+ else 40
+ )
self.client.set_background_image(
- selected_output.cpu().numpy(),
+ selected_output,
format=self.viewer.config.image_format,
jpeg_quality=jpg_quality,
depth=depth,
diff --git a/nerfstudio/viewer_beta/utils.py b/nerfstudio/viewer_beta/utils.py
index 3614fce419..8b9e237938 100644
--- a/nerfstudio/viewer_beta/utils.py
+++ b/nerfstudio/viewer_beta/utils.py
@@ -15,7 +15,7 @@
from __future__ import annotations
from dataclasses import dataclass
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Literal, Optional, Tuple, Union
import numpy as np
import torch
@@ -31,11 +31,13 @@ class CameraState:
"""A dataclass for storing the camera state."""
fov: float
- """ The field of view of the camera. """
+ """The field of view of the camera."""
aspect: float
- """ The aspect ratio of the image. """
+ """The aspect ratio of the image. """
c2w: Float[torch.Tensor, "3 4"]
- """ The camera matrix. """
+ """The camera matrix."""
+ camera_type: Literal[CameraType.PERSPECTIVE, CameraType.EQUIRECTANGULAR, CameraType.FISHEYE]
+ """Type of camera to render."""
def get_camera(
@@ -57,14 +59,19 @@ def get_camera(
focal_length = pp_h / np.tan(fov / 2.0)
intrinsics_matrix = torch.tensor([[focal_length, 0, pp_w], [0, focal_length, pp_h], [0, 0, 1]], dtype=torch.float32)
- camera_type = CameraType.PERSPECTIVE
+ if camera_state.camera_type is CameraType.EQUIRECTANGULAR:
+ fx = float(image_width / 2)
+ fy = float(image_height)
+ else:
+ fx = intrinsics_matrix[0, 0]
+ fy = intrinsics_matrix[1, 1]
camera = Cameras(
- fx=intrinsics_matrix[0, 0],
- fy=intrinsics_matrix[1, 1],
+ fx=fx,
+ fy=fy,
cx=pp_w,
cy=pp_h,
- camera_type=camera_type,
+ camera_type=camera_state.camera_type,
camera_to_worlds=camera_state.c2w.to(torch.float32)[None, ...],
times=torch.tensor([0.0], dtype=torch.float32),
)
diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py
index 2b6f4e140d..0c560f9d86 100644
--- a/nerfstudio/viewer_beta/viewer.py
+++ b/nerfstudio/viewer_beta/viewer.py
@@ -26,8 +26,8 @@
import viser
import viser.theme
import viser.transforms as vtf
-
from nerfstudio.cameras.camera_optimizers import CameraOptimizer
+from nerfstudio.cameras.cameras import CameraType
from nerfstudio.configs import base_config as cfg
from nerfstudio.data.datasets.base_dataset import InputDataset
from nerfstudio.models.base_model import Model
@@ -41,6 +41,7 @@
from nerfstudio.viewer_beta.render_state_machine import RenderAction, RenderStateMachine
from nerfstudio.viewer_beta.utils import CameraState, parse_object
from nerfstudio.viewer_beta.viewer_elements import ViewerControl, ViewerElement
+from typing_extensions import assert_never
if TYPE_CHECKING:
from nerfstudio.engine.trainer import Trainer
@@ -80,6 +81,7 @@ def __init__(
train_lock: Optional[threading.Lock] = None,
share: bool = False,
):
+ self.ready = False # Set to True at end of constructor.
self.config = config
self.trainer = trainer
self.last_step = 0
@@ -182,15 +184,16 @@ def __init__(
self.viser_server,
self.include_time,
VISER_NERFSTUDIO_SCALE_RATIO,
- self._interrupt_render,
- self._crop_params_update,
+ self._trigger_rerender,
self._output_type_change,
self._output_split_type_change,
default_composite_depth=self.config.default_composite_depth,
)
config_path = self.log_filename.parents[0] / "config.yml"
with tabs.add_tab("Render", viser.Icon.CAMERA):
- populate_render_tab(self.viser_server, config_path, self.datapath, self.control_panel)
+ self.render_tab_state = populate_render_tab(
+ self.viser_server, config_path, self.datapath, self.control_panel
+ )
with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT):
populate_export_tab(self.viser_server, self.control_panel, config_path)
@@ -203,7 +206,7 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem
element.install(self.viser_server)
# also rewire the hook to rerender
prev_cb = element.cb_hook
- element.cb_hook = lambda element: [prev_cb(element), self._interrupt_render(element)]
+ element.cb_hook = lambda element: [prev_cb(element), self._trigger_rerender()]
else:
# recursively create folders
# If the folder name is "Custom Elements/a/b", then:
@@ -237,6 +240,8 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem
for c in self.viewer_controls:
c._setup(self)
+ self.ready = True
+
def toggle_pause_button(self) -> None:
self.pause_train.visible = not self.pause_train.visible
self.resume_train.visible = not self.resume_train.visible
@@ -266,7 +271,27 @@ def get_camera_state(self, client: viser.ClientHandle) -> CameraState:
R = torch.tensor(R.as_matrix())
pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO
c2w = torch.concatenate([R, pos[:, None]], dim=1)
- camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w)
+ if self.ready and self.render_tab_state.preview_render:
+ camera_type = self.render_tab_state.preview_camera_type
+ camera_state = CameraState(
+ fov=self.render_tab_state.preview_fov,
+ aspect=self.render_tab_state.preview_aspect,
+ c2w=c2w,
+ camera_type=CameraType.PERSPECTIVE
+ if camera_type == "Perspective"
+ else CameraType.FISHEYE
+ if camera_type == "Fisheye"
+ else CameraType.EQUIRECTANGULAR
+ if camera_type == "Equirectangular"
+ else assert_never(camera_type),
+ )
+ else:
+ camera_state = CameraState(
+ fov=client.camera.fov,
+ aspect=client.camera.aspect,
+ c2w=c2w,
+ camera_type=CameraType.PERSPECTIVE,
+ )
return camera_state
def handle_disconnect(self, client: viser.ClientHandle) -> None:
@@ -278,7 +303,9 @@ def handle_new_client(self, client: viser.ClientHandle) -> None:
self.render_statemachines[client.client_id].start()
@client.camera.on_update
- def _(cam: viser.CameraHandle) -> None:
+ def _(_: viser.CameraHandle) -> None:
+ if not self.ready:
+ return
self.last_move_time = time.time()
with self.viser_server.atomic():
camera_state = self.get_camera_state(client)
@@ -314,13 +341,14 @@ def update_camera_poses(self):
self.camera_handles[key].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO
self.camera_handles[key].wxyz = R.wxyz
- def _interrupt_render(self, _) -> None:
+ def _trigger_rerender(self) -> None:
"""Interrupt current render."""
+ if not self.ready:
+ return
clients = self.viser_server.get_clients()
for id in clients:
camera_state = self.get_camera_state(clients[id])
- if camera_state is not None:
- self.render_statemachines[id].action(RenderAction("rerender", camera_state))
+ self.render_statemachines[id].action(RenderAction("move", camera_state))
def _toggle_training_state(self, _) -> None:
"""Toggle the trainer's training state."""
@@ -330,14 +358,6 @@ def _toggle_training_state(self, _) -> None:
elif self.trainer.training_state == "paused":
self.trainer.training_state = "training"
- def _crop_params_update(self, _) -> None:
- """Update crop parameters"""
- clients = self.viser_server.get_clients()
- for id in clients:
- camera_state = self.get_camera_state(clients[id])
- if camera_state is not None:
- self.render_statemachines[id].action(RenderAction("move", camera_state))
-
def _output_type_change(self, _):
self.output_type_changed = True
diff --git a/nerfstudio/viewer_beta/viewer_elements.py b/nerfstudio/viewer_beta/viewer_elements.py
index e8de855e0d..b53da0535a 100644
--- a/nerfstudio/viewer_beta/viewer_elements.py
+++ b/nerfstudio/viewer_beta/viewer_elements.py
@@ -35,7 +35,7 @@
ViserServer,
)
-from nerfstudio.cameras.cameras import Cameras
+from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.viewer_beta.utils import CameraState, get_camera
if TYPE_CHECKING:
@@ -141,7 +141,9 @@ def get_camera(self, img_height: int, img_width: int, client_id: Optional[int] =
R = torch.tensor(R.as_matrix())
pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO
c2w = torch.concatenate([R, pos[:, None]], dim=1)
- camera_state = CameraState(fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w)
+ camera_state = CameraState(
+ fov=client.camera.fov, aspect=client.camera.aspect, c2w=c2w, camera_type=CameraType.PERSPECTIVE
+ )
return get_camera(camera_state, img_height, img_width)
def register_click_cb(self, cb: Callable):
@@ -483,7 +485,11 @@ def __init__(
def _create_gui_handle(self, viser_server: ViserServer) -> None:
assert self.gui_handle is None, "gui_handle should be initialized once"
self.gui_handle = viser_server.add_gui_dropdown(
- self.name, self.options, self.default_value, disabled=self.disabled, hint=self.hint # type: ignore
+ self.name,
+ self.options,
+ self.default_value,
+ disabled=self.disabled,
+ hint=self.hint, # type: ignore
)
def set_options(self, new_options: List[TString]) -> None:
diff --git a/pyproject.toml b/pyproject.toml
index fd058f9e87..cd971dd6f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ dependencies = [
"torchvision>=0.14.1",
"torchmetrics[image]>=1.0.1",
"typing_extensions>=4.4.0",
- "viser==0.1.14",
+ "viser==0.1.17",
"nuscenes-devkit>=1.1.1",
"wandb>=0.13.3",
"xatlas",
From 38fc72729cc86d3e0d11c2df16f69f5da603b2d8 Mon Sep 17 00:00:00 2001
From: Abhik Ahuja
Date: Tue, 26 Dec 2023 17:07:08 -0800
Subject: [PATCH 100/101] Fix dycheck depth and rgb rescale (#2623)
Fix depth and rgb rescale
Not sure if depth is correct, since I'm not sure what the 3rd dim is for; the code runs properly though.
---
nerfstudio/data/dataparsers/dycheck_dataparser.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/nerfstudio/data/dataparsers/dycheck_dataparser.py b/nerfstudio/data/dataparsers/dycheck_dataparser.py
index d9d4c8f4c6..9af4f58001 100644
--- a/nerfstudio/data/dataparsers/dycheck_dataparser.py
+++ b/nerfstudio/data/dataparsers/dycheck_dataparser.py
@@ -173,7 +173,10 @@ def _rescale_depth(depth_raw: np.ndarray, cam: Dict) -> np.ndarray:
viewdirs /= np.linalg.norm(viewdirs, axis=-1, keepdims=True)
viewdirs = viewdirs.reshape((*batch_shape, 3))
cosa = viewdirs @ (cam["camera_to_worlds"][:, 2])
- depth = depth_raw / cosa[..., None]
+ if depth_raw.ndim == cosa.ndim:
+ depth = depth_raw[..., None] / cosa[..., None]
+ else:
+ depth = depth_raw / cosa[..., None]
return depth
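For context, a small NumPy sketch of the two broadcasting cases the patch above distinguishes; the shapes are illustrative:

```python
import numpy as np

h, w = 4, 6
cosa = np.full((h, w), 0.9)      # cosine between view direction and optical axis

depth_hw = np.ones((h, w))       # depth stored as (H, W): same ndim as cosa
depth_hw1 = np.ones((h, w, 1))   # depth stored as (H, W, 1): extra channel dim

# ndim matches -> add a trailing axis to both before dividing.
z1 = depth_hw[..., None] / cosa[..., None]   # shape (H, W, 1)
# ndim differs -> only cosa needs the trailing axis.
z2 = depth_hw1 / cosa[..., None]             # shape (H, W, 1)
assert z1.shape == z2.shape == (h, w, 1)
```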
From a8e6f8fa3fd6c0ad2f3e681dcf1519e74ad2230f Mon Sep 17 00:00:00 2001
From: Cyrus Vachha
Date: Wed, 27 Dec 2023 21:19:19 -0800
Subject: [PATCH 101/101] Adding docs for Instruct-GS2GS (#2702)
* Adding docs for Instruct-GS2GS
Adding documentation for Instruct-GS2GS including its page and adding it to the list of external and 3rd party methods.
* fixing blank formatting dev checks
* updating subtitle
Updating the subtitle to specify gaussian splatting
---
docs/index.md | 1 +
docs/nerfology/methods/igs2gs.md | 103 +++++++++++++++++++++++++
docs/nerfology/methods/index.md | 3 +-
nerfstudio/configs/external_methods.py | 13 ++++
4 files changed, 119 insertions(+), 1 deletion(-)
create mode 100644 docs/nerfology/methods/igs2gs.md
diff --git a/docs/index.md b/docs/index.md
index 6b160fcecc..3815860a87 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -151,6 +151,7 @@ This documentation is organized into 3 parts:
- [Nerfbusters](nerfology/methods/nerfbusters.md): Removing Ghostly Artifacts from Casually Captured NeRFs
- [NeRFPlayer](nerfology/methods/nerfplayer.md): 4D Radiance Fields by Streaming Feature Channels
- [Tetra-NeRF](nerfology/methods/tetranerf.md): Representing Neural Radiance Fields Using Tetrahedra
+- [Instruct-GS2GS](nerfology/methods/igs2gs.md): Editing 3DGS Scenes with Instructions
**Eager to contribute a method?** We'd love to see you use nerfstudio in implementing new (or even existing) methods! Please view our {ref}`guide` for more details about how to add to this list!
diff --git a/docs/nerfology/methods/igs2gs.md b/docs/nerfology/methods/igs2gs.md
new file mode 100644
index 0000000000..0b60486fdf
--- /dev/null
+++ b/docs/nerfology/methods/igs2gs.md
@@ -0,0 +1,103 @@
+# Instruct-GS2GS
+
+
+Editing Gaussian Splatting Scenes with Instructions
+
+```{button-link} https://instruct-gs2gs.github.io/
+:color: primary
+:outline:
+Paper Website
+```
+
+```{button-link} https://github.com/cvachha/instruct-gs2gs
+:color: primary
+:outline:
+Code
+```
+
+
+
+**Instruct-GS2GS enables instruction-based editing of 3D Gaussian Splatting scenes via a 2D diffusion model**
+
+## Installation
+
+First install nerfstudio dependencies. Then run:
+
+```bash
+pip install git+https://github.com/cvachha/instruct-gs2gs
+cd instruct-gs2gs
+pip install --upgrade pip setuptools
+pip install -e .
+```
+
+## Running Instruct-GS2GS
+
+Details for running Instruct-GS2GS (built with Nerfstudio!) can be found [here](https://github.com/cvachha/instruct-gs2gs). Once installed, run:
+
+```bash
+ns-train igs2gs --help
+```
+
+| Method | Description | Memory |
+| ------------ | ---------------------------- | ------ |
+| `igs2gs` | Full model, used in paper | ~15GB |
+
+Datasets need to be processed with COLMAP for Gaussian Splatting support.
+
+Once you have trained your GS scene for 20k iterations, the checkpoints will be saved to the `outputs` directory. Copy the path to the `nerfstudio_models` folder. (Note: We noticed that training for 20k iterations rather than 30k seemed to run more reliably)
+
+To start training for editing the GS, run the following command:
+
+```bash
+ns-train igs2gs --data {PROCESSED_DATA_DIR} --load-dir {outputs/.../nerfstudio_models} --pipeline.prompt {"prompt"} --pipeline.guidance-scale 12.5 --pipeline.image-guidance-scale 1.5
+```
+
+The `{PROCESSED_DATA_DIR}` must be the same path as used in training the original GS. Using the CLI commands, you can choose the prompt and the guidance scales used for InstructPix2Pix.
+
+## Method
+
+### Overview
+
+Instruct-GS2GS is a method for editing 3D Gaussian Splatting (3DGS) scenes with text instructions, following the approach of [Instruct-NeRF2NeRF](https://instruct-nerf2nerf.github.io/). Given a 3DGS reconstruction of a scene and the collection of images used to build it, the method uses an image-conditioned diffusion model ([InstructPix2Pix](https://www.timothybrooks.com/instruct-pix2pix)) to iteratively edit the input images while optimizing the underlying scene, resulting in an optimized 3D scene that respects the edit instruction. The paper demonstrates that the method can edit large-scale, real-world scenes and accomplish realistic, targeted edits.
+
+
+## Pipeline
+
+
+
+This section will walk through each component of the Instruct-GS2GS method.
+
+### How it Works
+
+Instruct-GS2GS gradually updates a reconstructed Gaussian Splatting scene by iteratively updating the dataset images while training the 3DGS:
+
+1. Images are rendered from the scene at all training viewpoints.
+2. They get edited by InstructPix2Pix given a global text instruction.
+3. The training dataset images are replaced with the edited images.
+4. The 3DGS continues training as usual for 2.5k iterations.
+
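+Below is a minimal, hypothetical sketch of this update schedule. It is not the igs2gs implementation; `render_view`, `edit_with_ip2p`, and `train_step` are stand-ins for the real pipeline, datamanager, and diffusion-model calls:
+
+```python
+# Hypothetical sketch of the Instruct-GS2GS dataset-update schedule (not the real code).
+from typing import Callable, List
+
+import torch
+
+
+def iterative_dataset_update(
+    train_images: List[torch.Tensor],            # original (unedited) training images
+    render_view: Callable[[int], torch.Tensor],  # renders the current 3DGS at view index i
+    edit_with_ip2p: Callable[[torch.Tensor, torch.Tensor, str], torch.Tensor],
+    train_step: Callable[[List[torch.Tensor]], None],
+    prompt: str,
+    start_iteration: int = 20_000,
+    max_iterations: int = 27_500,
+    edit_every: int = 2_500,
+) -> None:
+    """Every `edit_every` iterations, re-edit every dataset image, then keep training."""
+    dataset = [img.clone() for img in train_images]
+    for it in range(start_iteration, max_iterations):
+        if (it - start_iteration) % edit_every == 0:
+            # Steps 1-3: render each training view, edit it with the diffusion model
+            # conditioned on the original image, and replace the dataset image.
+            for i, original in enumerate(train_images):
+                rendered = render_view(i)
+                dataset[i] = edit_with_ip2p(rendered, original, prompt)
+        # Step 4: continue standard 3DGS training on the (edited) dataset.
+        train_step(dataset)
+```
+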
+### Editing Images with InstructPix2Pix
+
+To update a dataset image from a given viewpoint, Instruct-GS2GS takes the original, unedited training image as image conditioning and uses the global text instruction as text conditioning. This process mixes the information of the diffusion model, which attempts to edit the image, the current 3D structure of the 3DGS, and view-consistent information from the unedited, ground-truth images. By combining this set of information, the edit is respected while maintaining 3D consistency.
+
+The code snippet for how an image is edited in the pipeline can be found [here](https://github.com/cvachha/instruct-gs2gs/blob/main/igs2gs/ip2p.py).
+
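+For orientation, here is a heavily simplified, hypothetical sketch of that per-view edit (not the linked ip2p.py code): the current render is partially noised and then denoised by an InstructPix2Pix-style model conditioned on the original image and the text instruction. `denoise_step` stands in for the real diffusion model, and the linear noise mix is a simplification of the actual diffusion schedule:
+
+```python
+# Hypothetical per-view edit, illustrating only the three conditioning signals.
+import torch
+
+
+def edit_image(
+    rendered: torch.Tensor,    # current 3DGS render for this view, (3, H, W) in [0, 1]
+    original: torch.Tensor,    # unedited ground-truth image, used as image conditioning
+    prompt: str,               # global text instruction
+    denoise_step,              # callable(noisy, t, original, prompt) -> less-noisy image
+    num_steps: int = 20,
+    noise_level: float = 0.7,  # how much of the render is replaced by noise (assumed value)
+) -> torch.Tensor:
+    noise = torch.randn_like(rendered)
+    noisy = noise_level * noise + (1.0 - noise_level) * rendered  # partially noised render
+    for t in torch.linspace(noise_level, 0.0, num_steps):
+        noisy = denoise_step(noisy, t, original, prompt)
+    return noisy.clamp(0.0, 1.0)
+```
+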
+### Iterative Dataset Update and Implementation
+
+The method takes in a dataset of camera poses and training images, a trained 3DGS scene, and a user-specified text-prompt instruction, e.g. "make him a marble statue". Instruct-GS2GS constructs the edited GS scene guided by the text prompt by applying a 2D text- and image-conditioned diffusion model, in this case Instruct-Pix2Pix, to all training images over the course of training. It performs these edits using an iterative update scheme in which every training dataset image is individually updated by the diffusion model once every 2.5k training iterations. This process gives the GS a holistic edit while maintaining 3D consistency.
+
+The process is similar to Instruct-NeRF2NeRF: for a given training camera view, the original training image is set as the conditioning image, the noisy input image is the GS render from that camera combined with randomly selected noise, and the diffusion model returns an edited image respecting the text conditioning. This propagates the edits into the GS scene, and conditioning Instruct-Pix2Pix on the original unedited training image keeps the edits grounded.
+
+This method uses Nerfstudio's gsplat library for the underlying Gaussian Splatting model. We adapt similar diffusion-model parameters from Instruct-NeRF2NeRF, among them the values that define the amount of noise (and therefore the amount of signal retained from the original images). We vary the classifier-free guidance scales per edit and scene, using a range of values. We edit the entire dataset and then train the scene for 2.5k iterations. For GS training, we use L1 and LPIPS losses. We train our method for a maximum of 27.5k iterations (starting from a GS scene trained for 20k iterations), though in practice we stop training once the edit has converged. In many cases, the optimal training length is a subjective decision: a user may prefer more subtle or more extreme edits that are best found at different stages of training.
+
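+As a rough illustration of the photometric objective mentioned above, the following is a hedged sketch of an L1 + LPIPS loss using torchmetrics; the actual igs2gs loss weighting and implementation may differ, and `lpips_weight` is an assumed value:
+
+```python
+# Hypothetical L1 + LPIPS loss between renders and the edited dataset images.
+import torch
+from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
+
+lpips = LearnedPerceptualImagePatchSimilarity(normalize=True)  # expects inputs in [0, 1]
+
+
+def gs_photometric_loss(
+    render: torch.Tensor,       # (B, 3, H, W) render of the 3DGS, values in [0, 1]
+    edited_gt: torch.Tensor,    # (B, 3, H, W) edited dataset image, values in [0, 1]
+    lpips_weight: float = 0.1,  # assumed weighting, not taken from the paper
+) -> torch.Tensor:
+    l1 = torch.abs(render - edited_gt).mean()
+    perceptual = lpips(render, edited_gt)
+    return l1 + lpips_weight * perceptual
+```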
+
+## Results
+
+For results, view the [project page](https://instruct-gs2gs.github.io/)!
+
+
\ No newline at end of file
diff --git a/docs/nerfology/methods/index.md b/docs/nerfology/methods/index.md
index f1d4bd5e83..0cc68432d8 100644
--- a/docs/nerfology/methods/index.md
+++ b/docs/nerfology/methods/index.md
@@ -38,6 +38,7 @@ The following methods are supported in nerfstudio:
Tetra-NeRF
TensoRF
Generfacto
+ Instruct-GS2GS
```
(own_method_docs)=
@@ -50,7 +51,7 @@ We also welcome additions to the list of methods above. To do this, simply creat
1. Add a markdown file describing the model to the `docs/nerfology/methods` folder
2. Update the above list of implement methods in this file.
-3. Add the method to the {ref}`this` list in `docs/index.md`.
+3. Add the method to {ref}`this` list in `docs/index.md`.
4. Add a new `ExternalMethod` entry to the `nerfstudio/configs/external_methods.py` file.
For the method description, please refer to the [Instruct-NeRF2NeRF](in2n) page as an example of the layout. Please try to include the following information:
diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py
index ce4e7bbc2b..72a679066f 100644
--- a/nerfstudio/configs/external_methods.py
+++ b/nerfstudio/configs/external_methods.py
@@ -139,6 +139,19 @@ class ExternalMethod:
)
)
+# Instruct-GS2GS
+external_methods.append(
+ ExternalMethod(
+ """[bold yellow]Instruct-GS2GS[/bold yellow]
+For more information visit: https://docs.nerf.studio/nerfology/methods/igs2gs.html
+
+To enable Instruct-GS2GS, you must install it first by running:
+ [grey]pip install git+https://github.com/cvachha/instruct-gs2gs[/grey]""",
+ configurations=[("igs2gs", "Instruct-GS2GS. Full model, used in paper")],
+ pip_package="git+https://github.com/cvachha/instruct-gs2gs",
+ )
+)
+
@dataclass
class ExternalMethodTrainerConfig(TrainerConfig):