Skip to content

Commit

Permalink
Fix OutputWeight nodes when locally constant
Browse files Browse the repository at this point in the history
  • Loading branch information
Zylann committed Aug 22, 2024
1 parent c40d320 commit a76509d
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 9 deletions.
48 changes: 39 additions & 9 deletions generators/graph/voxel_generator_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,25 +142,52 @@ void VoxelGeneratorGraph::gather_indices_and_weights(
// TODO Optimization: exclude up-front outputs that are known to be zero?
// So we choose the cases below based on non-zero outputs instead of total output count

// It is unfortunately not practical to handle all combinations of single-value/array-of-values for every
// layer, so for now we try to workaround that
struct WeightBufferArray {
FixedArray<Span<const float>, 16> array;

inline float get_weight(unsigned int output_index, size_t value_index) const {
Span<const float> s = array[output_index];
#ifdef DEV_ENABLED
// That span can either have size 1 (means all values would be the same) or have regular buffer
// size. We use min() to quickly access that without branching, but that could hide actual OOB bugs, so
// make sure they are detected
ZN_ASSERT(s.size() > 0 && (value_index < s.size() || s.size() == 1));
#endif
const float weight = s[math::min(value_index, s.size() - 1)];
return weight;
}
};

// TODO Could maybe put this part outside?
FixedArray<Span<const float>, 16> buffers;
WeightBufferArray buffers;
const unsigned int buffers_count = weight_outputs.size();
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const WeightOutput &info = weight_outputs[oi];
const pg::Runtime::Buffer &buffer = state.get_buffer(info.output_buffer_index);
buffers[oi] = Span<const float>(buffer.data, buffer.size);
const math::Interval &range = state.get_range_const_ref(info.output_buffer_index);

if (range.is_single_value()) {
// The weight is locally constant (or compile-time constant).
// We can't use the usual buffer because if we use optimized execution mapping, they won't be filled by any
// operation
buffers.array[oi] = Span<const float>(&range.min, 1);
} else {
const pg::Runtime::Buffer &buffer = state.get_buffer(info.output_buffer_index);
buffers.array[oi] = Span<const float>(buffer.data, buffer.size);
}
}

if (buffers_count <= 4) {
// Pick all results and fill with spare indices to keep semantic
unsigned int value_index = 0;
size_t value_index = 0;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
FixedArray<uint8_t, 4> weights;
FixedArray<uint8_t, 4> indices = spare_indices;
fill(weights, uint8_t(0));
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const float weight = buffers[oi][value_index];
const float weight = buffers.get_weight(oi, value_index);
// TODO Optimization: weight output nodes could already multiply by 255 and clamp afterward
// so we would not need to do it here
weights[oi] = math::clamp(weight * 255.f, 0.f, 255.f);
Expand All @@ -180,13 +207,13 @@ void VoxelGeneratorGraph::gather_indices_and_weights(

} else if (buffers_count == 4) {
// Pick all results
unsigned int value_index = 0;
size_t value_index = 0;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
FixedArray<uint8_t, 4> weights;
FixedArray<uint8_t, 4> indices;
for (unsigned int oi = 0; oi < buffers_count; ++oi) {
const float weight = buffers[oi][value_index];
const float weight = buffers.get_weight(oi, value_index);
weights[oi] = math::clamp(weight * 255.f, 0.f, 255.f);
indices[oi] = weight_outputs[oi].layer_index;
}
Expand All @@ -204,7 +231,7 @@ void VoxelGeneratorGraph::gather_indices_and_weights(
} else {
// More weights than we can have per voxel. Will need to pick most represented weights
const float pivot = 1.f / 5.f;
unsigned int value_index = 0;
size_t value_index = 0;
FixedArray<uint8_t, 16> skipped_outputs;
for (int rz = rmin.z; rz < rmax.z; ++rz) {
for (int rx = rmin.x; rx < rmax.x; ++rx) {
Expand All @@ -219,7 +246,8 @@ void VoxelGeneratorGraph::gather_indices_and_weights(
unsigned int recorded_weights = 0;
// Pick up weights above pivot (this is not as correct as a sort but faster)
for (unsigned int oi = 0; oi < buffers_count && recorded_weights < indices.size(); ++oi) {
const float weight = buffers[oi][value_index];
const float weight = buffers.get_weight(oi, value_index);

if (weight > pivot) {
weights[recorded_weights] = math::clamp(weight * 255.f, 0.f, 255.f);
indices[recorded_weights] = weight_outputs[oi].layer_index;
Expand Down Expand Up @@ -648,6 +676,8 @@ VoxelGenerator::Result VoxelGeneratorGraph::generate_block(VoxelGenerator::Voxel
if (runtime_ptr->weight_outputs_count > 0 && !sdf_is_air) {
// We can skip this when SDF is air because there won't be any matter to give a texture to
// TODO Range analysis on that?
// Not easy to do that from here, they would have to ALL be locally constant in order to use a
// short-circuit...
for (unsigned int i = 0; i < runtime_ptr->weight_outputs_count; ++i) {
required_outputs.push_back(runtime_ptr->weight_output_indices[i]);
}
Expand Down
10 changes: 10 additions & 0 deletions generators/graph/voxel_graph_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,16 @@ class Runtime {
return ranges[address];
}

inline const math::Interval &get_range_const_ref(uint16_t address) const {
// TODO Just for convenience because STL bound checks aren't working in Godot 3
ZN_ASSERT(address < buffers.size());
return ranges[address];
}

inline uint32_t get_buffer_size() const {
return buffer_size;
}

void clear() {
buffer_size = 0;
// buffer_capacity = 0;
Expand Down
1 change: 1 addition & 0 deletions tests/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ void run_voxel_tests() {
VOXEL_TEST(test_voxel_stream_sqlite_key_blob80_encoding);
VOXEL_TEST(test_voxel_stream_sqlite_basic);
VOXEL_TEST(test_voxel_stream_sqlite_coordinate_format);
VOXEL_TEST(test_voxel_graph_4_default_weights);

print_line("------------ Voxel tests end -------------");
}
Expand Down
107 changes: 107 additions & 0 deletions tests/voxel/test_voxel_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2222,4 +2222,111 @@ void test_voxel_graph_non_square_image() {
ZN_TEST_ASSERT(sd.f > 2.9f && sd.f < 3.1);
}

void test_voxel_graph_4_default_weights() { // Related to issue #686
static constexpr uint32_t block_size = 16;

struct L {
// The idea is to generate data into the memory cache, which may affect the result if there is a garbage buffer
// bug
static void warmup() {
Ref<VoxelGeneratorGraph> warmup_generator;
warmup_generator.instantiate();

VoxelGraphFunction &g = **warmup_generator->get_main_function();

// X --- Sin --- Add --- OutSDF
// /
// Y

const uint32_t n_x = g.create_node(VoxelGraphFunction::NODE_INPUT_X);
const uint32_t n_y = g.create_node(VoxelGraphFunction::NODE_INPUT_Y);
const uint32_t n_sin = g.create_node(VoxelGraphFunction::NODE_SIN);
const uint32_t n_add = g.create_node(VoxelGraphFunction::NODE_ADD);
const uint32_t n_out_sdf = g.create_node(VoxelGraphFunction::NODE_OUTPUT_SDF);

g.add_connection(n_x, 0, n_sin, 0);
g.add_connection(n_sin, 0, n_add, 0);
g.add_connection(n_add, 0, n_out_sdf, 0);
g.add_connection(n_y, 0, n_add, 1);

CompilationResult result = warmup_generator->compile(false);
ZN_TEST_ASSERT(result.success);

VoxelBuffer buffer(VoxelBuffer::ALLOCATOR_DEFAULT);
buffer.create(Vector3iUtil::create(block_size));

VoxelGenerator::VoxelQueryData q{ buffer, Vector3i(0, 0, 0), 0 };
warmup_generator->generate_block(q);
}

static void test(const float test_w0, const float test_w1, const float test_w2, const float test_w3) {
const uint8_t test_w0b = math::clamp(static_cast<int>(255.0 * test_w0), 0, 255);
const uint8_t test_w1b = math::clamp(static_cast<int>(255.0 * test_w1), 0, 255);
const uint8_t test_w2b = math::clamp(static_cast<int>(255.0 * test_w2), 0, 255);
const uint8_t test_w3b = math::clamp(static_cast<int>(255.0 * test_w3), 0, 255);
const uint32_t test_ew = encode_weights_to_packed_u16_lossy(test_w0b, test_w1b, test_w2b, test_w3b);

Ref<VoxelGeneratorGraph> generator;
generator.instantiate();
{
VoxelGraphFunction &g = **generator->get_main_function();

// Y --- Plane --- OutSDF
// OutWeight1
// OutWeight2
// OutWeight3
// OutWeight4

const uint32_t n_y = g.create_node(VoxelGraphFunction::NODE_INPUT_Y);
const uint32_t n_plane = g.create_node(VoxelGraphFunction::NODE_SDF_PLANE);
const uint32_t n_ow0 = g.create_node(VoxelGraphFunction::NODE_OUTPUT_WEIGHT);
const uint32_t n_ow1 = g.create_node(VoxelGraphFunction::NODE_OUTPUT_WEIGHT);
const uint32_t n_ow2 = g.create_node(VoxelGraphFunction::NODE_OUTPUT_WEIGHT);
const uint32_t n_ow3 = g.create_node(VoxelGraphFunction::NODE_OUTPUT_WEIGHT);
// Putting this one last can make bugs show up. Unusual setups increase entropy.
const uint32_t n_out_sdf = g.create_node(VoxelGraphFunction::NODE_OUTPUT_SDF);

g.set_node_default_input(n_plane, 1, 1.0);

g.set_node_default_input(n_ow0, 0, test_w0);
g.set_node_default_input(n_ow1, 0, test_w1);
g.set_node_default_input(n_ow2, 0, test_w2);
g.set_node_default_input(n_ow3, 0, test_w3);

g.set_node_param(n_ow0, 0, 0);
g.set_node_param(n_ow1, 0, 1);
g.set_node_param(n_ow2, 0, 2);
g.set_node_param(n_ow3, 0, 3);

g.add_connection(n_y, 0, n_plane, 0);
g.add_connection(n_plane, 0, n_out_sdf, 0);

CompilationResult result = generator->compile(false);
ZN_TEST_ASSERT(result.success);
}

VoxelBuffer buffer(VoxelBuffer::ALLOCATOR_DEFAULT);
buffer.create(Vector3iUtil::create(block_size));

VoxelGenerator::VoxelQueryData q{ buffer, Vector3i(0, 0, 0), 0 };
// generator->set_use_optimized_execution_map(false);
generator->generate_block(q);

const uint32_t ei = buffer.get_voxel(10, 0, 0, VoxelBuffer::CHANNEL_INDICES);
const uint32_t ew = buffer.get_voxel(10, 0, 0, VoxelBuffer::CHANNEL_WEIGHTS);

const FixedArray<uint8_t, 4> indices = decode_indices_from_packed_u16(ei);
const FixedArray<uint8_t, 4> weights = decode_weights_from_packed_u16(ew);

ZN_TEST_ASSERT(indices[0] == 0 && indices[1] == 1 && indices[2] == 2 && indices[3] == 3);
ZN_TEST_ASSERT(ew == test_ew);
}
};

// L::warmup();
L::test(0.5, 0.2, 0.4, 0.8);
L::test(0.5, 0.0, 0.25, 1.0);
L::test(0.5, 0.2, 0.4, 0.8);
}

} // namespace zylann::voxel::tests
1 change: 1 addition & 0 deletions tests/voxel/test_voxel_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ void test_voxel_graph_many_weight_outputs();
void test_image_range_grid();
void test_voxel_graph_many_subdivisions();
void test_voxel_graph_non_square_image();
void test_voxel_graph_4_default_weights();

} // namespace zylann::voxel::tests

Expand Down

0 comments on commit a76509d

Please sign in to comment.