Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Acceleration Structure Conversion #790

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion include/nbl/asset/IAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class ITopLevelAccelerationStructure : public AccelerationStructure
PREFER_FAST_BUILD_BIT = 0x1u<<3u,
LOW_MEMORY_BIT = 0x1u<<4u,
// Synthetic flag we use to indicate `VkAccelerationStructureGeometryInstancesDataKHR::arrayOfPointers`
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u,
INSTANCE_DATA_IS_POINTERS_TYPE_ENCODED_LSB = 0x1u<<5u, // this flag really shouldn't be settable outside of `video::IGPU`
// Provided by VK_NV_ray_tracing_motion_blur, but we always override and deduce from creation flag because of
// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-VkAccelerationStructureBuildGeometryInfoKHR-dstAccelerationStructure-04927
//MOTION_BIT = 0x1u<<5u,
Expand Down
26 changes: 13 additions & 13 deletions include/nbl/asset/ICPUAccelerationStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
return {};
}
inline std::span<const uint32_t> getGeometryPrimitiveCounts(const size_t geomIx) const
inline std::span<const uint32_t> getGeometryPrimitiveCounts() const
{
if (m_geometryPrimitiveCount)
return {m_geometryPrimitiveCount->begin(),m_geometryPrimitiveCount->end()};
Expand Down Expand Up @@ -79,25 +79,25 @@ class ICPUBottomLevelAccelerationStructure final : public IBottomLevelAccelerati
{
if (!isMutable())
return false;
m_buildFlags &= BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
m_buildFlags &= ~BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
m_geometryPrimitiveCount = std::move(ranges);
m_triangleGeoms = std::move(geometries);
m_AABBGeoms = nullptr;
return true;
}

//
inline core::SRange<AABBs<asset::ICPUBuffer>> getAABBGeometries()
inline std::span<AABBs<asset::ICPUBuffer>> getAABBGeometries()
{
if (!isMutable() || !m_AABBGeoms)
return {nullptr,nullptr};
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
}
inline core::SRange<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
inline std::span<const AABBs<asset::ICPUBuffer>> getAABBGeometries() const
{
if (!m_AABBGeoms)
return {nullptr,nullptr};
return {m_AABBGeoms->begin(),m_AABBGeoms->end()};
return {m_AABBGeoms->data(),m_AABBGeoms->size()};
}
inline bool setGeometries(core::smart_refctd_dynamic_array<AABBs<ICPUBuffer>>&& geometries, core::smart_refctd_dynamic_array<uint32_t>&& ranges)
{
Expand Down Expand Up @@ -337,17 +337,17 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
std::variant<StaticInstance,MatrixMotionInstance,SRTMotionInstance> instance = StaticInstance{};
};

core::SRange<PolymorphicInstance> getInstances()
std::span<PolymorphicInstance> getInstances()
{
if (!isMutable() || !m_instances)
return {nullptr,nullptr};
return {m_instances->begin(),m_instances->end()};
return {};
return {m_instances->data(),m_instances->size()};
}
core::SRange<const PolymorphicInstance> getInstances() const
std::span<const PolymorphicInstance> getInstances() const
{
if (!m_instances)
return {nullptr,nullptr};
return {m_instances->begin(),m_instances->end()};
return {};
return {m_instances->data(),m_instances->size()};
}
bool setInstances(core::smart_refctd_dynamic_array<PolymorphicInstance>&& _instances)
{
Expand All @@ -367,7 +367,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
}

//!
constexpr static inline auto AssetType = ET_BOTOM_LEVEL_ACCELERATION_STRUCTURE;
constexpr static inline auto AssetType = ET_TOP_LEVEL_ACCELERATION_STRUCTURE;
inline IAsset::E_TYPE getAssetType() const override { return AssetType; }

inline core::smart_refctd_ptr<IAsset> clone(uint32_t _depth = ~0u) const override
Expand Down
6 changes: 3 additions & 3 deletions include/nbl/video/ILogicalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
// Create a sampler object to use with ImageViews
virtual core::smart_refctd_ptr<IGPUSampler> createSampler(const IGPUSampler::SParams& _params) = 0;
// acceleration structures
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUAccelerationStructure::SCreationParams&& params)
inline core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure> createBottomLevelAccelerationStructure(IGPUBottomLevelAccelerationStructure::SCreationParams&& params)
{
if (invalidCreationParams(params))
{
Expand Down Expand Up @@ -402,7 +402,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
inline AccelerationStructureBuildSizes getAccelerationStructureBuildSizes(
const core::bitflag<IGPUBottomLevelAccelerationStructure::BUILD_FLAGS> flags,
const bool motionBlur,
const std::span<Geometry> geometries,
const std::span<const Geometry> geometries,
const uint32_t* const pMaxPrimitiveCounts
) const
{
Expand All @@ -412,7 +412,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
return {};
}

if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags, m_enabledFeatures))
if (!IGPUBottomLevelAccelerationStructure::validBuildFlags(flags,m_enabledFeatures))
{
NBL_LOG_ERROR("Invalid build flags");
return {};
Expand Down
127 changes: 122 additions & 5 deletions include/nbl/video/utilities/CAssetConverter.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class CAssetConverter : public core::IReferenceCounted
asset::ICPUSampler,
asset::ICPUShader,
asset::ICPUBuffer,
// acceleration structures,
asset::ICPUBottomLevelAccelerationStructure,
asset::ICPUTopLevelAccelerationStructure,
asset::ICPUImage,
asset::ICPUBufferView,
asset::ICPUImageView,
Expand Down Expand Up @@ -71,6 +72,14 @@ class CAssetConverter : public core::IReferenceCounted
{
if (!params.valid())
return nullptr;
#ifndef _NBL_DEBUG
if (!params.optimizer)
{
using pass_e = asset::ISPIRVOptimizer::E_OPTIMIZER_PASS;
// shall we do others?
params.optimizer = core::make_smart_rectd_ptr<asset::ISPIRVOptimizer>({EOP_STRIP_DEBUG_INFO});
}
#endif
return core::smart_refctd_ptr<CAssetConverter>(new CAssetConverter(std::move(params)),core::dont_grab);
}
// When getting dependents, the creation parameters of GPU objects will be produced and patched appropriately.
Expand Down Expand Up @@ -149,6 +158,75 @@ class CAssetConverter : public core::IReferenceCounted
return {true,retval};
}
};
// Options shared by the Bottom-Level and Top-Level Acceleration Structure patches.
// Everything is packed into single-byte bit-fields; the declaration order below is part of the layout.
struct NBL_API2 acceleration_structure_patch_base
{
    public:
        // Requested build tradeoff. `Invalid` is the default value of `preference`,
        // and a patch whose preference is `Invalid` never combines successfully (see `combine_impl`).
        enum class BuildPreference : uint8_t
        {
            None = 0,
            FastTrace = 1,
            FastBuild = 2,
            Invalid = 3
        };

        //! select build flags
        uint8_t allowUpdate : 1 = false;
        uint8_t allowCompaction : 1 = false;
        uint8_t allowDataAccess : 1 = false;
        BuildPreference preference : 2 = BuildPreference::Invalid;
        uint8_t lowMemory : 1 = false;
        //! things that control the build
        uint8_t hostBuild : 1 = false;
        uint8_t compactAfterBuild : 1 = false;

    protected:
        // Defined out of line; NOTE(review): presumably checks the options against what `device` supports - confirm in the implementation.
        bool valid(const ILogicalDevice* device);

        // Merges `other` into a copy of `_this`.
        // Returns {false, unmodified copy of `_this`} when the two preferences differ or when the preference is `Invalid`,
        // otherwise {true, copy of `_this` with every single-bit option OR-ed with the one from `other`}.
        template<typename CRTP>
        std::pair<bool,CRTP> combine_impl(const CRTP& _this, const CRTP& other) const
        {
            // both sides must agree on a usable preference before anything gets merged
            const bool mergeable = _this.preference==other.preference && _this.preference!=BuildPreference::Invalid;
            std::pair<bool,CRTP> result(mergeable,_this);
            if (mergeable)
            {
                CRTP& merged = result.second;
                merged.allowUpdate |= other.allowUpdate;
                merged.allowCompaction |= other.allowCompaction;
                merged.allowDataAccess |= other.allowDataAccess;
                merged.lowMemory |= other.lowMemory;
                merged.hostBuild |= other.hostBuild;
                merged.compactAfterBuild |= other.compactAfterBuild;
            }
            return result;
        }
};
// Patch specialization for Bottom Level Acceleration Structures.
// All the option bit-fields come from the `acceleration_structure_patch_base` base.
template<>
struct NBL_API2 patch_impl_t<asset::ICPUBottomLevelAccelerationStructure> : acceleration_structure_patch_base
{
public:
// Macro is defined outside of this view; NOTE(review): presumably it is what provides `this_t` used below - confirm.
PATCH_IMPL_BOILERPLATE(asset::ICPUBottomLevelAccelerationStructure);

// Build flag enum of the CPU BLAS this patch applies to
using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
// Only declared here, the definition lives elsewhere.
// NOTE(review): presumably merges the patch options with the flags already on `blas` - confirm in the implementation.
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUBottomLevelAccelerationStructure* blas) const;

protected:
// Forwards to the shared `combine_impl` of the base; `.first` of the result tells whether the merge succeeded.
inline std::pair<bool,this_t> combine(const this_t& other) const
{
return combine_impl<this_t>(*this,other);
}
};
// Patch specialization for Top Level Acceleration Structures.
// All the option bit-fields come from the `acceleration_structure_patch_base` base.
template<>
struct NBL_API2 patch_impl_t<asset::ICPUTopLevelAccelerationStructure> : acceleration_structure_patch_base
{
public:
// Macro is defined outside of this view; NOTE(review): presumably it is what provides `this_t` used below - confirm.
PATCH_IMPL_BOILERPLATE(asset::ICPUTopLevelAccelerationStructure);

// Build flag enum of the CPU TLAS this patch applies to
using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
// Only declared here, the definition lives elsewhere.
// NOTE(review): presumably merges the patch options with the flags already on `tlas` - confirm in the implementation.
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUTopLevelAccelerationStructure* tlas) const;

protected:
// Forwards to the shared `combine_impl` of the base; `.first` of the result tells whether the merge succeeded.
inline std::pair<bool,this_t> combine(const this_t& other) const
{
return combine_impl<this_t>(*this,other);
}
};
template<>
struct NBL_API2 patch_impl_t<asset::ICPUImage>
{
Expand Down Expand Up @@ -458,6 +536,8 @@ class CAssetConverter : public core::IReferenceCounted
virtual const patch_t<asset::ICPUSampler>* operator()(const lookup_t<asset::ICPUSampler>&) const = 0;
virtual const patch_t<asset::ICPUShader>* operator()(const lookup_t<asset::ICPUShader>&) const = 0;
virtual const patch_t<asset::ICPUBuffer>* operator()(const lookup_t<asset::ICPUBuffer>&) const = 0;
virtual const patch_t<asset::ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
virtual const patch_t<asset::ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUTopLevelAccelerationStructure>&) const = 0;
virtual const patch_t<asset::ICPUImage>* operator()(const lookup_t<asset::ICPUImage>&) const = 0;
virtual const patch_t<asset::ICPUBufferView>* operator()(const lookup_t<asset::ICPUBufferView>&) const = 0;
virtual const patch_t<asset::ICPUImageView>* operator()(const lookup_t<asset::ICPUImageView>&) const = 0;
Expand Down Expand Up @@ -577,6 +657,8 @@ class CAssetConverter : public core::IReferenceCounted
bool operator()(lookup_t<asset::ICPUSampler>);
bool operator()(lookup_t<asset::ICPUShader>);
bool operator()(lookup_t<asset::ICPUBuffer>);
bool operator()(lookup_t<asset::ICPUBottomLevelAccelerationStructure>);
bool operator()(lookup_t<asset::ICPUTopLevelAccelerationStructure>);
bool operator()(lookup_t<asset::ICPUImage>);
bool operator()(lookup_t<asset::ICPUBufferView>);
bool operator()(lookup_t<asset::ICPUImageView>);
Expand Down Expand Up @@ -717,6 +799,16 @@ class CAssetConverter : public core::IReferenceCounted
return {};
}

// This is a weird signature, but it's for the IGPUBuffer which backs the acceleration structure.
// The default implementation hands back an empty span.
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBottomLevelAccelerationStructure* blas, const patch_t<asset::ICPUBottomLevelAccelerationStructure>& patch) const
{
    const std::span<const uint32_t> noFamilies = {};
    return noFamilies;
}
// Top Level Acceleration Structure overload, the default implementation hands back an empty span.
virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUTopLevelAccelerationStructure* tlas, const patch_t<asset::ICPUTopLevelAccelerationStructure>& patch) const
{
    const std::span<const uint32_t> noFamilies = {};
    return noFamilies;
}

virtual inline std::span<const uint32_t> getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const patch_t<asset::ICPUImage>& patch) const
{
return {};
Expand Down Expand Up @@ -793,6 +885,7 @@ class CAssetConverter : public core::IReferenceCounted
{
// By default the last queue to touch a GPU object will own it after any transfer or compute operations are complete.
// If you want to record a pipeline barrier that will release ownership to another family, override this.
// The overload for the IGPUBuffer may be called with a hash belonging to an Acceleration Structure, which means that it's the storage buffer backing the AS
virtual inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom)
{
return IQueue::FamilyIgnored;
Expand Down Expand Up @@ -829,6 +922,10 @@ class CAssetConverter : public core::IReferenceCounted
IUtilities* utilities = nullptr;
// optional, last submit (compute, transfer if no compute needed) signals these in addition to the scratch semaphore
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignalSemaphores = {};
// specific to Acceleration Structure Build, they need to be at least as large as the largest amount of scratch required for an AS build
CAsyncSingleBufferSubAllocatorST<>* scratchForASBuild = nullptr;
//
IDeviceMemoryAllocator* compactedASAllocator = nullptr;
// specific to mip-map recomputation, these are okay defaults for the size of our Descriptor Indexed temporary descriptor set
uint32_t sampledImageBindingCount = 1<<10;
uint32_t storageImageBindingCount = 11<<10;
Expand All @@ -853,6 +950,13 @@ class CAssetConverter : public core::IReferenceCounted
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/vkCmdCopyBufferToImage.html#VUID-vkCmdCopyBufferToImage-commandBuffer-07739
inline core::bitflag<IQueue::FAMILY_FLAGS> getRequiredQueueFlags() const
{
    // plain accessor for the stored `m_queueFlags`
    return m_queueFlags;
}

// Just enough memory to build the Acceleration Structures one by one, waiting for each build to complete in between
inline uint64_t getMinASBuildScratchSize() const
{
    return m_minASBuildScratchSize;
}
// Enough memory to build and compact all the Acceleration Structures at once,
// obviously respecting the order of BLAS (build->compact) -> TLAS (build->compact)
inline uint64_t getMaxASBuildScratchSize() const
{
    return m_maxASBuildScratchSize;
}
// If this returns NONE, it means there are no Acceleration Structures to build
inline auto getASBuildScratchUsages() const
{
    return m_ASBuildScratchUsages;
}

//
inline operator bool() const
{
    // the object converts to `true` exactly when `m_converter` does
    const bool hasConverter = bool(m_converter);
    return hasConverter;
}

Expand Down Expand Up @@ -917,19 +1021,32 @@ class CAssetConverter : public core::IReferenceCounted
core::smart_refctd_ptr<const AssetType> canonical;
// gpu object to transfer canonical's data to or build it from
asset_traits<AssetType>::video_t* gpuObj;
// only relevant for images
uint16_t recomputeMips = 0;
union
{
// only relevant for images
uint16_t recomputeMips = 0;
//
struct ASBuildParams
{
uint8_t host : 1;
uint8_t compact : 1;
} asBuildParams;
};
};
template<asset::Asset AssetType>
using conversion_requests_t = core::vector<ConversionRequest<AssetType>>;
using convertible_asset_types = core::type_list<
asset::ICPUBuffer,
asset::ICPUImage/*,
asset::ICPUImage,
asset::ICPUBottomLevelAccelerationStructure,
asset::ICPUTopLevelAccelerationStructure*/
asset::ICPUTopLevelAccelerationStructure
>;
core::tuple_transform_t<conversion_requests_t,convertible_asset_types> m_conversionRequests;

//
uint64_t m_minASBuildScratchSize = 0;
uint64_t m_maxASBuildScratchSize = 0;
core::bitflag<IGPUBuffer::E_USAGE_FLAGS> m_ASBuildScratchUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_NONE;
//
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;
};
Expand Down
Loading