Maths for HLSL BxDFs (template cmath, tgmath) #803
base: master
Conversation
namespace impl
{
struct bitFields // need template?
{
    using this_t = bitFields;

    static this_t create(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
    {
        this_t retval;
        retval.base = base;
        retval.value = value;
        retval.offset = offset;
        retval.count = count;
        return retval;
    }

    uint32_t __insert()
    {
        const uint32_t shifted_masked_value = (value & ((0x1u << count) - 1u)) << offset;
        const uint32_t lo = base & ((0x1u << offset) - 1u);
        const uint32_t hi = base ^ lo;
        return (hi << count) | shifted_masked_value | lo;
    }

    uint32_t __overwrite()
    {
#ifdef __HLSL_VERSION
        return spirv::bitFieldInsert<uint32_t>(base, value, offset, count);
#else
        // TODO: double check implementation
        const uint32_t shifted_masked_value = ~(0xffffffffu << count) << offset;
        base &= ~shifted_masked_value;
        return base | (value << offset);
#endif
    }

    uint32_t base;
    uint32_t value;
    uint32_t offset;
    uint32_t count;
};
}

uint32_t bitFieldOverwrite(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
{
    impl::bitFields b = impl::bitFields::create(base, value, offset, count);
    return b.__overwrite();
}

uint32_t bitFieldInsert(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
{
    impl::bitFields b = impl::bitFields::create(base, value, offset, count);
    return b.__insert();
}
but we have glm and spirv for this?
The situation with bitfieldInsert is confusing to me. This mostly reflects the GLSL version we have:
Nabla/include/nbl/builtin/glsl/math/functions.glsl
Lines 263 to 277 in c7bdd2a
uint nbl_glsl_bitfieldOverwrite(in uint base, in uint value, in uint offset, in uint count)
{
    return bitfieldInsert(base,value,int(offset),int(count));
}

uint nbl_glsl_bitfieldInsert_impl(in uint base, in uint shifted_masked_value, in uint lo, in uint count)
{
    const uint hi = base^lo;
    return (hi<<count)|shifted_masked_value|lo;
}

uint nbl_glsl_bitfieldInsert(in uint base, uint value, in uint offset, in uint count)
{
    const uint shifted_masked_value = (value&((0x1u<<count)-1u))<<offset;
    return nbl_glsl_bitfieldInsert_impl(base,shifted_masked_value,base&((0x1u<<offset)-1u),count);
}
But there's also these ones in glsl_compat/core.hlsl:
Nabla/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
Lines 210 to 226 in c7bdd2a
template<typename T>
T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
{
    return impl::bitfieldExtract<T, is_signed<T>::value, is_integral<T>::value>::__call(val,offsetBits,numBits);
}

template<typename T>
T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
{
    return spirv::bitFieldInsert<T>(base, insert, offset, bits);
}

template<typename T>
T bitfieldReverse(T value)
{
    return spirv::bitFieldReverse<T>(value);
}
So maybe I can remove this one? But then there's no bitfieldOverwrite, and I don't know if they do the same thing as the GLSL version.
@Przemog1 is responsible here, but in general:
There should be a SPIR-V intrinsic in the spirv namespace which provides this GLSL builtin to HLSL.
Then there should be an implementation in C++ which does the same thing; it's very likely that glm already has one.
And finally an nbl::hlsl:: namespace function which calls the SPIR-V or the C++ one depending on the language (__HLSL_VERSION).
Do not remove anything, coordinate with Przemek.
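A minimal sketch of that three-layer layout for this particular builtin, assuming the spirv::bitFieldInsert declaration from glsl_compat/core.hlsl and a hand-rolled C++ fallback (the masking fallback is illustrative; glm may already provide an equivalent):

// nbl::hlsl:: entry point: picks the SPIR-V intrinsic under DXC, plain C++ otherwise
template<typename T>
T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
{
#ifdef __HLSL_VERSION
    // layer 1: SPIR-V intrinsic declared in the spirv namespace
    return spirv::bitFieldInsert<T>(base, insert, offset, bits);
#else
    // layer 2: C++ implementation with the same semantics (assumes bits < bit-width of T)
    const T mask = T(((T(1) << bits) - T(1)) << offset);
    return (base & ~mask) | ((insert << offset) & mask);
#endif
}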
namespace impl
{
template<typename T, uint16_t M, uint16_t N, uint16_t P>
struct applyChainRule4D
{
    static matrix<T, P, M> __call(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR)
    {
#ifdef __HLSL_VERSION
        return mul(dFdG, dGdR);
#else
        return dFdG * dGdR; // glm
#endif
    }
};

template<typename T, uint16_t M, uint16_t N>
struct applyChainRule3D : applyChainRule4D<T,M,N,1>
{
    static vector<T, N> __call(matrix<T, N, M> dFdG, vector<T, N> dGdR)
    {
#ifdef __HLSL_VERSION
        return mul(dFdG, dGdR);
#else
        return dFdG * dGdR; // glm
#endif
    }
};

template<typename T, uint16_t M>
struct applyChainRule2D : applyChainRule4D<T,M,1,1>
{
    static vector<T, M> __call(vector<T, M> dFdG, T dGdR)
    {
#ifdef __HLSL_VERSION
        return mul(dFdG, dGdR);
#else
        return dFdG * dGdR; // glm
#endif
    }
};

template<typename T>
struct applyChainRule1D : applyChainRule4D<T,1,1,1>
{
    static T __call(T dFdG, T dGdR)
    {
        return dFdG * dGdR;
    }
};
}

// possible to derive M,N,P automatically?
template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1)
matrix<T, P, M> applyChainRule(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR)
{
    return impl::applyChainRule4D<T,M,N,P>::__call(dFdG, dGdR);
}

template<typename T, uint16_t M, uint16_t N NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1)
vector<T, N> applyChainRule(matrix<T, N, M> dFdG, vector<T, N> dGdR)
{
    return impl::applyChainRule3D<T,M,N>::__call(dFdG, dGdR);
}

template<typename T, uint16_t M NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1)
vector<T, M> applyChainRule(vector<T, M> dFdG, T dGdR)
{
    return impl::applyChainRule2D<T,M>::__call(dFdG, dGdR);
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
T applyChainRule(T dFdG, T dGdR)
{
    return impl::applyChainRule1D<T>::__call(dFdG, dGdR);
}
I thought this was doable through just a simple
template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1)
matrix<T,M,P> applyChainRule(matrix<T,N,M> dFdG, matrix<T,M,P> dGdR)
{
    return mul(dFdG,dGdR);
}
which just calls mul.
Remember GLSL was silly and had the NxM matrix names backwards as MxN.
Also, in HLSL you can have 1xN or Nx1 matrix types (in GLSL you couldn't), so you can remove the overloads with vector and the >1 dimension requirements.
https://godbolt.org/z/qoTj5xPMG
GLM uses * for matrix multiplications, doesn't it? Could change to use mul in #804.
Also don't need partial spec?
> GLM uses * for matrix multiplications, doesn't it? Could change to use mul in #804.

we have our own mul now, for both C++ and HLSL
> Also don't need partial spec?

There's no custom/specialized behaviour to enable; the only thing we could do is maybe allow a different Matrix type. But that can be noted as a TODO in a comment.
T rcpOrientedEta;
T rcpOrientedEta2;
stuff that deals with refractive indices definitely belongs in bxdf/fresnel
{
    c = cos<T>(theta);
    s = sqrt<T>(1.0-c*c);
    s = (theta < 0.0) ? -s : s; // TODO: test with XOR
make that a signflip or negate function that takes a bool, and then use it here
there is a flipSign function in the hlsl/ieee754.hlsl file
> there is a flipSign function in the hlsl/ieee754.hlsl file

the flipSign needs a bool arg to tell it whether to flip or not
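For reference, the conditional variant being asked for could look roughly like this (a sketch only; conditionalFlipSign is a hypothetical name, and nbl::hlsl::bit_cast is assumed to work as mentioned elsewhere in this thread):

// flip the sign of x only when `flip` is true, branchlessly, by XOR-ing the sign bit
float conditionalFlipSign(float x, bool flip)
{
    const uint32_t signMask = flip ? 0x80000000u : 0x00000000u;
    return bit_cast<float>(bit_cast<uint32_t>(x) ^ signMask);
}

// usage in the snippet above:
// s = conditionalFlipSign(sqrt(1.0 - c*c), theta < 0.0);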
// valid only for `theta` in [-PI,PI]
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
you should change most is_scalar_v into a bool concept that matches float scalars and also allows emulated_float64_t.
Also, scalar matches integers and booleans, on which sin and cos don't make sense.
I've created such a concept already:
https://github.com/Devsh-Graphics-Programming/Nabla/blob/maths_for_bxdfs_hlsl/include/nbl/builtin/hlsl/concepts/core.hlsl#L103-L105
however the name of this function will be changed to FloatingPointLikeScalar with the next commit
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix
frankly I'd make it void frisvad(const T normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent), requiring that vector_traits<T>::Dimension==3.
The reason is that this way it's more semantically sound, instead of scratching your head over which row/column of the matrix is what (in the original GLSL, the first column, the first vector you get with operator[] or provide with the constructor, was the tangent).
We had this conversation with @keptsecret on his thread and PR.
Also https://github.com/Devsh-Graphics-Programming/Nabla/pull/803/files#r1904982026
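A sketch of that shape, assuming vector_traits<T>::Dimension and scalar_type exist as used elsewhere in this thread; the constants follow the usual Frisvad construction, so double-check against the original GLSL:

template<typename T NBL_FUNC_REQUIRES(vector_traits<T>::Dimension == 3)
void frisvad(const T normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent)
{
    using scalar_t = typename vector_traits<T>::scalar_type;
    if (normal.z < scalar_t(-0.9999999))
    {
        // degenerate case: normal points almost straight down -Z
        tangent = T(0.0, -1.0, 0.0);
        bitangent = T(-1.0, 0.0, 0.0);
        return;
    }
    const scalar_t a = scalar_t(1.0) / (scalar_t(1.0) + normal.z);
    const scalar_t b = -normal.x * normal.y * a;
    tangent = T(scalar_t(1.0) - normal.x * normal.x * a, b, -normal.x);
    bitangent = T(b, scalar_t(1.0) - normal.y * normal.y * a, -normal.y);
}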
return (n.z < -0.9999999) ? matrix<T, 2, 3>(vector<T, 3>(0.0,-1.0,0.0), vector<T, 3>(-1.0,0.0,0.0)) :
    matrix<T, 2, 3>(vector<T, 3>(1.0-n.x*n.x*a, b, -n.x), vector<T, 3>(b, 1.0-n.y*n.y*a, -n.y));
also use an if instead of ?:; we get no perf diff, but much better legibility (also the n.z condition can be moved up)
// @ return abs(x) if cond==true, max(x,0.0) otherwise
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>)
T conditionalAbsOrMax(bool cond, T x, T limit);

template <>
float conditionalAbsOrMax<float>(bool cond, float x, float limit)
{
    const float condAbs = asfloat(asuint(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu));
    return max(condAbs,limit);
}

template <>
float32_t2 conditionalAbsOrMax<float32_t2>(bool cond, float32_t2 x, float32_t2 limit)
{
    const float32_t2 condAbs = asfloat(asuint(x) & select(cond, (uint32_t2)0x7fFFffFFu, (uint32_t2)0xffFFffFFu));
    return max(condAbs,limit);
}

template <>
float32_t3 conditionalAbsOrMax<float32_t3>(bool cond, float32_t3 x, float32_t3 limit)
{
    const float32_t3 condAbs = asfloat(asuint(x) & select(cond, (uint32_t3)0x7fFFffFFu, (uint32_t3)0xffFFffFFu));
    return max(condAbs,limit);
}

template <>
float32_t4 conditionalAbsOrMax<float32_t4>(bool cond, float32_t4 x, float32_t4 limit)
{
    const float32_t4 condAbs = asfloat(asuint(x) & select(cond, (uint32_t4)0x7fFFffFFu, (uint32_t4)0xffFFffFFu));
    return max(condAbs,limit);
Require that T is floating point (matching emulated_float too); leave the signed integer impl as a TODO.
Because the condAbs is not constexpr, you can use our nbl::hlsl::bit_cast.
You can use nbl::hlsl::promote to turn a single scalar into a vector.
Also use mix instead of select, then it should all work across C++ and HLSL.
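A sketch of how those four explicit specializations could collapse into one template along those lines (untested; the concept name and the exact behaviour of mix with a bool selector are assumptions):

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPoint<T>) // TODO: signed integer version
T conditionalAbsOrMax(bool cond, T x, T limit)
{
    using traits = hlsl::vector_traits<T>;
    using uint_t = typename unsigned_integer_of_size<sizeof(typename traits::scalar_type)>::type;
    using uint_vec_t = vector<uint_t, traits::Dimension>;

    // promote broadcasts the scalar masks to the right dimension,
    // mix (instead of select) keeps it compiling in both C++ and HLSL,
    // bit_cast replaces asuint/asfloat which only exist in HLSL
    const uint_vec_t keepAll = promote<uint_vec_t>(~uint_t(0));
    const uint_vec_t dropSign = promote<uint_vec_t>(~uint_t(0) >> 1);
    const T condAbs = bit_cast<T>(bit_cast<uint_vec_t>(x) & mix(keepAll, dropSign, cond));
    return max(condAbs, limit);
}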
template<typename T, uint32_t LP NBL_FUNC_REQUIRES(LP>0)
scalar_type_t<T> lpNormPreroot(NBL_CONST_REF_ARG(T) v)
{
    return impl::lp_norm<T,LP>::__sum(v);
}

template<typename T, uint32_t LP>
you probably want to require that vector_traits<T>::scalar_type is a float (including emulated float)
matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix
matrix<T, 2, 3> frisvad(vector<T, 3> n)
don't use matrix, it's semantically confusing
template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Pow, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> pow(FloatingPoint lhs, FloatingPoint rhs);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp(FloatingPoint val);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp2, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp2(FloatingPoint val);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Log, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> log(FloatingPoint val);
pow, exp and log don't take float64 without an extension.
Also, as per our discord conversation, we need 5 named concepts in the spirv namespace so the enable_if_t are clearer and easier to compare with the SPIR-V spec.
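For illustration, one of those named concepts might look like this (the name and the exact trait machinery are placeholders; the point is only that the declaration reads closer to the SPIR-V spec wording):

// in the spirv namespace, next to the intrinsics
template<typename T>
struct is_floating_point_scalar_or_vector
{
    NBL_CONSTEXPR_STATIC_INLINE bool value = is_floating_point<T>::value && !is_matrix_v<T>;
};

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp, "GLSL.std.450")]]
enable_if_t<is_floating_point_scalar_or_vector<FloatingPoint>::value, FloatingPoint> exp(FloatingPoint val);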
inline bool isnan_uint_impl(UnsignedInteger val)
{
    using AsFloat = typename float_of_size<sizeof(UnsignedInteger)>::type;
    return bool((ieee754::extractBiasedExponent<UnsignedInteger>(val) == ieee754::traits<AsFloat>::specialValueExp) && (val & ieee754::traits<AsFloat>::mantissaMask));
there's a faster check:
- AND with (numeric_limits<UnsignedInteger>::max>>1) to clear the sign mask
- check for > (specialValueExp<<mantissaBits)
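In code, that faster check might look like the following (a sketch; the mantissa-bit-count member name in ieee754::traits is an assumption):

template<typename UnsignedInteger>
inline bool isnan_uint_impl(UnsignedInteger val)
{
    using AsFloat = typename float_of_size<sizeof(UnsignedInteger)>::type;
    // clear the sign bit, then NaN is anything strictly greater than +infinity's bit pattern
    const UnsignedInteger absBits = val & (numeric_limits<UnsignedInteger>::max >> 1);
    return absBits > (UnsignedInteger(ieee754::traits<AsFloat>::specialValueExp) << ieee754::traits<AsFloat>::mantissaBitCnt);
}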
template<typename FloatingPoint>
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>)
struct isnan_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) >
{
    static bool __call(NBL_CONST_REF_ARG(FloatingPoint) x)
    {
#ifdef __HLSL_VERSION
        return spirv::isNan<FloatingPoint>(x);
#else
        // GCC and Clang will always return false from std::isnan when fast math is enabled,
        // this implementation returns the correct output regardless of whether fast math is enabled or not
        using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type;
        return tgmath_impl::isnan_uint_impl(reinterpret_cast<const AsUint&>(x));
#endif
    }
};

template<typename V>
NBL_PARTIAL_REQ_TOP(concepts::Vectorial<V>)
struct isnan_helper<V NBL_PARTIAL_REQ_BOT(concepts::Vectorial<V>) >
{
    using output_t = vector<bool, hlsl::vector_traits<V>::Dimension>;
as per the discord convo, it's best if there's a whole separate SPIR-V specialization which is not predicated on a concept, but on whether the SPIR-V intrinsic is callable/compiles with that type, and the C++ specialization can be just for a scalar.
So you have 2 specializations in two different parts of the __HLSL_VERSION block, instead of one that tries to reconcile incompatible concepts.
Ideally I'd keep the functions without SPIR-V intrinsic equivalents in a separate file or near the bottom of this one
// ERF

template<typename T NBL_STRUCT_CONSTRAINABLE>
struct erf_helper;

template<typename FloatingPoint>
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>)
struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) >
the non-spirv forwarding functions can be done however you like; most sane is probably to do the per-channel broadcast specs as macros for quick partial spec.
The Scalar constraint you have here is fine, but not for the potentially-SPIR-V vector perfect-forwarding ones above.
const FloatingPoint a1 = 0.254829592;
const FloatingPoint a2 = -0.284496736;
const FloatingPoint a3 = 1.421413741;
const FloatingPoint a4 = -1.453152027;
const FloatingPoint a5 = 1.061405429;
const FloatingPoint p = 0.3275911;
pay attention to 2 things:
- whether the constant literal has all the digits needed (float32 and float64 often have different digits)
- to get true FP64 constants in HLSL you need to use a stupid suffix; there's actually an NBL_ macro you need to place around them, because DXC is holding back on implementing explicitly sized fp16, fp32, fp64 suffixes on float literals
Also, usually an implementation of a complex math function is different for float16, float32 and float64 (different levels of polynomial approximation/coefficients and number of steps/iterations), so I'd have a separate partial spec in preprocessor blocks of __HLSL_VERSION.
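A sketch of what splitting the specs per type could look like, reusing the fp32 coefficients from the diff above (the float64_t variant is deliberately left out, since its coefficients and iteration counts would differ):

template<>
struct erf_helper<float32_t>
{
    static float32_t __call(float32_t x)
    {
        // Abramowitz & Stegun 7.1.26 style fit, adequate for fp32
        const float32_t a1 = 0.254829592f, a2 = -0.284496736f, a3 = 1.421413741f;
        const float32_t a4 = -1.453152027f, a5 = 1.061405429f, p = 0.3275911f;
        const float32_t sign = (x < 0.f) ? -1.f : 1.f;
        const float32_t absX = abs(x);
        const float32_t t = 1.f / (1.f + p * absX);
        const float32_t y = 1.f - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * exp(-absX * absX);
        return sign * y;
    }
};

// a float64_t (and potentially float16_t) specialization would sit beside it, in its own
// preprocessor block where needed, with higher-precision coefficients and proper FP64 literals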