Skip to content

Commit

Permalink
Change: Move FORCEINLINE macro into util header
Browse files Browse the repository at this point in the history
  • Loading branch information
spnda committed Jun 22, 2024
1 parent 0420436 commit b670b7c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 21 deletions.
8 changes: 4 additions & 4 deletions include/fastgltf/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,17 +671,17 @@ namespace fastgltf::math {
}

/** Returns the column vector at the given index. */
[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) noexcept {
/**
 * Mutable subscript: forwards to col(idx) and returns exactly what col() returns
 * (decltype(auto) keeps the value category of that call). No index check is
 * performed in this function itself.
 */
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) noexcept {
return col(idx);
}
[[nodiscard]] constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
/**
 * Const subscript: forwards to the const overload of col(idx) and returns exactly
 * what it returns (decltype(auto) keeps the value category of that call). No index
 * check is performed in this function itself.
 */
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) operator[](std::size_t idx) const noexcept {
return col(idx);
}

[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) noexcept {
/**
 * Returns element idx of _data; the return type is deduced from the subscript
 * expression (decltype(auto)). Presumably this is the column vector that
 * operator[] documents - confirm against the declaration of _data.
 * idx is not validated in this function.
 */
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) noexcept {
return _data[idx];
}
[[nodiscard]] constexpr decltype(auto) col(std::size_t idx) const noexcept {
/**
 * Const overload: returns element idx of _data as seen through a const object; the
 * return type is deduced from the subscript expression (decltype(auto)).
 * idx is not validated in this function.
 */
[[nodiscard]] FASTGLTF_FORCEINLINE constexpr decltype(auto) col(std::size_t idx) const noexcept {
return _data[idx];
}

Expand Down
10 changes: 10 additions & 0 deletions include/fastgltf/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@
#define FASTGLTF_INTRINSIC
#endif

// FASTGLTF_FORCEINLINE: function specifier that asks the compiler to inline the annotated
// function. It expands to:
//   - __forceinline                  when _MSC_VER is defined,
//   - [[gnu::always_inline]] inline  when __GNUC__ or __clang__ is defined,
//   - inline                         on any other compiler (an ordinary inline specifier only).
// The _MSC_VER test comes first, so a compiler that defines both _MSC_VER and __clang__
// takes the __forceinline branch.
#if defined(_MSC_VER)
#define FASTGLTF_FORCEINLINE __forceinline
#elif defined(__GNUC__) || defined(__clang__)
#define FASTGLTF_FORCEINLINE [[gnu::always_inline]] inline
#else
// On other compilers we need the inline specifier, so that the functions marked with this macro
// can be properly inlined without the "function body can be overwritten at link time" error.
#define FASTGLTF_FORCEINLINE inline
#endif

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 5030) // attribute 'x' is not recognized
Expand Down
24 changes: 7 additions & 17 deletions src/base64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,6 @@

namespace fg = fastgltf;

#if defined(_MSC_VER)
#define FORCEINLINE __forceinline
#elif defined(__GNUC__) || defined(__clang__)
#define FORCEINLINE [[gnu::always_inline]] inline
#else
// On other compilers we need the inline specifier, so that the functions in this compilation unit
// can be properly inlined without the "function body can be overwritten at link time" error.
#define FORCEINLINE inline
#endif

namespace fastgltf::base64 {
using DecodeFunctionInplace = std::function<void(std::string_view, std::uint8_t*, std::size_t)>;
using DecodeFunction = std::function<fg::StaticVector<std::uint8_t>(std::string_view)>;
Expand Down Expand Up @@ -120,7 +110,7 @@ namespace fastgltf::base64 {
// The AVX and SSE decoding functions are based on http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html.
// It covers various methods of en-/decoding base64 using SSE and AVX and also shows their
// performance metrics.
[[gnu::target("avx2")]] FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_lookup_pshufb_bitmask(const __m256i input) {
const auto higher_nibble = _mm256_and_si256(_mm256_srli_epi32(input, 4), _mm256_set1_epi8(0x0f));

const auto shiftLUT = _mm256_setr_epi8(
Expand All @@ -137,7 +127,7 @@ namespace fastgltf::base64 {
return _mm256_add_epi8(input, shift);
}

[[gnu::target("avx2")]] FORCEINLINE auto avx2_pack_ints(__m256i input) {
[[gnu::target("avx2")]] FASTGLTF_FORCEINLINE auto avx2_pack_ints(__m256i input) {
	// Packs each group of four bytes into one 32-bit lane in two multiply-add steps.
	// NOTE(review): assumes every input byte already holds a 6-bit value (0..63), so the
	// saturating add in the first step cannot saturate - confirm against the caller.

	// Step 1: for every adjacent byte pair (b0, b1) produce the 16-bit value b0 * 0x40 + b1.
	const __m256i byteWeights = _mm256_set1_epi32(0x01400140);
	const __m256i joinedPairs = _mm256_maddubs_epi16(input, byteWeights);

	// Step 2: for every adjacent 16-bit pair (w0, w1) produce the 32-bit value w0 * 0x1000 + w1.
	const __m256i wordWeights = _mm256_set1_epi32(0x00011000);
	return _mm256_madd_epi16(joinedPairs, wordWeights);
}
Expand Down Expand Up @@ -204,7 +194,7 @@ namespace fastgltf::base64 {
return ret;
}

[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_lookup_pshufb_bitmask(const __m128i input) {
const auto higher_nibble = _mm_and_si128(_mm_srli_epi32(input, 4), _mm_set1_epi8(0x0f));

const auto shiftLUT = _mm_setr_epi8(
Expand All @@ -218,7 +208,7 @@ namespace fastgltf::base64 {
return _mm_add_epi8(input, shift);
}

[[gnu::target("sse4.1")]] FORCEINLINE auto sse4_pack_ints(__m128i input) {
[[gnu::target("sse4.1")]] FASTGLTF_FORCEINLINE auto sse4_pack_ints(__m128i input) {
	// Packs each group of four bytes into one 32-bit lane in two multiply-add steps.
	// NOTE(review): assumes every input byte already holds a 6-bit value (0..63), so the
	// saturating add in the first step cannot saturate - confirm against the caller.

	// Step 1: for every adjacent byte pair (b0, b1) produce the 16-bit value b0 * 0x40 + b1.
	const __m128i byteWeights = _mm_set1_epi32(0x01400140);
	const __m128i joinedPairs = _mm_maddubs_epi16(input, byteWeights);

	// Step 2: for every adjacent 16-bit pair (w0, w1) produce the 32-bit value w0 * 0x1000 + w1.
	const __m128i wordWeights = _mm_set1_epi32(0x00011000);
	return _mm_madd_epi16(joinedPairs, wordWeights);
}
Expand Down Expand Up @@ -279,7 +269,7 @@ namespace fastgltf::base64 {
return ret;
}
#elif defined(FASTGLTF_IS_A64)
FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
FASTGLTF_FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
// clang-format off
constexpr std::array<int8_t, 16> shiftLUTdata = {
0, 0, 19, 4, -65, -65, -71, -71,
Expand All @@ -298,7 +288,7 @@ FORCEINLINE int8x16_t neon_lookup_pshufb_bitmask(const uint8x16_t input) {
return vaddq_s8(input, shift);
}

FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
FASTGLTF_FORCEINLINE int16x8_t neon_pack_ints(const int8x16_t input) {
const uint32x4_t mask = vdupq_n_u32(0x01400140);

const int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(input))), vmovl_s8(vget_low_s8(mask)));
Expand Down Expand Up @@ -387,7 +377,7 @@ static constexpr std::array<std::uint8_t, 128> base64lut = {

namespace fastgltf::base64 {
template <typename Output>
FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
FASTGLTF_FORCEINLINE void decode_block(std::array<std::uint8_t, 4>& sixBitChars, Output output) {
for (std::size_t i = 0; i < 4; i++) {
assert(static_cast<std::size_t>(sixBitChars[i]) < base64lut.size());
sixBitChars[i] = base64lut[sixBitChars[i]];
Expand Down

0 comments on commit b670b7c

Please sign in to comment.