-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathp3a_simd.hpp
101 lines (85 loc) · 2.91 KB
/
p3a_simd.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#pragma once
#include <Kokkos_SIMD.hpp>
#include "p3a_functions.hpp"
#include "p3a_type_traits.hpp"
#include "p3a_functional.hpp"
#include "p3a_scalar.hpp"
namespace p3a {
// Bring the Kokkos SIMD vocabulary into namespace p3a so the rest of the
// library can refer to these names without the Kokkos::Experimental prefix.
using Kokkos::Experimental::simd;
using Kokkos::Experimental::simd_mask;
using Kokkos::Experimental::const_where_expression;
using Kokkos::Experimental::where_expression;
using Kokkos::Experimental::element_aligned_tag;
// p3a::simd_abi names the same namespace as Kokkos::Experimental::simd_abi.
namespace simd_abi = Kokkos::Experimental::simd_abi;
using Kokkos::Experimental::native_simd;
using Kokkos::Experimental::native_simd_mask;
using Kokkos::Experimental::condition;
using Kokkos::Experimental::where;
// device_simd<T> is an alias for Kokkos' native_simd<T>.
template <class T>
using device_simd = Kokkos::Experimental::native_simd<T>;
// device_simd_mask<T> is an alias for Kokkos' native_simd_mask<T>.
template <class T>
using device_simd_mask = Kokkos::Experimental::native_simd_mask<T>;
/// Masked contiguous load starting at `ptr + i`.
///
/// \param ptr   base address of the source array
/// \param i     element offset added to `ptr` before loading
/// \param mask  lane mask; it is converted to `simd_mask<T, Abi>` first, so a
///              mask over a different element type `U` is accepted
/// \return a `simd<T, Abi>` that was default-constructed and then filled through
///         a masked `copy_from`. Lanes the masked copy does not write keep
///         whatever the default constructor left in them.
template <class T, class U, class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
simd<T, Abi> load(T const* ptr, int i, simd_mask<U, Abi> const& mask)
{
  using lane_mask = simd_mask<T, Abi>;
  T const* const first = ptr + i;
  simd<T, Abi> loaded;
  where(lane_mask(mask), loaded).copy_from(first, element_aligned_tag());
  return loaded;
}
/// Masked gather: loads through per-lane indices relative to `ptr`.
///
/// \param ptr      base address handed to `gather_from`
/// \param indices  per-lane indices handed to `gather_from`
/// \param mask     lane mask; converted to `simd_mask<T, Abi>` before use
/// \return a `simd<T, Abi>` that was default-constructed and then filled through
///         a masked `gather_from`. Lanes the masked gather does not write keep
///         whatever the default constructor left in them.
template <class T, class U, class Integral, class Abi>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
simd<T, Abi> load(T const* ptr, simd<Integral, Abi> const& indices, simd_mask<U, Abi> const& mask)
{
  using lane_mask = simd_mask<T, Abi>;
  simd<T, Abi> gathered;
  auto selected = where(lane_mask(mask), gathered);
  selected.gather_from(ptr, indices);
  return gathered;
}
/// Masked contiguous store to `ptr + i`.
///
/// \param value  vector whose lanes are written out
/// \param ptr    base address of the destination array
/// \param i      element offset added to `ptr` before storing
/// \param mask   lane mask; converted to `simd_mask<T, Abi>` before use
template <class T, class U, class Abi>
P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
void store(simd<T, Abi> const& value, T* ptr, int i, simd_mask<U, Abi> const& mask)
{
  using lane_mask = simd_mask<T, Abi>;
  T* const destination = ptr + i;
  where(lane_mask(mask), value).copy_to(destination, element_aligned_tag());
}
/// Masked scatter: stores through per-lane indices relative to `ptr`.
///
/// \param value    vector whose lanes are written out
/// \param ptr      base address handed to `scatter_to`
/// \param indices  per-lane indices handed to `scatter_to`
/// \param mask     lane mask; converted to `simd_mask<T, Abi>` before use
template <class T, class U, class Integral, class Abi>
P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
void store(simd<T, Abi> const& value, T* ptr, simd<Integral, Abi> const& indices, simd_mask<U, Abi> const& mask)
{
  using lane_mask = simd_mask<T, Abi>;
  auto const selected = where(lane_mask(mask), value);
  selected.scatter_to(ptr, indices);
}
/// Maximum reduction over a masked SIMD expression.
///
/// The `maximizer<T>` argument only selects this overload; the result is
/// whatever `Kokkos::Experimental::hmax(x)` returns.
///
/// \param x                 masked expression to reduce
/// \param identity_element  not read by this overload
/// \param binary_op         not read by this overload (overload tag)
/// \return `Kokkos::Experimental::hmax(x)`
// NOTE(review): the caller-supplied identity is ignored, so the result for an
// all-false mask is decided by hmax — confirm that matches caller expectations.
template<class M, class V, class T>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
typename V::value_type
reduce(
    const_where_expression<M, V> const& x,
    typename V::value_type identity_element,
    maximizer<T> binary_op)
{
  // Both arguments are part of the common reduce() signature but unused here.
  static_cast<void>(identity_element);
  static_cast<void>(binary_op);
  return Kokkos::Experimental::hmax(x);
}
/// Minimum reduction over a masked SIMD expression.
///
/// The `minimizer<T>` argument only selects this overload; the result is
/// whatever `Kokkos::Experimental::hmin(x)` returns.
///
/// \param x                 masked expression to reduce
/// \param identity_element  not read by this overload
/// \param binary_op         not read by this overload (overload tag)
/// \return `Kokkos::Experimental::hmin(x)`
// NOTE(review): the caller-supplied identity is ignored, so the result for an
// all-false mask is decided by hmin — confirm that matches caller expectations.
template<class M, class V, class T>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
typename V::value_type
reduce(
    const_where_expression<M, V> const& x,
    typename V::value_type identity_element,
    minimizer<T> binary_op)
{
  // Both arguments are part of the common reduce() signature but unused here.
  static_cast<void>(identity_element);
  static_cast<void>(binary_op);
  return Kokkos::Experimental::hmin(x);
}
/// Sum reduction over a masked SIMD expression.
///
/// Forwards to `Kokkos::Experimental::reduce` with `std::plus<>` as the
/// operation; the `adder` argument only selects this overload.
///
/// \param x                 masked expression to reduce
/// \param identity_element  passed through unchanged to the Kokkos reduction
/// \param binary_op         not read by this overload (overload tag)
/// \return the value produced by the Kokkos reduction
template<class M, class V>
[[nodiscard]] P3A_ALWAYS_INLINE P3A_HOST P3A_DEVICE inline
typename V::value_type reduce(
    const_where_expression<M, V> const& x,
    typename V::value_type identity_element,
    adder<typename V::value_type> binary_op)
{
  // The p3a functor is only a tag; Kokkos is driven with the standard one.
  static_cast<void>(binary_op);
  return Kokkos::Experimental::reduce(x, identity_element, std::plus<>{});
}
namespace details {

/// Specialization of the `is_scalar` trait for SIMD vectors: every
/// `simd<T, Abi>` reports `value == true`.
// NOTE(review): the primary template is not visible in this file; presumably
// it comes from p3a_scalar.hpp — confirm.
template <class T, class Abi>
struct is_scalar<simd<T, Abi>> {
  static inline constexpr bool value{true};
};

}  // namespace details
}