-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbittwiddle.h
149 lines (142 loc) · 3.94 KB
/
bittwiddle.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#pragma once
#include <type_traits>
#include <limits>
#include <algorithm>
#include "entry.h"
/// A fallback implementation with runtime detected optimization
template<typename T>
entry<T (*)(T)> popcounti;
/**
* Return count of set bits.
*
* @param v Integer which set bits are counted
* @return The number of set bits
*/
template<typename T>
static inline T popcount(T v)
{
// Native ALU width might be different than pointer width (e.g. x32)
#if __GNUC__
#if __WORDSIZE == 32
using native_t = uint32_t;
#else
using native_t = uint64_t;
#endif
#else
using native_t = unsigned intptr_t;
#endif
// Helper type to convert argument to same width unsigned type
using uT = typename std::make_unsigned<T>::type;
// Convert to unsigned without changes to bit representation.
uT uv = v;
// Optimize case when counting bits in a value which is wider than native
// register width. This is used on 32bit system when counting cards.
if (sizeof(v) > sizeof(native_t)) {
native_t rv = 0;
unsigned count = sizeof(v)/sizeof(native_t);
// Calculate shift width while avoiding warning for undefined behavior
// when shifting more than the width of type.
const unsigned shift = std::min(std::numeric_limits<native_t>::digits,
std::numeric_limits<uT>::digits - 1);
do {
rv += popcount(static_cast<native_t>(uv));
uv >>= shift;
} while(--count);
return rv;
}
#if !__POPCNT__
// Without instruction support call fallback function
return popcounti<uT>(uv);
#endif
#if __GNUC__
// Call type specific counting builtin to generate correct instructions
if (sizeof(v) == sizeof(long long))
return __builtin_popcountll(uv);
if (sizeof(v) == sizeof(long))
return __builtin_popcountl(uv);
if (sizeof(v) <= sizeof(int))
return __builtin_popcount(uv);
#else
#warning "No popcnt instruction support for your compiler"
return popcounti<uT>(uv);
#endif
}
/**
* Count trailing zero bits.
*
* @param v Integer which trailing zero bits are counted
* @return The number of trailing zero bits
*/
template<typename T>
static inline T ctz(T v)
{
using uT = typename std::make_unsigned<T>::type;
#if __GNUC__
assert(v != 0 && "ctz builtin requires non-zero value. "
"Instruction could be made easily work with zero but builtin doesn't support it.");
if (sizeof(v) == sizeof(long long))
return __builtin_ctzll(v);
if (sizeof(v) == sizeof(long))
return __builtin_ctzl(v);
if (sizeof(v) <= sizeof(int))
return __builtin_ctz(v);
#else
#warning "No count trailing zeros instruction support for your compiler"
#endif
uT uv = v;
unsigned depth = std::numeric_limits<uT>::digits/2;
unsigned position = depth;
// Binary search for zero bits
for (; depth > 1;) {
depth /= 2;
uT mask = (uT{1} << position) - 1;
if (uv & mask)
position -= depth;
else
position += depth;
}
uT mask = (uT{1} << position) - 1;
if (uv & mask)
position--;
return position;
}
/**
* Count leading zero bits
*
* @Param v Integer which leading zero bits are counted
* @return The number of leading zero bits
*/
template<typename T>
static inline T clz(T v)
{
using uT = typename std::make_unsigned<T>::type;
#if __GNUC__
assert(v != 0 && "ctz builtin requires non-zero value. "
"Instruction could be made easily work with zero but builtin doesn't support it.");
if (sizeof(v) == sizeof(long long))
return __builtin_clzll(v);
if (sizeof(v) == sizeof(long))
return __builtin_clzl(v);
if (sizeof(v) <= sizeof(int))
return __builtin_clz(v) -
(std::numeric_limits<unsigned>::digits - std::numeric_limits<uT>::digits);
#else
#warning "No count leading zeros instruction support for your compiler"
#endif
uT uv = v;
unsigned depth = std::numeric_limits<uT>::digits/2;
unsigned position = depth;
// Binary search for zero bits
for (; depth > 1;) {
depth /= 2;
uT mask = (uT{1} << position) - 1;
if (uv & ~mask)
position += depth;
else
position -= depth;
}
uT mask = (uT{1} << position) - 1;
if (uv & ~mask)
position++;
return std::numeric_limits<uT>::digits - position;
}