Skip to content

Commit

Permalink
Fix several integer-signedness warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
iskunk committed May 6, 2024
1 parent 033d9ef commit 21c0d6b
Show file tree
Hide file tree
Showing 15 changed files with 44 additions and 38 deletions.
2 changes: 1 addition & 1 deletion include/cute/numeric/integral_constant.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ template <class... Ts>
constexpr uint64_t parse_int_digits(uint64_t result, int digit, Ts... digits)
{
if constexpr (sizeof...(Ts) == 0) {
-return 10 * result + digit;
+return 10 * result + uint64_t(digit);
} else {
return parse_int_digits(10 * result + digit, digits...);
}
Expand Down
2 changes: 1 addition & 1 deletion include/cute/numeric/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ bit_width(T x) {
(numeric_limits<T>::digits == 8 ? 3 : (assert(false),0)))));
T r = 0;
for (int i = N - 1; i >= 0; --i) {
-T shift = (x > ((T(1) << (T(1) << i))-1)) << i;
+T shift = T(x > ((T(1) << (T(1) << i))-1)) << i;
x >>= shift;
r |= shift;
}
Expand Down
12 changes: 6 additions & 6 deletions include/cutlass/arch/mma_sm60.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct Mma<

#else
CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < 2; ++i) {
+for (size_t i = 0; i < 2; ++i) {
d[i] = a[i] * b[0] + c[i];
}
#endif
Expand Down Expand Up @@ -130,7 +130,7 @@ struct Mma<

#else
CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < 2; ++i) {
+for (size_t i = 0; i < 2; ++i) {
d[i] = a[0] * b[i] + c[i];
}
#endif
Expand Down Expand Up @@ -182,9 +182,9 @@ struct Mma <

#else
CUTLASS_PRAGMA_UNROLL
-for (int j = 0; j < 2; ++j) {
+for (size_t j = 0; j < 2; ++j) {
CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < 2; ++i) {
+for (size_t i = 0; i < 2; ++i) {
d[i + 2 * j] = a[i] * b[j] + c[i + 2 * j];
}
}
Expand Down Expand Up @@ -236,9 +236,9 @@ struct Mma<
D[1] = reinterpret_cast<Array<half_t, 2> &>(Dhi);
#else
CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < 2; ++i) {
+for (size_t i = 0; i < 2; ++i) {
CUTLASS_PRAGMA_UNROLL
-for (int j = 0; j < 2; ++j) {
+for (size_t j = 0; j < 2; ++j) {
d[i * 2 + j] = a[i] * b[j] + c[i * 2 + j];
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/cutlass/arch/mma_sm61.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ struct Mma<
d[0] = c[0];

CUTLASS_PRAGMA_UNROLL
-for (int k = 0; k < 4; ++k) {
+for (size_t k = 0; k < 4; ++k) {
d[0] += a[k] * b[k];
}

Expand Down Expand Up @@ -129,7 +129,7 @@ struct Mma<
d[0] = c[0];

CUTLASS_PRAGMA_UNROLL
-for (int k = 0; k < 2; ++k) {
+for (size_t k = 0; k < 2; ++k) {
d[0] += a[k] * b[k];
}
#endif
Expand Down
8 changes: 4 additions & 4 deletions include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, int32_t, 32, OutputS
TensorRef const &ref,
unsigned lane_id
):
-stride_(ref.stride()[0] / AccessType::kElements) {
+stride_(size_t(ref.stride()[0]) / AccessType::kElements) {

int quad_id = (lane_id / Detail::kLanesInQuad);
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
Expand Down Expand Up @@ -579,7 +579,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, int32_t, 32, OutputS
TensorRef const &ref,
unsigned lane_id
):
-stride_(ref.stride()[0] / AccessType::kElements) {
+stride_(size_t(ref.stride()[0]) / AccessType::kElements) {

int quad_id = (lane_id / Detail::kLanesInQuad);
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
Expand Down Expand Up @@ -774,7 +774,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, float, 32, 8, 16, 8>
TensorRef const &ref,
unsigned lane_id
):
-stride_(ref.stride()[0] / AccessType::kElements) {
+stride_(size_t(ref.stride()[0]) / AccessType::kElements) {

int quad_id = (lane_id / Detail::kLanesInQuad);
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
Expand Down Expand Up @@ -963,7 +963,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, float, 32, 8, 8, 8>
TensorRef const &ref,
unsigned lane_id
):
-stride_(ref.stride()[0] / AccessType::kElements) {
+stride_(size_t(ref.stride()[0]) / AccessType::kElements) {

int quad_id = (lane_id / Detail::kLanesInQuad);
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
Expand Down
4 changes: 2 additions & 2 deletions include/cutlass/fast_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ CUTLASS_CONSTEXPR_IF_CXX17
value_t find_log2(value_t x) {
int a = int(31 - clz(x));
a += (x & (x - 1)) != 0; // Round up, add 1 if not a power of 2.
-return a;
+return value_t(a);
}


Expand Down Expand Up @@ -364,7 +364,7 @@ struct FastDivmod {
FastDivmod(int divisor): divisor(divisor) {

if (divisor != 1) {
-unsigned int p = 31 + find_log2(divisor);
+auto p = static_cast<unsigned int>(31 + find_log2(divisor));
unsigned m = unsigned(((1ull << p) + unsigned(divisor) - 1) / unsigned(divisor));

multiplier = m;
Expand Down
6 changes: 3 additions & 3 deletions include/cutlass/gemm/kernel/tile_scheduler_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -958,9 +958,9 @@ struct PersistentTileSchedulerSm90StreamKParams {
uint32_t epilogue_subtile = 1,
uint32_t num_accumulator_mtxs = 1) {

-auto log_swizzle_size = UnderlyingParams::get_log_swizzle_size(problem_blocks.x, problem_blocks.y, max_swizzle);
-problem_blocks.x = round_up(problem_blocks.x, (1 << log_swizzle_size) * cluster_shape.m());
-problem_blocks.y = round_up(problem_blocks.y, (1 << log_swizzle_size) * cluster_shape.n());
+auto log_swizzle_size = UnderlyingParams::get_log_swizzle_size(int(problem_blocks.x), int(problem_blocks.y), max_swizzle);
+problem_blocks.x = unsigned(round_up(int(problem_blocks.x), (1 << log_swizzle_size) * cluster_shape.m()));
+problem_blocks.y = unsigned(round_up(int(problem_blocks.y), (1 << log_swizzle_size) * cluster_shape.n()));

// Workspace is needed only for output tiles that will be split. Thus, we first determine the number
// of output tiles that will be split, and then calculate the workspace needed to cover these.
Expand Down
10 changes: 8 additions & 2 deletions include/cutlass/gemm/threadblock/threadblock_swizzle.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ struct GemmIdentityThreadblockSwizzle {
CUTLASS_HOST_DEVICE
static dim3 get_grid_shape(GemmCoord tiled_shape) {
int tile = 1 << get_log_tile(tiled_shape);
-return dim3(tiled_shape.m() * tile, (tiled_shape.n() + tile - 1) / tile, tiled_shape.k());
+return dim3(
+  unsigned(tiled_shape.m() * tile),
+  unsigned((tiled_shape.n() + tile - 1) / tile),
+  unsigned(tiled_shape.k()));
}

/// Calculates optimal swizzle width
Expand Down Expand Up @@ -301,7 +304,10 @@ struct GemmSplitKIdentityThreadblockSwizzle {
CUTLASS_HOST_DEVICE
static dim3 get_grid_shape(GemmCoord tiled_shape) {
int tile = 1 << get_log_tile(tiled_shape);
-return dim3(tiled_shape.m() * tile, (tiled_shape.n() + tile - 1) / tile, tiled_shape.k());
+return dim3(
+  unsigned(tiled_shape.m() * tile),
+  unsigned((tiled_shape.n() + tile - 1) / tile),
+  unsigned(tiled_shape.k()));
}

/// Obtains the threadblock offset (in units of threadblock-scoped tiles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,9 @@ struct ThreadblockSwizzleStreamK {
batch_count);

size_t problem_bytes =
-(element_C_bytes_ * problem_size.m() * problem_size.n()) +
-(element_A_bytes_ * problem_size.m() * problem_size.k()) +
-(element_B_bytes_ * problem_size.k() * problem_size.n());
+(element_C_bytes_ * size_t(problem_size.m()) * size_t(problem_size.n())) +
+(element_A_bytes_ * size_t(problem_size.m()) * size_t(problem_size.k())) +
+(element_B_bytes_ * size_t(problem_size.k()) * size_t(problem_size.n()));

size_t problem_flops = size_t(problem_size.m()) * size_t(problem_size.n()) * size_t(problem_size.k()) * 2;

Expand Down
8 changes: 4 additions & 4 deletions include/cutlass/gemm/warp/mma_tensor_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ struct ConvertAndPack<bfloat16_t, float, N, Round> {
Array<float, N> tmp;

CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < N; ++i) {
-int idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
+for (uint32_t i = 0; i < N; ++i) {
+uint32_t idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
tmp[i] = source[idx];
}

Expand All @@ -120,8 +120,8 @@ struct ConvertAndPack<half_t, float, N, Round> {
Array<float, N> tmp;

CUTLASS_PRAGMA_UNROLL
-for (int i = 0; i < N; ++i) {
-int idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
+for (uint32_t i = 0; i < N; ++i) {
+uint32_t idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
tmp[i] = source[idx];
}

Expand Down
4 changes: 2 additions & 2 deletions include/cutlass/half.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ struct alignas(2) half_t {
#endif

uint16_t sign = uint16_t((s >> 16) & 0x8000);
-int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);
+auto exp = int16_t(((s >> 23) & 0xff) - 127);
int mantissa = s & 0x7fffff;
uint16_t u = 0;

Expand All @@ -238,7 +238,7 @@ struct alignas(2) half_t {

if (exp >= -14) {
// normal fp32 to normal fp16
-exp = uint16_t(exp + uint16_t(15));
+exp = static_cast<decltype(exp)>(exp + 15);
u = uint16_t(((exp & 0x1f) << 10));
u = uint16_t(u | (mantissa >> 13));
} else {
Expand Down
8 changes: 4 additions & 4 deletions include/cutlass/layout/permute.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ class Tensor4DPermuteBMM0213RowMajor : public PermuteBase {
LongIndex operator()(MatrixCoord coord) const {

// The batch index for BMM
-Index BMM_batch_idx = blockIdx.z;
+auto BMM_batch_idx = Index(blockIdx.z);

// [i,j,k,l] -> [i,k,j,l]
Index l = coord.column();
Expand Down Expand Up @@ -381,7 +381,7 @@ class Tensor4DPermuteBMM0213RowMajorInverse : public PermuteBase {
LongIndex operator()(MatrixCoord coord) const {

// The batch index for BMM
-Index BMM_batch_idx = blockIdx.z;
+auto BMM_batch_idx = Index(blockIdx.z);

// The following assumes grouping [(D0)->batch, (D2)->row, (D1,D3)->col]
Index l = coord.column() % D3_;
Expand Down Expand Up @@ -453,7 +453,7 @@ class Tensor4DPermuteBMM0321ColumnMajor : public PermuteBase {
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord coord) const {

-Index BMM_batch_idx = blockIdx.z;
+auto BMM_batch_idx = Index(blockIdx.z);

// [i,j,k,l] -> [i,k,j,l]
Index l = coord.column();
Expand Down Expand Up @@ -514,7 +514,7 @@ class Tensor4DPermuteBMM0321ColumnMajorInverse : public PermuteBase {
CUTLASS_HOST_DEVICE
LongIndex operator()(MatrixCoord coord) const {

-Index BMM_batch_idx = blockIdx.z;
+auto BMM_batch_idx = Index(blockIdx.z);

// The following assumes grouping [(D0)->batch, (D1,D2)->row, (D3)->col]
Index l = coord.column();
Expand Down
4 changes: 2 additions & 2 deletions include/cutlass/predicate_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ struct PredicateVector {
int byte = (idx / kPredicatesPerByte);
int bit_offset = (idx % kPredicatesPerByte);

-word = byte / sizeof(Storage);
-int byte_offset = (byte % sizeof(Storage));
+word = byte / int(sizeof(Storage));
+int byte_offset = (byte % int(sizeof(Storage)));

bit = byte_offset * 8 + bit_offset + kPredicateStart;
}
Expand Down
2 changes: 1 addition & 1 deletion test/unit/gemm/device/testbed.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ struct Testbed {
else if (dist_kind == cutlass::Distribution::Sequential) {

cutlass::reference::host::BlockFillSequential(
-view.data(), view.capacity());
+view.data(), int64_t(view.capacity()));
}
else {
EXPECT_TRUE(false) << "Not implemented";
Expand Down
2 changes: 1 addition & 1 deletion test/unit/gemm/device/testbed_universal.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ struct TestbedUniversal {
else if (dist_kind == cutlass::Distribution::Sequential) {

cutlass::reference::host::BlockFillSequential(
-view.data(), view.capacity());
+view.data(), int64_t(view.capacity()));
}
else {
EXPECT_TRUE(false) << "Not implemented";
Expand Down

0 comments on commit 21c0d6b

Please sign in to comment.