Skip to content

Commit

Permalink
AVX-512でクラッシュする問題を修正 (#287)
Browse files Browse the repository at this point in the history
* AVX-512でクラッシュする問題を修正

* AVX-512で入力特徴量を変換する際にすべてのトリガーに対して積算できていないのを修正
  • Loading branch information
KazApps authored Oct 7, 2024
1 parent 1a8a697 commit 34214eb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
3 changes: 2 additions & 1 deletion source/eval/nnue/nnue_accumulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ namespace NNUE {

// 入力特徴量をアフィン変換した結果を保持するクラス
// 最終的な出力である評価値も一緒に持たせておく
struct alignas(32) Accumulator {
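// The AVX-512 aligned load/store intrinsics (e.g. _mm512_load_si512) require
// 64-byte-aligned addresses, so the accumulator must be aligned to 64 bytes.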
struct alignas(64) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score = VALUE_ZERO;
Expand Down
14 changes: 14 additions & 0 deletions source/eval/nnue/nnue_feature_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ class FeatureTransformer {
_mm512_load_si512(&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m512i sum1 =
_mm512_load_si512(&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm512_add_epi16(
sum0, reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm512_add_epi16(
sum1, reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(
kControl, _mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
}
Expand Down Expand Up @@ -289,7 +295,11 @@ class FeatureTransformer {
const IndexType offset = kHalfDimensions * index;
auto accumulation = reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#else
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
#endif
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vec_add_16(accumulation[j], column[j]);
}
Expand Down Expand Up @@ -327,7 +337,11 @@ class FeatureTransformer {
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], removed_indices, added_indices, reset);
for (Color perspective : {BLACK, WHITE}) {
#if defined(VECTOR)
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#else
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
#endif
auto accumulation = reinterpret_cast<vec_t*>(&accumulator.accumulation[perspective][i][0]);
#endif
if (reset[perspective]) {
Expand Down

0 comments on commit 34214eb

Please sign in to comment.