Skip to content

Commit

Permalink
test routing
Browse files Browse the repository at this point in the history
  • Loading branch information
PikaCat-OuO committed Jan 16, 2025
1 parent b9bbadb commit c5f74ae
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 54 deletions.
8 changes: 0 additions & 8 deletions src/nnue/features/half_ka_v2_hm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,6 @@ IndexType HalfKAv2_hm::make_attack_bucket(const Position& pos, Color c) {
return AttackBucket[pos.count<ROOK>(c)][pos.count<KNIGHT>(c)][pos.count<CANNON>(c)];
}

// Returns the layer-stack bucket for the side to move, looked up from the
// precomputed LayerStackBuckets table by rook count and knight+cannon count
// of each side.
IndexType HalfKAv2_hm::make_layer_stack_bucket(const Position& pos) {
    const Color stm      = pos.side_to_move();
    const Color opp      = ~stm;
    const int   stmMinor = pos.count<KNIGHT>(stm) + pos.count<CANNON>(stm);
    const int   oppMinor = pos.count<KNIGHT>(opp) + pos.count<CANNON>(opp);
    return LayerStackBuckets[pos.count<ROOK>(stm)][pos.count<ROOK>(opp)][stmMinor][oppMinor];
}

// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, int bucket, bool mirror) {
Expand Down
28 changes: 0 additions & 28 deletions src/nnue/features/half_ka_v2_hm.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,41 +166,13 @@ class HalfKAv2_hm {
return v;
}();

// LayerStack buckets: maps (our rooks, their rooks, our knights+cannons,
// their knights+cannons) to one of 16 layer-stack indices.
//  - equal rook counts: buckets 0-11, refined by total and parity of minors
//  - unbalanced rooks:  buckets 12-15 by which side is ahead
static constexpr auto LayerStackBuckets = [] {
    std::array<std::array<std::array<std::array<uint8_t, 5>, 5>, 3>, 3> v{};
    for (uint8_t myRook = 0; myRook <= 2; ++myRook)
        for (uint8_t oppRook = 0; oppRook <= 2; ++oppRook)
            for (uint8_t myMinor = 0; myMinor <= 4; ++myMinor)
                for (uint8_t oppMinor = 0; oppMinor <= 4; ++oppMinor)
                {
                    int bucket;
                    if (myRook == oppRook)
                        // 4 buckets per equal-rook class: many/few minors x
                        // equal/unequal minor counts
                        bucket = myRook * 4 + (myMinor + oppMinor >= 4 ? 2 : 0)
                               + (myMinor == oppMinor ? 1 : 0);
                    else if (myRook == 0)
                        bucket = 15;  // only the opponent has rooks
                    else if (oppRook == 0)
                        bucket = 14;  // only we have rooks
                    else
                        bucket = (myRook == 2 ? 12 : 13);  // 2-vs-1 / 1-vs-2 rooks
                    v[myRook][oppRook][myMinor][oppMinor] = uint8_t(bucket);
                }
    return v;
}();

// Maximum number of simultaneously active features.
static constexpr IndexType MaxActiveDimensions = 32;
using IndexList = ValueList<IndexType, MaxActiveDimensions>;

// Get attack bucket
static IndexType make_attack_bucket(const Position& pos, Color c);

// Get layer stack bucket
static IndexType make_layer_stack_bucket(const Position& pos);

// Index of a feature for a given king position and another piece on some square
template<Color Perspective>
static IndexType make_index(Square s, Piece pc, int bucket, bool mirror);
Expand Down
26 changes: 22 additions & 4 deletions src/nnue/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ Network::Network(const Network& other) :
if (other.featureTransformer)
featureTransformer = make_unique_large_page<FeatureTransformer>(*other.featureTransformer);

if (other.router)
router = make_unique_aligned<RouterArchitecture>(*other.router);

network = make_unique_aligned<NetworkArchitecture[]>(LayerStacks);

if (!other.network)
Expand All @@ -75,6 +78,8 @@ Network& Network::operator=(const Network& other) {

featureTransformer = make_unique_large_page<FeatureTransformer>(*other.featureTransformer);

router = make_unique_aligned<RouterArchitecture>(*other.router);

network = make_unique_aligned<NetworkArchitecture[]>(LayerStacks);

if (!other.network)
Expand Down Expand Up @@ -153,8 +158,12 @@ NetworkOutput Network::evaluate(const Position& pos, AccumulatorCaches::Cache* c

ASSERT_ALIGNED(transformedFeatures, alignment);

const int bucket = FeatureSet::make_layer_stack_bucket(pos);
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
featureTransformer->transform(pos, cache, transformedFeatures);
const int bucket = router->propagate(transformedFeatures);
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
const auto psqt =
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]) / 2;
const auto positional = network[bucket].propagate(transformedFeatures);

return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
Expand Down Expand Up @@ -226,11 +235,15 @@ NnueEvalTrace Network::trace_evaluate(const Position& pos, AccumulatorCaches::Ca
ASSERT_ALIGNED(transformedFeatures, alignment);

NnueEvalTrace t{};
t.correctBucket = FeatureSet::make_layer_stack_bucket(pos);
featureTransformer->transform(pos, cache, transformedFeatures);
t.correctBucket = router->propagate(transformedFeatures);
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
{
const auto materialist =
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
/ 2;
const auto positional = network[bucket].propagate(transformedFeatures);

t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);
Expand All @@ -255,6 +268,7 @@ void Network::load_user_net(const std::string& dir, const std::string& evalfileP

void Network::initialize() {
featureTransformer = make_unique_large_page<FeatureTransformer>();
router = make_unique_aligned<RouterArchitecture>();
network = make_unique_aligned<NetworkArchitecture[]>(LayerStacks);
}

Expand Down Expand Up @@ -312,6 +326,8 @@ bool Network::read_parameters(std::istream& stream, std::string& netDescription)
return false;
if (!Detail::read_parameters(stream, *featureTransformer))
return false;
if (!Detail::read_parameters(stream, *router))
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
{
if (!Detail::read_parameters(stream, network[i]))
Expand All @@ -326,6 +342,8 @@ bool Network::write_parameters(std::ostream& stream, const std::string& netDescr
return false;
if (!Detail::write_parameters(stream, *featureTransformer))
return false;
if (!Detail::write_parameters(stream, *router))
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
{
if (!Detail::write_parameters(stream, network[i]))
Expand Down
8 changes: 6 additions & 2 deletions src/nnue/network.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,18 @@ class Network {
// Input feature converter
LargePagePtr<FeatureTransformer> featureTransformer;

// Router
AlignedPtr<RouterArchitecture> router;

// Evaluation function
AlignedPtr<NetworkArchitecture[]> network;

EvalFile evalFile;

// Hash value of evaluation function structure
static constexpr std::uint32_t hash =
FeatureTransformer::get_hash_value() ^ NetworkArchitecture::get_hash_value();
static constexpr std::uint32_t hash = FeatureTransformer::get_hash_value()
^ RouterArchitecture::get_hash_value()
^ NetworkArchitecture::get_hash_value();

friend struct AccumulatorCaches::Cache;
};
Expand Down
71 changes: 71 additions & 0 deletions src/nnue/nnue_architecture.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,77 @@ constexpr IndexType TransformedFeatureDimensions = 2048;
constexpr IndexType PSQTBuckets = 16;
constexpr IndexType LayerStacks = 16;

struct RouterArchitecture {
static constexpr int FC_0_OUTPUTS = 16;
static constexpr int FC_1_OUTPUTS = 32;

Layers::AffineTransformSparseInput<TransformedFeatureDimensions, FC_0_OUTPUTS> fc_0;
Layers::SqrClippedReLU<FC_0_OUTPUTS> ac_sqr_0;
Layers::ClippedReLU<FC_0_OUTPUTS> ac_0;
Layers::AffineTransform<FC_0_OUTPUTS * 2, FC_1_OUTPUTS> fc_1;
Layers::ClippedReLU<FC_1_OUTPUTS> ac_1;
Layers::AffineTransform<FC_1_OUTPUTS, LayerStacks> fc_2;

// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
// input slice hash
std::uint32_t hashValue = 0xEC42E90Du;
hashValue ^= TransformedFeatureDimensions * 2;
hashValue = decltype(fc_0)::get_hash_value(hashValue);
hashValue = decltype(ac_0)::get_hash_value(hashValue);
hashValue = decltype(fc_1)::get_hash_value(hashValue);
hashValue = decltype(ac_1)::get_hash_value(hashValue);
hashValue = decltype(fc_2)::get_hash_value(hashValue);
return hashValue;
}

// Read network parameters
bool read_parameters(std::istream& stream) {
return fc_0.read_parameters(stream) && ac_0.read_parameters(stream)
&& fc_1.read_parameters(stream) && ac_1.read_parameters(stream)
&& fc_2.read_parameters(stream);
}

// Write network parameters
bool write_parameters(std::ostream& stream) const {
return fc_0.write_parameters(stream) && ac_0.write_parameters(stream)
&& fc_1.write_parameters(stream) && ac_1.write_parameters(stream)
&& fc_2.write_parameters(stream);
}

std::int32_t propagate(const TransformedFeatureType* transformedFeatures) {
struct alignas(CacheLineSize) Buffer {
alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
alignas(CacheLineSize) decltype(ac_0)::OutputType ac_0_out[FC_0_OUTPUTS * 2];
alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;

Buffer() { std::memset(this, 0, sizeof(*this)); }
};

#if defined(__clang__) && (__APPLE__)
// workaround for a bug reported with xcode 12
static thread_local auto tlsBuffer = std::make_unique<Buffer>();
// Access TLS only once, cache result.
Buffer& buffer = *tlsBuffer;
#else
alignas(CacheLineSize) static thread_local Buffer buffer;
#endif

fc_0.propagate(transformedFeatures, buffer.fc_0_out);
ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out + FC_0_OUTPUTS);
fc_1.propagate(buffer.ac_0_out, buffer.fc_1_out);
ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);

std::int32_t outputValue = std::distance(
buffer.fc_2_out, std::max_element(buffer.fc_2_out, buffer.fc_2_out + LayerStacks));
return outputValue;
}
};

struct NetworkArchitecture {
static constexpr int FC_0_OUTPUTS = 15;
static constexpr int FC_1_OUTPUTS = 32;
Expand Down
14 changes: 2 additions & 12 deletions src/nnue/nnue_feature_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,19 +341,11 @@ class FeatureTransformer {
}

// Convert input features
std::int32_t transform(const Position& pos,
AccumulatorCaches::Cache* cache,
OutputType* output,
int bucket) const {
void transform(const Position& pos, AccumulatorCaches::Cache* cache, OutputType* output) const {
update_accumulator<WHITE>(pos, cache);
update_accumulator<BLACK>(pos, cache);

const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;

const auto psqt =
(psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
/ 2;
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};

const auto& accumulation = pos.state()->accumulator.accumulation;

Expand Down Expand Up @@ -464,8 +456,6 @@ class FeatureTransformer {

#endif
}

return psqt;
} // end of function transform()

void hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache) const {
Expand Down

0 comments on commit c5f74ae

Please sign in to comment.