diff --git a/source/engine/dlshogi-engine/UctSearch.cpp b/source/engine/dlshogi-engine/UctSearch.cpp
index 808cf7b00..1abe5acae 100644
--- a/source/engine/dlshogi-engine/UctSearch.cpp
+++ b/source/engine/dlshogi-engine/UctSearch.cpp
@@ -1023,7 +1023,7 @@ namespace dlshogi
 		for (int i = 0; i < child_num; i++) {
 			if (uct_child[i].IsWin()) {
 				// Do not select a node whose loss is already proven.
-				if (child_win_count == i && uct_child[i].move_count > max_count) {
+				if (child_win_count == NodeCountType(i) && uct_child[i].move_count > max_count) {
 					// If every move loses, select the move with the largest visit count.
 					select_index = i;
 					max_count = uct_child[i].move_count;
diff --git a/source/engine/dlshogi-engine/YaneuraOu_dlshogi_bridge.cpp b/source/engine/dlshogi-engine/YaneuraOu_dlshogi_bridge.cpp
index 760fb4618..01c7c5e21 100644
--- a/source/engine/dlshogi-engine/YaneuraOu_dlshogi_bridge.cpp
+++ b/source/engine/dlshogi-engine/YaneuraOu_dlshogi_bridge.cpp
@@ -98,42 +98,17 @@ void USI::extra_option(USI::OptionsMap& o)
 
     // On an RTX 3090, 4 is optimal for a 10-block model and 2 for a 15-block model.
     o["UCT_Threads1"] << USI::Option(2, 0, 256);
-    o["UCT_Threads2"] << USI::Option(0, 0, 256);
-    o["UCT_Threads3"] << USI::Option(0, 0, 256);
-    o["UCT_Threads4"] << USI::Option(0, 0, 256);
-    o["UCT_Threads5"] << USI::Option(0, 0, 256);
-    o["UCT_Threads6"] << USI::Option(0, 0, 256);
-    o["UCT_Threads7"] << USI::Option(0, 0, 256);
-    o["UCT_Threads8"] << USI::Option(0, 0, 256);
-    o["UCT_Threads9"] << USI::Option(0, 0, 256);
-    o["UCT_Threads10"] << USI::Option(0, 0, 256);
-    o["UCT_Threads11"] << USI::Option(0, 0, 256);
-    o["UCT_Threads12"] << USI::Option(0, 0, 256);
-    o["UCT_Threads13"] << USI::Option(0, 0, 256);
-    o["UCT_Threads14"] << USI::Option(0, 0, 256);
-    o["UCT_Threads15"] << USI::Option(0, 0, 256);
-    o["UCT_Threads16"] << USI::Option(0, 0, 256);
+    for (int i = 2; i <= max_gpu ; ++i)
+        o["UCT_Threads" + std::to_string(i)] << USI::Option(0, 0, 256);
+
 #if defined(COREML)
     // Core ML needs a model in its own format rather than ONNX.
     o["DNN_Model1"] << USI::Option(R"(model.mlmodel)");
 #else
     o["DNN_Model1"] << USI::Option(R"(model.onnx)");
 #endif
-    o["DNN_Model2"] << USI::Option("");
-    o["DNN_Model3"] << USI::Option("");
-    o["DNN_Model4"] << USI::Option("");
-    o["DNN_Model5"] << USI::Option("");
-    o["DNN_Model6"] << USI::Option("");
-    o["DNN_Model7"] << USI::Option("");
-    o["DNN_Model8"] << USI::Option("");
-    o["DNN_Model9"] << USI::Option("");
-    o["DNN_Model10"] << USI::Option("");
-    o["DNN_Model11"] << USI::Option("");
-    o["DNN_Model12"] << USI::Option("");
-    o["DNN_Model13"] << USI::Option("");
-    o["DNN_Model14"] << USI::Option("");
-    o["DNN_Model15"] << USI::Option("");
-    o["DNN_Model16"] << USI::Option("");
+    for (int i = 2; i <= max_gpu ; ++i)
+        o["DNN_Model" + std::to_string(i)] << USI::Option("");
 
 #if defined(TENSOR_RT) || defined(ORT_TRT)
     // Recommended: 128 for normal play, 256 for analysis.
@@ -145,21 +120,8 @@ void USI::extra_option(USI::OptionsMap& o)
     // On the M1 chip, throughput saturates at around 8.
     o["DNN_Batch_Size1"] << USI::Option(8, 1, 1024);
 #endif
-    o["DNN_Batch_Size2"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size3"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size4"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size5"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size6"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size7"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size8"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size9"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size10"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size11"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size12"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size13"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size14"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size15"] << USI::Option(0, 0, 1024);
-    o["DNN_Batch_Size16"] << USI::Option(0, 0, 1024);
+    for (int i = 2; i <= max_gpu ; ++i)
+        o["DNN_Batch_Size" + std::to_string(i)] << USI::Option(0, 0, 1024);
 
     //(*this)["Const_Playout"] = USIOption(0, 0, int_max); // → fixed playout count; unnecessary since NodesLimit can do this.
 
@@ -212,25 +174,22 @@ void Search::clear()
 
     // Set the number of threads and each GPU's batch size on the searcher.
-    const int new_thread[max_gpu] = {
-        (int)Options["UCT_Threads1" ], (int)Options["UCT_Threads2" ], (int)Options["UCT_Threads3" ], (int)Options["UCT_Threads4" ],
-        (int)Options["UCT_Threads5" ], (int)Options["UCT_Threads6" ], (int)Options["UCT_Threads7" ], (int)Options["UCT_Threads8" ],
-        (int)Options["UCT_Threads9" ], (int)Options["UCT_Threads10"], (int)Options["UCT_Threads11"], (int)Options["UCT_Threads12"],
-        (int)Options["UCT_Threads13"], (int)Options["UCT_Threads14"], (int)Options["UCT_Threads15"], (int)Options["UCT_Threads16"]
-    };
-    const int new_policy_value_batch_maxsize[max_gpu] = {
-        (int)Options["DNN_Batch_Size1" ], (int)Options["DNN_Batch_Size2" ], (int)Options["DNN_Batch_Size3" ], (int)Options["DNN_Batch_Size4" ],
-        (int)Options["DNN_Batch_Size5" ], (int)Options["DNN_Batch_Size6" ], (int)Options["DNN_Batch_Size7" ], (int)Options["DNN_Batch_Size8" ],
-        (int)Options["DNN_Batch_Size9" ], (int)Options["DNN_Batch_Size10"], (int)Options["DNN_Batch_Size11"], (int)Options["DNN_Batch_Size12"],
-        (int)Options["DNN_Batch_Size13"], (int)Options["DNN_Batch_Size14"], (int)Options["DNN_Batch_Size15"], (int)Options["DNN_Batch_Size16"]
-    };
+    std::vector<int> new_thread;
+    std::vector<int> new_policy_value_batch_maxsize;
+    for (int i = 1; i <= max_gpu; ++i)
+    {
+        // If GPU_unlimited(), all of these refer to UCT_Threads1 and DNN_Batch_Size1.
+        new_thread.emplace_back((int)Options["UCT_Threads" + std::to_string(i)]);
+        new_policy_value_batch_maxsize.emplace_back((int)Options["DNN_Batch_Size" + std::to_string(i)]);
+    }
+
     // Get the number of supported devices.
     int device_count = NN::get_device_count();
 
     std::vector<int> thread_nums;
     std::vector<int> policy_value_batch_maxsizes;
-    for (int i = 0; i < max_gpu; ++i)
+    for (int i = 0; i < max_gpu ; ++i)
     {
         // Treat the thread count of any device ID at or beyond the supported device count as 0 (disables the device).
         thread_nums.push_back(i < device_count ? new_thread[i] : 0);
diff --git a/source/eval/deep/nn_types.cpp b/source/eval/deep/nn_types.cpp
index ab31c6840..c72a9ca0a 100644
--- a/source/eval/deep/nn_types.cpp
+++ b/source/eval/deep/nn_types.cpp
@@ -622,12 +622,9 @@ namespace Eval::dlshogi
 
     Result init_model_paths()
     {
-        const std::string model_paths[max_gpu] = {
-            Options["DNN_Model1"], Options["DNN_Model2"], Options["DNN_Model3"], Options["DNN_Model4"],
-            Options["DNN_Model5"], Options["DNN_Model6"], Options["DNN_Model7"], Options["DNN_Model8"],
-            Options["DNN_Model9"], Options["DNN_Model10"], Options["DNN_Model11"], Options["DNN_Model12"],
-            Options["DNN_Model13"], Options["DNN_Model14"], Options["DNN_Model15"], Options["DNN_Model16"]
-        };
+        std::vector<std::string> model_paths;
+        for (int i = 1; i <= max_gpu ; ++i)
+            model_paths.emplace_back(Options["DNN_Model" + std::to_string(i)]);
 
         string eval_dir = Options["EvalDir"];
 
@@ -640,7 +637,7 @@ namespace Eval::dlshogi
         // Check that the model files exist.
         bool is_err = false;
-        for (int i = 0; i < max_gpu; ++i) {
+        for (int i = 0; i < max_gpu ; ++i) {
             if (model_paths[i] != "") {
                 string path = Path::Combine(eval_dir, model_paths[i].c_str());
diff --git a/source/eval/deep/nn_types.h b/source/eval/deep/nn_types.h
index a98a9390c..554878b6d 100644
--- a/source/eval/deep/nn_types.h
+++ b/source/eval/deep/nn_types.h
@@ -20,7 +20,10 @@ namespace Eval::dlshogi
     // === GPU-related settings ===
 
     // Maximum number of GPUs (no more than this can be handled)
-    constexpr int max_gpu = 16;
+#if !defined(MAX_GPU)
+    constexpr int MAX_GPU = 16;
+#endif
+    constexpr int max_gpu = MAX_GPU;
 
     // === Definitions of the input/output features ===
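For reference, the registration loop introduced above can be sketched in isolation. This is a minimal mock, not the engine's API: Option and OptionsMap below are hypothetical stand-ins for USI::Option and USI::OptionsMap, with the operator<< registration reduced to plain map assignment.

// toy_options.cpp -- hypothetical stand-ins, only to illustrate the
// "UCT_Threads" + std::to_string(i) registration loop used in the patch.
#include <iostream>
#include <map>
#include <string>

struct Option { int def, min, max; };             // spin option: default/min/max
using OptionsMap = std::map<std::string, Option>; // option name -> option

constexpr int max_gpu = 16;

int main()
{
    OptionsMap o;
    o["UCT_Threads1"] = { 2, 0, 256 };            // GPU 1 keeps its tuned default
    for (int i = 2; i <= max_gpu; ++i)            // GPUs 2..16 default to 0 (disabled)
        o["UCT_Threads" + std::to_string(i)] = { 0, 0, 256 };

    std::cout << o.size() << " options registered\n"; // -> 16 options registered
    std::cout << o["UCT_Threads7"].def << "\n";       // -> 0
}

The same pattern repeats in the patch for DNN_Model and DNN_Batch_Size, so changing max_gpu extends all three option families at once instead of requiring 45 hand-written registration lines.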
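The nn_types.h hunk turns the GPU cap into a build-time parameter. A self-contained sketch of the same fallback pattern, assuming only that the build may pass -DMAX_GPU=<n>:

// max_gpu_demo.cpp -- same #if !defined(MAX_GPU) fallback as in nn_types.h.
// Default build:   g++ max_gpu_demo.cpp && ./a.out            -> max_gpu = 16
// Override:        g++ -DMAX_GPU=8 max_gpu_demo.cpp && ./a.out -> max_gpu = 8
#include <iostream>

#if !defined(MAX_GPU)
constexpr int MAX_GPU = 16;      // default when the build defines no macro
#endif
constexpr int max_gpu = MAX_GPU; // picks up either the macro or the constexpr default

int main()
{
    std::cout << "max_gpu = " << max_gpu << "\n";
}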