- ふかうら王、持ち時間制御その6

- 合流判定追加

合流判定用のテスト局面。
```
multipv 2
DebugMessage true
isready
usinewgame
position startpos moves 2g2f 8c8d 7g7f 4a3b 2f2e 8d8e 8h7g 3c3d 7i7h 2b7g+ 7h7g 3a2b 4g4f 5a4b 9g9f B*4g B*3h 4g7d+ 3h7d 7c7d 6i7h 7a7b 1g1f 2b3c 3i4h 1c1d 4h4g 9c9d 4i4h 8a7c 6g6f 6c6d 3g3f 6d6e 2i3g 6e6f 4g5f 7b6c 7g6f 6c5d 2h2i 8e8f 8g8f 6a6b 5f4e 5d6e 6f6e 7c6e 4e5f S*3h 4h3h 6e5g+ S*6e B*6f P*5h 5g5f 6e5f 6f9i+ N*5d 4b3a 5d6b+ 9i8i G*7i 8i7i 7h7i 8b6b B*7c 6b6c 7c5a+ G*4a P*6d 6c6d 5a4a 3a4a B*5e 6d6c G*7c 6c6a S*6b 6a8a 2e2d 2c2d 7c8b P*6a 6b5c+ L*5a 3g4e S*4d 4e3c+ 2a3c 8b8a 4d5e S*2c 5a5c 2c3b 4a3b 5f5e B*7g P*6h B*4a G*2c 3b2c R*2a S*2b S*3a N*6g 5i4i S*1c 3a2b 1c2b S*3a S*1c
go btime 100000 wtime 100000 byoyomi 10000
```
→　千日手なのでnode作られてなかった…。
```
multipv 2
DebugMessage true
isready
usinewgame
position startpos moves 7g7f 4a3b 2g2f 8c8d 2f2e 8d8e 8h7g 3c3d 7i7h 2b7g+ 7h7g 3a2b 1g1f 1c1d 3i3h 5a4b 9g9f 7a7b 3g3f 6c6d 5i6h 7c7d 6i7h 8a7c 3f3e 7c6e 7g6f 8e8f 8g8f 8b8f P*8h 7d7e B*5e 7b6c 3h3g B*3c 5e3c+ 2b3c 3e3d 3c3d 6f6e 6d6e B*7g 8f8a N*7c 8a7a 7c6a+ 7a6a 7g1a+ 2a3c 1a1b 6e6f 6g6f P*6g 6h5h 3c4e 3g4f N*5d P*3c 3b3c P*3e 5d6f 5h4h P*3f L*3h S*3g 2i3g 3f3g+ 3h3g
go btime 100000 wtime 100000 byoyomi 10000
```
→　合流はしているのだが、桂不成だと同玉を強制できないので評価値と訪問回数に差がある。
yaneurao · Feb 6, 2024 · a44e990 · a44e990
1 parent 801449d
commit a44e990
Show file tree

Hide file tree

Showing 2 changed files with 123 additions and 61 deletions.
diff --git a/source/engine/dlshogi-engine/dlshogi_searcher.cpp b/source/engine/dlshogi-engine/dlshogi_searcher.cpp
@@ -572,9 +572,32 @@ namespace dlshogi
 		}
 	}
 
+	// Helper for pv_key(): follows the most-visited child of `node` for up to
+	// `ply` further plies, recording hash keys on the way down.
+	//
+	//   ply      : number of plies still to walk from `pos`.
+	//   keys     : output array, indexed by the remaining ply count. The entries
+	//              below the top level must already be zero-filled by the caller.
+	//   is_entry : true only for the outermost call. For ply > 0 the entry
+	//              position is NOT recorded, because the caller's array only has
+	//              `ply` slots (indices 0..ply-1) and keys[ply] would be one past
+	//              the end.
+	static void pv_key_impl(Position& pos, Node* node, int ply, Key64 keys[], bool is_entry)
+	{
+		// Requested depth reached: always record the key of this position.
+		if (ply == 0)
+		{
+			keys[0] = pos.hash_key();
+			return;
+		}
+
+		const bool has_child = node != nullptr && node->child_num != 0;
+
+		// The PV ends here (no node, or no children). keys[ply] and every entry
+		// below it keep the 0 written by pv_key(), which callers read as "no key".
+		if (!has_child)
+			return;
+
+		// Intermediate position from which the PV continues: record its key.
+		// (Previously only the terminal level was written, so callers read
+		//  uninitialized entries such as keys[2].)
+		if (!is_entry)
+			keys[ply] = pos.hash_key();
+
+		// Pick the child with the largest move_count. The strict '>' keeps the
+		// lowest index when several children share the same count.
+		ChildNumType max_i = 0;
+		for (ChildNumType i = 1; i < node->child_num; ++i)
+			if (node->child[i].move_count > node->child[max_i].move_count)
+				max_i = i;
+
+		// One StateInfo per recursion level; it stays alive until undo_move().
+		StateInfo si;
+		Move m = node->child[max_i].move;
+		//sync_cout << to_usi_string(m) << sync_endl;
+		pos.do_move(m, si);
+		// NOTE(review): assumes node->child_nodes is allocated whenever
+		// child_num != 0 (same assumption as before this change) - confirm.
+		pv_key_impl(pos, node->child_nodes[max_i].get(), ply - 1, keys, false);
+		pos.undo_move(m);
+	}
+
+	// Collects the hash keys of the positions along the PV (most-visited child
+	// at each node) starting from `pos`, whose search node is `node`.
+	//
+	//   pos  : position to start from. Moves are played with do_move() and taken
+	//          back with undo_move() before returning.
+	//   node : search node corresponding to `pos` (may be nullptr).
+	//   ply  : number of plies to follow.
+	//   keys : output array.
+	//          ply > 0  : needs `ply` entries; keys[r] (0 <= r < ply) receives the
+	//                     key of the PV position that has r plies left to walk.
+	//                     It is 0 when the PV ends before that position, or when
+	//                     r > 0 and that position has no node / no children.
+	//          ply == 0 : keys[0] receives the key of `pos` itself.
+	//          ply < 0  : nothing is written.
+	void pv_key(Position& pos, Node* node, int ply, Key64 keys[])
+	{
+		if (ply < 0)
+			return;
+
+		// Zero-fill first so that every entry the caller may read is defined,
+		// even when the PV is shorter than `ply`.
+		for (int i = 0; i < ply; ++i)
+			keys[i] = 0;
+
+		pv_key_impl(pos, node, ply, keys, true);
+	}
+
 	//  探索停止の確認
 	// SearchInterruptionCheckerから呼び出される。
-	void DlshogiSearcher::InterruptionCheck()
+	void DlshogiSearcher::InterruptionCheck(const Position& rootPos)
 	{
 		auto& s = search_limits;
 		auto& o = search_options;
@@ -706,13 +729,14 @@ namespace dlshogi
 		// Time management (LC0 blog)     : https://lczero.org/blog/2018/09/time-management/
 		// PR1195: Time management update : https://lczero.org/dev/docs/timemgr/
 		double game_ply_factor =
-			  s.game_ply <  20 ? 1.2 // 序盤では時間あまり使わないように。(時間を使ったところでそんなに良い指し手になるわけではないから)
-			: s.game_ply <  30 ? 1.5
-			: 2.0;
-
-		// やねうら王のtimemanのoptimum、ふかうら王にとっては少ないので
-		// optimumを2倍にして考える。
-		optimum = (TimePoint)std::min((double)optimum * 2, (double)maximum);
+			  s.game_ply <  20 ? 1.5 // 序盤では時間あまり使わないように。(時間を使ったところでそんなに良い指し手になるわけではないから)
+			: s.game_ply <  30 ? 3.5
+			: s.game_ply <  40 ? 4.0
+			: 3.0;
+		// ⇑ここ、なめらかなほうがいいのかも知れないが、
+		// もともと目分量で決めてるものなので細かいことは気にしないことにする。
+
+		// maximum時間を基準に考えるので、これをoptimumをベースとして再計算する。
 		maximum = (TimePoint)std::min((double)optimum * game_ply_factor  , (double)maximum);
 
 		// elapsed         : "go" , もしくは"go ponder"～"ponderhit"(のponderhit)からの経過時間
@@ -732,10 +756,11 @@ namespace dlshogi
 
 		auto elapsed    = s.time_manager.elapsed();
 
+		// 残りoptimum po(予測値)
+		//s64 rest_optimum_po = std::max((s64)(s.nodes_searched * (optimum - elapsed_from_ponderhit) / (elapsed + 1)), (s64)0);
+
 		// 最大残りpo(予測値)
 		s64 rest_maximum_po = std::max((s64)(s.nodes_searched * (maximum - elapsed_from_ponderhit) / (elapsed + 1)), (s64)0);
-		// 残りoptimum po(予測値)
-		s64 rest_optimum_po = std::max((s64)(s.nodes_searched * (optimum - elapsed_from_ponderhit) / (elapsed + 1)), (s64)0);
 
 		// 残りの探索を全て次善手に費やしても optimum_timeまでに
 		// 最善手を超えられない場合は探索を打ち切る。
@@ -744,27 +769,29 @@ namespace dlshogi
 		// second_searched : move_countが2番目の指し手のmove_count
 		// すなわち、best_searched >= second_searched が成り立つ。
 
-		NodeCountType best_searched = 0, second_searched = 0;
-
 		const ChildNode* uct_child = current_root->child.get();
 
 		// その時のindex
-		int best_i = 0, second_i = 0;
+		int best_i = 0, second_i = -1 , third_i = -1;
 
 		// 探索回数が最も多い手と次に多い手の評価値を求める。
-		for (int i = 0; i < child_num; i++) {
-			if (uct_child[i].move_count > best_searched) {
-				second_searched = best_searched;
-				best_searched   = uct_child[i].move_count;
+		for (int i = 1; i < child_num; i++) {
+			if (uct_child[i].move_count > uct_child[best_i].move_count) {
+				third_i         = second_i;
 				second_i        = best_i;
 				best_i          = i;
 			}
-			else if (uct_child[i].move_count > second_searched) {
-				second_searched = uct_child[i].move_count;
+			else if (second_i == -1 || uct_child[i].move_count > uct_child[second_i].move_count) {
+				third_i         = second_i;
 				second_i        = i;
 			}
+			else if (third_i == -1 || uct_child[i].move_count > uct_child[third_i].move_count)
+				third_i         = i;
 		}
 
+		NodeCountType best_searched   = uct_child[best_i  ].move_count;
+		NodeCountType second_searched = uct_child[second_i].move_count;
+
 		// best_winrate   : move_countが最大の指し手の勝率
 		// second_winrate : move_countが2番目の指し手の勝率
 		// ※　best_winrate >= second_winrate とは限らないので注意。
@@ -776,78 +803,111 @@ namespace dlshogi
 		// 条件に該当したらbreak(思考を終了)、さもなくばreturnするためのfor loop。
 		for(;;)
 		{
-			if (rest_optimum_po > 0 /* optimum時間が残っている */)
+			if (rest_maximum_po > 0 /* maximum時間が残っている */)
 			{
+				WinType eval_diff = best_winrate - second_winrate;
+				bool converged = false;
+
+				// special case : 指し手が合流してると推測されるケース。
+				if (second_i!=third_i && third_i != -1 && elapsed >= optimum / 8
+					// && std::abs(eval_diff) < 0.02 && best_searched < second_searched * 1.1
+					// ⇑この条件なくてもいいや。(桂成と桂不成みたいなケースにおいてはpolicyに差があるからevalが近い値にならない。)
+					)
+				{
+					// 指し手が本当に合流しているかPVの4手先を辿って確認する。
+					// →　合流した結果千日手になるパターンは、nodeが作られてないから、このチェックにひっかからない。
+
+					Node* node1 = current_root->child_nodes[best_i  ].get();
+					Node* node2 = current_root->child_nodes[second_i].get();
+
+					if (node1 != nullptr && node2 != nullptr)
+					{
+						// rootPosはスレッドごとに用意されているのでmemcpyして問題ない。
+						Position pos;
+						memcpy(&pos, &rootPos, sizeof(Position));
+						StateInfo si;
+
+						Move m1 = uct_child[best_i  ].move;
+						Move m2 = uct_child[second_i].move;
+
+						//sync_cout << to_usi_string(m1) << sync_endl;
+						//sync_cout << to_usi_string(m2) << sync_endl;
+
+						pos.do_move(m1,si);
+						Key64 k1[3],k2[3];
+						pv_key(pos, node1, 3, k1); // 3手先までのhash key 
+						pos.undo_move(m1);
+						pos.do_move(m2,si);
+						pv_key(pos, node2, 3, k2); // 3手先までのhash key 
+						pos.undo_move(m2);
+
+						// 現局面から数えてPVの2手先が一致するか4手先が一致するか。
+						if ((k1[0] == k2[0] && Key(k1[0]) != 0) || (k1[2] == k2[2] && Key(k1[2]) != 0) )
+						{
+							// 合流しているので3番目の指し手と比較する。
+
+							// 他の指し手が台頭してきていれば良いのだが..
+							NodeCountType third_searched  = uct_child[third_i ].move_count;
+							WinType third_winrate         = uct_child[third_i ].win / (third_searched + delta);
+
+							// 3番目の指し手を2番目の指し手とみなす。
+							// これでこのあとの早期終了条件を満たすならそれで停止させれば良い。
+							second_searched = third_searched;
+							second_winrate  = third_winrate;
+							eval_diff       = best_winrate - third_winrate;
+
+							converged = true;
+						}
+					}
+				}
+
 				// 安定した探索であると言える条件は、bestの訪問回数がsecondの1.5倍以上(この条件、重要)かつ、
 				// bestの期待勝率がsecondの期待勝率を上回ること。
 				if (   best_winrate >= second_winrate
 					&& best_searched >= second_searched * 1.5
 					)
 				{
-					WinType eval_diff = best_winrate - second_winrate;
 					// bestとsecondの勝率に応じて早期に思考を終了しても良いという考え。
 					// 勝率差0.2なら、探索が早期に終了して良いと思う。
 					WinType ratio = std::max( 1.0 - eval_diff * 5 , 0.0 );
 
-					// 経過時間がoptimum /8 を超えてるのに残りoptimum時間をすべて用いても訪問数が逆転しない。
+					// 経過時間がoptimum /4 を超えてるのに残りmaximum時間をすべて用いても訪問数が逆転しない。
 					// ただしこの時、eval_diffが0.1なら50%というように、eval_diffの値に応じてrest_optimum_poを減らして考える。
-					if (   elapsed_from_ponderhit >= optimum / 8
-						&& best_searched > second_searched + rest_optimum_po * ratio
+					if (   elapsed >= optimum / 4
+						&& best_searched > second_searched + rest_maximum_po * ratio
 						)
 					{
 						if (o.debug_message)
 							sync_cout << "info string interrupted by early exit"
-							<< " , best_searched > second_searched + rest_optimum_po * " << ratio << " "
+							<< " , best_searched > second_searched + rest_maximum_po * " << ratio << " "
 							<< " , best_searched = "   << best_searched
 							<< " , second_searched = " << second_searched
-							<< " , rest_optimum_po = " << rest_optimum_po
+							<< " , rest_maximum_po = " << rest_maximum_po
+							<< " , elapsed = "         << elapsed
+							<< " , eval_diff = "       << eval_diff
 							<< " , ratio = "           << ratio
+							<< " , converged = "       << converged
 							<< sync_endl;
 
 						break;
 					}
 				}
 
-			} else if (rest_maximum_po > 0){
-				// && rest_optimum_po == 0 /* optimum時間が残っていない */
-
-				// optimum時間超えてて、訪問回数,evalの関係がおかしくないならmaximumまで時間を使わずして終了。
-
-				if (   best_winrate  >= second_winrate
-					&& best_searched >= second_searched * 1.5
-					)
-				{
-					if (o.debug_message)
-						sync_cout << "info string optimum time is over"
-						<< " , best_winrate  >= second_winrate && best_searched >= second_searched * 1.5"
-						<< " , best_winrate = "    << best_winrate
-						<< " , second_winrate = "  << second_winrate
-						<< " , best_searched = "   << best_searched
-						<< " , second_searched = " << second_searched
-						<< sync_endl;
-
-					break;
-				}
-
 				// いま、おそらく best_winrate < second_winrate なので
 				// これの行く末を見守る必要がある。(best_winrate >= second_winrateであって欲しい)
 				// しかし、どう頑張っても訪問回数で叶わないなら、諦める。
 
-				// optimum時間を超えていて、残り時間をすべて使っても訪問回数が逆転しない。
+				// maximum時間までの残り時間をすべて使っても訪問回数が逆転しない。
 				// ⇨　残念だけど、あきらめる。
 
-				// 勝率差を考慮して多少思考時間を縮める。
-				WinType eval_diff = best_winrate - second_winrate;
-				WinType ratio = std::max( 1.0 - eval_diff * 3 , 0.0 );
-				if (best_searched > second_searched + rest_maximum_po * ratio)
+				if (best_searched > second_searched + rest_maximum_po)
 				{
 					if (o.debug_message)
-						sync_cout << "info string optimum time is over"
-						<< " , best_searched > second_searched + rest_maximum_po * ratio"
+						sync_cout << "info string interrupted by retirement"
+						<< " , best_searched > second_searched + rest_maximum_po"
 						<< " , best_searched = "   << best_searched
 						<< " , second_searched = " << second_searched
 						<< " , rest_maximum_po = " << rest_maximum_po
-						<< " , ratio = "           << ratio
 						<< sync_endl;
 
 					break;
@@ -860,7 +920,9 @@ namespace dlshogi
 					<< " , rest_maximum_po == 0"
 					<< " , best_winrate = "    << best_winrate
 					<< " , second_winrate = "  << second_winrate
-					<< " , rest_optimum_po = " << rest_optimum_po << sync_endl;
+					<< " , best_searched = "   << best_searched
+					<< " , second_searched = " << second_searched
+					<< sync_endl;
 
 				break;
 			}
@@ -897,7 +959,7 @@ namespace dlshogi
 
 		// 探索終了判定用。
 		else if (thread_id == s)
-			interruption_checker->Worker();
+			interruption_checker->Worker(rootPos);
 
 		else if (thread_id == s + 1)
 			root_dfpn_searcher->search(rootPos, search_options.root_mate_search_nodes_limit); // df-pnの探索ノード数制限
@@ -912,7 +974,7 @@ namespace dlshogi
 
 	// ガーベジ用のスレッドが実行するworker
 	// 探索開始時にこの関数を呼び出す。
-	void SearchInterruptionChecker::Worker()
+	void SearchInterruptionChecker::Worker(const Position& rootPos)
 	{
 		// スレッド停止命令が来るまで、kCheckIntervalMs[ms]ごとにInterruptionCheck()を実行する。
 
@@ -928,7 +990,7 @@ namespace dlshogi
 			std::this_thread::sleep_for(std::chrono::milliseconds(kCheckIntervalMs));
 
 			// 探索の終了チェック
-			ds->InterruptionCheck();
+			ds->InterruptionCheck(rootPos);
 
 			// ここにも終了判定を入れておいたほうが、探索停止確定にPV出力しなくてよろしい。
 			if (stop())

diff --git a/source/engine/dlshogi-engine/dlshogi_searcher.h b/source/engine/dlshogi-engine/dlshogi_searcher.h
@@ -395,7 +395,7 @@ namespace dlshogi
 
 		//  探索停止の確認
 		// SearchInterruptionCheckerから呼び出される。
-		void InterruptionCheck();
+		void InterruptionCheck(const Position& rootPos);
 
 		// PV表示の確認
 		// SearchInterruptionCheckerから呼び出される。
@@ -473,7 +473,7 @@ namespace dlshogi
 
 		// ガーベジ用のスレッドが実行するworker
 		// 探索開始時にこの関数を呼び出す。
-		void Worker();
+		void Worker(const Position& rootPos);
 
 	private:
 		DlshogiSearcher* ds;