From d2097606a324db186bdaa02ca454b7e57aff6606 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 14 Nov 2024 14:23:56 +0800 Subject: [PATCH 1/3] Fix rng for the column sampler. --- src/common/random.h | 2 +- tests/python/test_updaters.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/common/random.h b/src/common/random.h index e5781f27dd01..434c30896412 100644 --- a/src/common/random.h +++ b/src/common/random.h @@ -233,7 +233,7 @@ class ColumnSampler { }; inline auto MakeColumnSampler(Context const* ctx) { - std::uint32_t seed = common::GlobalRandomEngine()(); + std::uint32_t seed = common::GlobalRandom()(); auto rc = collective::Broadcast(ctx, linalg::MakeVec(&seed, 1), 0); collective::SafeColl(rc); auto cs = std::make_shared(seed); diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index 4ff3175cfab1..b580630741d4 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -54,6 +54,25 @@ def test_exact_sample_by_node_error(self) -> None: num_boost_round=2, ) + def test_hist_colsample_rng(self) -> None: + """Test rng has an effect on column sampling.""" + X, y, _ = tm.make_regression(128, 16, use_cupy=False) + reg0 = xgb.XGBRegressor( + n_estimators=2, + colsample_bynode=0.5, + random_state=42, + ) + reg0.fit(X, y) + + reg1 = xgb.XGBRegressor( + n_estimators=2, + colsample_bynode=0.5, + random_state=43, + ) + reg1.fit(X, y) + + assert list(reg0.feature_importances_) != list(reg1.feature_importances_) + @given( exact_parameter_strategy, hist_parameter_strategy, From 4fbeea1d7602dddbc9fdee52cbb19d18dbf25398 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 14 Nov 2024 14:37:17 +0800 Subject: [PATCH 2/3] Cleanup the GPU implementation as well. --- src/tree/updater_gpu_hist.cu | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 9ed0bd409e30..5a6236b6ccfb 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -826,10 +826,7 @@ class GPUHistMaker : public TreeUpdater { CHECK_GE(ctx_->Ordinal(), 0) << "Must have at least one device"; // Synchronise the column sampling seed - std::uint32_t column_sampling_seed = common::GlobalRandom()(); - SafeColl(collective::Broadcast( - ctx_, linalg::MakeVec(&column_sampling_seed, sizeof(column_sampling_seed)), 0)); - this->column_sampler_ = std::make_shared(column_sampling_seed); + this->column_sampler_ = common::MakeColumnSampler(ctx_); curt::SetDevice(ctx_->Ordinal()); p_fmat->Info().feature_types.SetDevice(ctx_->Device()); @@ -968,8 +965,7 @@ class GPUGlobalApproxMaker : public TreeUpdater { monitor_.Start(__func__); CHECK(ctx_->IsCUDA()) << error::InvalidCUDAOrdinal(); - uint32_t column_sampling_seed = common::GlobalRandom()(); - this->column_sampler_ = std::make_shared(column_sampling_seed); + this->column_sampler_ = common::MakeColumnSampler(ctx_); p_last_fmat_ = p_fmat; initialised_ = true; From 02592d5a5e97329ca3eac3e0f160b12b06bfe764 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 14 Nov 2024 14:40:39 +0800 Subject: [PATCH 3/3] Tests. --- tests/python/test_updaters.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index b580630741d4..3f2eb8aed52f 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -54,13 +54,15 @@ def test_exact_sample_by_node_error(self) -> None: num_boost_round=2, ) - def test_hist_colsample_rng(self) -> None: + @pytest.mark.parametrize("tree_method", ["approx", "hist"]) + def test_colsample_rng(self, tree_method: str) -> None: """Test rng has an effect on column sampling.""" X, y, _ = tm.make_regression(128, 16, use_cupy=False) reg0 = xgb.XGBRegressor( n_estimators=2, colsample_bynode=0.5, random_state=42, + tree_method=tree_method, ) reg0.fit(X, y) @@ -68,6 +70,7 @@ def test_hist_colsample_rng(self) -> None: n_estimators=2, colsample_bynode=0.5, random_state=43, + tree_method=tree_method, ) reg1.fit(X, y)