From 631e0a2a7bdd694a91f30378fb271d05ce438122 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 18 Mar 2024 23:24:14 -0500 Subject: [PATCH] [ci] prevent trailing whitespace, ensure files end with newline (#6373) --- .ci/install-clang-devel.sh | 2 +- .pre-commit-config.yaml | 10 +++ R-package/LICENSE | 2 +- R-package/cran-comments.md | 2 +- SECURITY.md | 2 +- build-cran-package.sh | 2 +- docs/Advanced-Topics.rst | 8 +- docs/Features.rst | 2 +- docs/GPU-Targets.rst | 6 +- docs/GPU-Tutorial.rst | 2 +- docs/Key-Events.md | 2 +- docs/Makefile | 2 +- docs/_static/images/artifacts-download.svg | 2 +- docs/_static/images/artifacts-fetching.svg | 2 +- .../images/artifacts-not-available.svg | 2 +- docs/_static/images/dask-concat.svg | 2 +- docs/_static/images/dask-initial-setup.svg | 2 +- examples/README.md | 8 +- examples/binary_classification/train.conf | 12 +-- .../binary_classification/train_linear.conf | 12 +-- examples/lambdarank/README.md | 2 +- examples/lambdarank/train.conf | 12 +-- examples/multiclass_classification/train.conf | 6 +- examples/parallel_learning/train.conf | 10 +-- examples/python-guide/README.md | 4 +- examples/regression/train.conf | 12 +-- examples/xendcg/README.md | 2 +- examples/xendcg/train.conf | 10 +-- include/LightGBM/bin.h | 2 +- include/LightGBM/network.h | 6 +- include/LightGBM/utils/common.h | 2 +- pmml/README.md | 2 +- src/c_api.cpp | 2 +- .../kernels/histogram_16_64_256.hu | 21 +++-- src/treelearner/leaf_splits.hpp | 4 +- src/treelearner/ocl/histogram16.cl | 82 +++++++++---------- src/treelearner/ocl/histogram256.cl | 38 ++++----- src/treelearner/ocl/histogram64.cl | 38 ++++----- swig/StringArray.i | 14 ++-- swig/pointer_manipulation.i | 6 +- tests/cpp_tests/testutils.cpp | 2 +- windows/LightGBM.vcxproj.filters | 2 +- 42 files changed, 186 insertions(+), 177 deletions(-) diff --git a/.ci/install-clang-devel.sh b/.ci/install-clang-devel.sh index 138fd73bd052..3556fccaebd8 100755 --- a/.ci/install-clang-devel.sh +++ b/.ci/install-clang-devel.sh @@ -56,7 +56,7 @@ cp --remove-destination /usr/lib/llvm-${CLANG_VERSION}/bin/* /usr/bin/ # per https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang # # clang was built to use libc++: for a version built to default to libstdc++ -# (as shipped by Fedora/Debian/Ubuntu), add -stdlib=libc++ to CXX +# (as shipped by Fedora/Debian/Ubuntu), add -stdlib=libc++ to CXX # and install the libcxx-devel/libc++-dev package. 
mkdir -p "${HOME}/.R" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e1fac76e586..8ffb6a8f8f2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,4 @@ +# exclude files which are auto-generated by build tools exclude: | (?x)^( build| @@ -5,8 +6,17 @@ exclude: | lightgbm-python| lightgbm_r| )$ + |R-package/configure$ + |R-package/inst/Makevars$ + |R-package/inst/Makevars.win$ + |R-package/man/.*Rd$ repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: diff --git a/R-package/LICENSE b/R-package/LICENSE index f8c67ce8d1d2..3c99fcc5b364 100644 --- a/R-package/LICENSE +++ b/R-package/LICENSE @@ -1,2 +1,2 @@ YEAR: 2016 -COPYRIGHT HOLDER: Microsoft Corporation \ No newline at end of file +COPYRIGHT HOLDER: Microsoft Corporation diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 91b33f135e1a..404c471c1c16 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -14,7 +14,7 @@ warning was not fixed within 14 days. ```text /usr/local/clang-trunk/bin/../include/c++/v1/__fwd/string_view.h:22:41: warning: 'char_traits' is deprecated: -char_traits for T not equal to char, wchar_t, char8_t, char16_t or char32_t is non-standard and is provided for a temporary period. +char_traits for T not equal to char, wchar_t, char8_t, char16_t or char32_t is non-standard and is provided for a temporary period. It will be removed in LLVM 19, so please migrate off of it. [-Wdeprecated-declarations] ``` diff --git a/SECURITY.md b/SECURITY.md index 869fdfe2b246..f0c2dc14f4fc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ Instead, please report them to the Microsoft Security Response Center (MSRC) at If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). -You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: diff --git a/build-cran-package.sh b/build-cran-package.sh index 9396450ed410..7ce5ab494638 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -4,7 +4,7 @@ # Prepare a source distribution of the R package # to be submitted to CRAN. # -# [arguments] +# [arguments] # # --r-executable Customize the R executable used by `R CMD build`. 
# Useful if building the R package in an environment with diff --git a/docs/Advanced-Topics.rst b/docs/Advanced-Topics.rst index 345a1361bfa9..b2eb0c3c9dc5 100644 --- a/docs/Advanced-Topics.rst +++ b/docs/Advanced-Topics.rst @@ -113,8 +113,8 @@ Unlike a categorical feature, however, ``positions`` are used to adjust the targ The position file corresponds with training data file line by line, and has one position per line. And if the name of training data file is ``train.txt``, the position file should be named as ``train.txt.position`` and placed in the same folder as the data file. In this case, LightGBM will load the position file automatically if it exists. The positions can also be specified through the ``Dataset`` constructor when using Python API. If the positions are specified in both approaches, the ``.position`` file will be ignored. -Currently, implemented is an approach to model position bias by using an idea of Generalized Additive Models (`GAM `_) to linearly decompose the document score ``s`` into the sum of a relevance component ``f`` and a positional component ``g``: ``s(x, pos) = f(x) + g(pos)`` where the former component depends on the original query-document features and the latter depends on the position of an item. -During the training, the compound scoring function ``s(x, pos)`` is fit with a standard ranking algorithm (e.g., LambdaMART) which boils down to jointly learning the relevance component ``f(x)`` (it is later returned as an unbiased model) and the position factors ``g(pos)`` that help better explain the observed (biased) labels. -Similar score decomposition ideas have previously been applied for classification & pointwise ranking tasks with assumptions of binary labels and binary relevance (a.k.a. "two-tower" models, refer to the papers: `Towards Disentangling Relevance and Bias in Unbiased Learning to Rank `_, `PAL: a position-bias aware learning framework for CTR prediction in live recommender systems `_, `A General Framework for Debiasing in CTR Prediction `_). -In LightGBM, we adapt this idea to general pairwise Lerarning-to-Rank with arbitrary ordinal relevance labels. +Currently, implemented is an approach to model position bias by using an idea of Generalized Additive Models (`GAM `_) to linearly decompose the document score ``s`` into the sum of a relevance component ``f`` and a positional component ``g``: ``s(x, pos) = f(x) + g(pos)`` where the former component depends on the original query-document features and the latter depends on the position of an item. +During the training, the compound scoring function ``s(x, pos)`` is fit with a standard ranking algorithm (e.g., LambdaMART) which boils down to jointly learning the relevance component ``f(x)`` (it is later returned as an unbiased model) and the position factors ``g(pos)`` that help better explain the observed (biased) labels. +Similar score decomposition ideas have previously been applied for classification & pointwise ranking tasks with assumptions of binary labels and binary relevance (a.k.a. "two-tower" models, refer to the papers: `Towards Disentangling Relevance and Bias in Unbiased Learning to Rank `_, `PAL: a position-bias aware learning framework for CTR prediction in live recommender systems `_, `A General Framework for Debiasing in CTR Prediction `_). +In LightGBM, we adapt this idea to general pairwise Learning-to-Rank with arbitrary ordinal relevance labels.
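An aside on the docs/Advanced-Topics.rst hunk above: the ``s(x, pos) = f(x) + g(pos)`` decomposition it describes is driven from the Python API through the ``Dataset`` constructor mentioned in that page. A minimal sketch, with all data, group sizes, and round counts invented for illustration:

```python
# Sketch only: LambdaMART training with per-document display positions, per
# the position-bias text in docs/Advanced-Topics.rst above. Arrays and
# parameter values are illustrative, not taken from this patch.
import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 10))            # query-document features
y = rng.integers(0, 4, size=1000)          # ordinal relevance labels
group = [100] * 10                         # 10 queries, 100 documents each
position = np.tile(np.arange(100), 10)     # display position of each document

train_set = lgb.Dataset(X, label=y, group=group, position=position)
booster = lgb.train({"objective": "lambdarank"}, train_set, num_boost_round=10)
# booster is the relevance component f(x); the position factors g(pos) are
# learned jointly but only serve to explain the observed (biased) labels.
```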
Besides, GAMs have been used in the context of explainable ML (`Accurate Intelligible Models with Pairwise Interactions `_) to linearly decompose the contribution of each feature (and possibly their pairwise interactions) to the overall score, for subsequent analysis and interpretation of their effects in the trained models. diff --git a/docs/Features.rst b/docs/Features.rst index 89b56646588f..086c1f8cf07d 100644 --- a/docs/Features.rst +++ b/docs/Features.rst @@ -21,7 +21,7 @@ LightGBM uses histogram-based algorithms\ `[4, 5, 6] <#references>`__, which buc - To get one leaf's histograms in a binary tree, use the histogram subtraction of its parent and its neighbor - So it needs to construct histograms for only one leaf (with smaller ``#data`` than its neighbor). It then can get histograms of its neighbor by histogram subtraction with small cost (``O(#bins)``) - + - **Reduce memory usage** - Replaces continuous values with discrete bins. If ``#bins`` is small, can use small data type, e.g. uint8\_t, to store training data diff --git a/docs/GPU-Targets.rst b/docs/GPU-Targets.rst index ab024847d82d..1dd0077a77b8 100644 --- a/docs/GPU-Targets.rst +++ b/docs/GPU-Targets.rst @@ -107,7 +107,7 @@ Example of using GPU (``gpu_platform_id = 0`` and ``gpu_device_id = 0`` in our s [LightGBM] [Info] 40 dense feature groups (0.12 MB) transferred to GPU in 0.004211 secs. 76 sparse feature groups. [LightGBM] [Info] No further splits with positive gain, best gain: -inf [LightGBM] [Info] Trained a tree with leaves=16 and depth=8 - [1]: test's rmse:1.10643e-17 + [1]: test's rmse:1.10643e-17 [LightGBM] [Info] No further splits with positive gain, best gain: -inf [LightGBM] [Info] Trained a tree with leaves=7 and depth=5 [2]: test's rmse:0 @@ -145,11 +145,11 @@ Example of using CPU (``gpu_platform_id = 0``, ``gpu_device_id = 1``). The GPU d [LightGBM] [Info] 40 dense feature groups (0.12 MB) transferred to GPU in 0.004540 secs. 76 sparse feature groups. [LightGBM] [Info] No further splits with positive gain, best gain: -inf [LightGBM] [Info] Trained a tree with leaves=16 and depth=8 - [1]: test's rmse:1.10643e-17 + [1]: test's rmse:1.10643e-17 [LightGBM] [Info] No further splits with positive gain, best gain: -inf [LightGBM] [Info] Trained a tree with leaves=7 and depth=5 [2]: test's rmse:0 - + Known issues: diff --git a/docs/GPU-Tutorial.rst b/docs/GPU-Tutorial.rst index ee1d3173e556..ee6d4074f9b8 100644 --- a/docs/GPU-Tutorial.rst +++ b/docs/GPU-Tutorial.rst @@ -61,7 +61,7 @@ Now we are ready to checkout LightGBM and compile it with GPU support: cd LightGBM mkdir build cd build - cmake -DUSE_GPU=1 .. + cmake -DUSE_GPU=1 ..   # if you have installed NVIDIA CUDA to a customized location, you should specify paths to OpenCL headers and library like the following: # cmake -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ .. 
make -j$(nproc) diff --git a/docs/Key-Events.md b/docs/Key-Events.md index 67b024f60fc3..2fb3d80d48c9 100644 --- a/docs/Key-Events.md +++ b/docs/Key-Events.md @@ -75,7 +75,7 @@ The list includes the commits where the major feature added is considered workin * 22/06/2017 [Microsoft/LightGBM@d862b3e](https://github.com/microsoft/LightGBM/pull/642): CIntegration: Travis OSX Support (Pull Request 642) * 20/06/2017 [Microsoft/LightGBM@80c641c](https://github.com/microsoft/LightGBM/pull/635): Release: Python pip package (Pull Request 635) * 18/06/2017 [Microsoft/LightGBM@4d2aa84](https://github.com/microsoft/LightGBM/pull/634): CIntegration: AppVeyor Support (Pull Request 634) -* 06/06/2017 [Microsoft/LightGBM@2c9ce59](https://github.com/microsoft/LightGBM/pull/592): Release: R-package version 0.2 (Pull Request 592) +* 06/06/2017 [Microsoft/LightGBM@2c9ce59](https://github.com/microsoft/LightGBM/pull/592): Release: R-package version 0.2 (Pull Request 592) * 05/06/2017 [Microsoft/LightGBM@f98d75f](https://github.com/microsoft/LightGBM/pull/584): Feature: Use custom compiler for R-package (Pull Request 584) * 29/05/2017 [Microsoft/LightGBM@993bbd5](https://github.com/microsoft/LightGBM/pull/559): Parameter: Early Stopping for predictions (Pull Request 559) * 26/05/2017 [Microsoft/LightGBM@3abff37](https://github.com/microsoft/LightGBM/commit/3abff370bb353293e4a03e516111dd02785fbd97): Feature: Parameter to disable missing values (Commit) diff --git a/docs/Makefile b/docs/Makefile index 8e5bdd752cc1..627e704c9b52 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/images/artifacts-download.svg b/docs/_static/images/artifacts-download.svg index 669b5d62af5b..f6de09e29608 100644 --- a/docs/_static/images/artifacts-download.svg +++ b/docs/_static/images/artifacts-download.svg @@ -1 +1 @@ - artifactsartifactsdownloaddownload \ No newline at end of file + artifactsartifactsdownloaddownload diff --git a/docs/_static/images/artifacts-fetching.svg b/docs/_static/images/artifacts-fetching.svg index 627efeb354a7..d662b9f9ced0 100644 --- a/docs/_static/images/artifacts-fetching.svg +++ b/docs/_static/images/artifacts-fetching.svg @@ -1 +1 @@ - artifactsartifactsfetching...fetching... \ No newline at end of file + artifactsartifactsfetching...fetching... 
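An aside on the .pre-commit-config.yaml hunk above, since the two new hooks are the heart of this change: they can be exercised locally with the standard pre-commit CLI. A sketch via the package's Python module entry point; note the hooks deliberately exit non-zero when they rewrite files, which is expected on a first run:

```python
# Sketch: run the two hooks added in .pre-commit-config.yaml against the whole
# repo. Assumes `pip install pre-commit` has been done; pre-commit is a Python
# package and exposes a `python -m pre_commit` entry point.
import subprocess

for hook in ("end-of-file-fixer", "trailing-whitespace"):
    # A non-zero exit code means the hook modified files (the desired effect
    # on the first run), so don't raise on failure.
    subprocess.run(
        ["python", "-m", "pre_commit", "run", hook, "--all-files"],
        check=False,
    )
```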
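Similarly, for the docs/Features.rst hunk above: the histogram-subtraction trick its bullet list describes is simple enough to show directly. A toy numpy sketch, with all bin values invented:

```python
# Toy sketch of histogram subtraction (docs/Features.rst above): a leaf's
# histogram equals its parent's minus its sibling's, so only the child with
# fewer rows needs a full O(#data) construction pass; the other child then
# costs only O(#bins). All values below are invented.
import numpy as np

parent_hist = np.array([4.0, 2.0, 7.0, 1.0, 0.0, 3.0, 5.0, 6.0])
small_leaf_hist = np.array([1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 2.0])
large_leaf_hist = parent_hist - small_leaf_hist   # O(#bins), no data pass
assert large_leaf_hist.min() >= 0.0
```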
diff --git a/docs/_static/images/artifacts-not-available.svg b/docs/_static/images/artifacts-not-available.svg index e754558f674d..37ed0b77c33a 100644 --- a/docs/_static/images/artifacts-not-available.svg +++ b/docs/_static/images/artifacts-not-available.svg @@ -1 +1 @@ - artifactsartifactslink is available only on Read the Docs sitelink is available only on Read the Docs site \ No newline at end of file + artifactsartifactslink is available only on Read the Docs sitelink is available only on Read the Docs site diff --git a/docs/_static/images/dask-concat.svg b/docs/_static/images/dask-concat.svg index a230535d50c2..7e0ca1f3c814 100644 --- a/docs/_static/images/dask-concat.svg +++ b/docs/_static/images/dask-concat.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/docs/_static/images/dask-initial-setup.svg b/docs/_static/images/dask-initial-setup.svg index 5ffe85b87397..ed18049a1478 100644 --- a/docs/_static/images/dask-initial-setup.svg +++ b/docs/_static/images/dask-initial-setup.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/examples/README.md b/examples/README.md index 835bf7b645d9..3c16fcd6fbe7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -34,7 +34,7 @@ Machine Learning Challenge Winning Solutions | 1st | [Google Analytics Customer Revenue Prediction](https://www.kaggle.com/c/ga-customer-revenue-prediction) | [link](https://www.kaggle.com/c/ga-customer-revenue-prediction/discussion/82614#latest-482575) | 2019.3 | | 1st | [VSB Power Line Fault Detection](https://www.kaggle.com/c/vsb-power-line-fault-detection) | [link](https://www.kaggle.com/c/vsb-power-line-fault-detection/discussion/87038#latest-521846) | 2019.3 | | 5th | [Elo Merchant Category Recommendation](https://www.kaggle.com/c/elo-merchant-category-recommendation) | [link](https://www.kaggle.com/c/elo-merchant-category-recommendation/discussion/82314#latest-525737) | 2019.2 | -| 2nd | [PLAsTiCC Astronomical Classification](https://www.kaggle.com/c/PLAsTiCC-2018) | [link](https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75059#latest-462457) | 2018.12 | +| 2nd | [PLAsTiCC Astronomical Classification](https://www.kaggle.com/c/PLAsTiCC-2018) | [link](https://www.kaggle.com/c/PLAsTiCC-2018/discussion/75059#latest-462457) | 2018.12 | | 1st | [Google Research Doodle Recognition Challenge](https://www.kaggle.com/c/quickdraw-doodle-recognition) | [link](https://www.kaggle.com/c/quickdraw-doodle-recognition/discussion/73738#latest-550028) | 2018.12 | | 1st | [Home Credit Group Home Credit Default Risk](https://www.kaggle.com/c/home-credit-default-risk) | [link](https://www.kaggle.com/c/home-credit-default-risk/discussion/64480#latest-514514) | 2018.8 | | 2nd | [Home Credit Group Home Credit Default Risk](https://www.kaggle.com/c/home-credit-default-risk) | [link](https://www.kaggle.com/c/home-credit-default-risk/discussion/64722#latest-394948) | 2018.8 | @@ -45,11 +45,11 @@ Machine Learning Challenge Winning Solutions | 2nd | [Avito Demand Prediction Challenge](https://www.kaggle.com/c/avito-demand-prediction) | [link](https://www.kaggle.com/c/avito-demand-prediction/discussion/59871#latest-470807) | 2018.6 | | 3rd | [Avito Demand Prediction Challenge](https://www.kaggle.com/c/avito-demand-prediction) | [link](https://www.kaggle.com/c/avito-demand-prediction/discussion/59885#latest-364403) | 2018.6 | | 1st | [TalkingData AdTracking Fraud Detection Challenge](https://www.kaggle.com/c/talkingdata-adtracking-fraud-detection) | 
[link](https://www.kaggle.com/c/talkingdata-adtracking-fraud-detection/discussion/56475)| 2018.5 | -| 1st | [DonorsChoose.org Application Screening](https://www.kaggle.com/c/donorschoose-application-screening)| [link](https://www.kaggle.com/shadowwarrior/1st-place-solution/notebook) | 2018.4 | +| 1st | [DonorsChoose.org Application Screening](https://www.kaggle.com/c/donorschoose-application-screening)| [link](https://www.kaggle.com/shadowwarrior/1st-place-solution/notebook) | 2018.4 | | 1st | [Toxic Comment Classification Challenge](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge)| [link](https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/discussion/52557) | 2018.3 | | 1st | [Mercari Price Suggestion Challenge](https://www.kaggle.com/c/mercari-price-suggestion-challenge) | [link](https://www.kaggle.com/c/mercari-price-suggestion-challenge/discussion/50256) | 2018.2 | -| 1st | [IEEE's Signal Processing Society, Camera Model Identification](https://www.kaggle.com/c/sp-society-camera-model-identification)| [link](https://www.kaggle.com/c/sp-society-camera-model-identification/discussion/49367) | 2018.2 | -| 1st | [Recruit Restaurant Visitor Forecasting](https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting) | [link](https://www.kaggle.com/pureheart/1st-place-lgb-model-public-0-470-private-0-502/comments) | 2018.2| +| 1st | [IEEE's Signal Processing Society, Camera Model Identification](https://www.kaggle.com/c/sp-society-camera-model-identification)| [link](https://www.kaggle.com/c/sp-society-camera-model-identification/discussion/49367) | 2018.2 | +| 1st | [Recruit Restaurant Visitor Forecasting](https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting) | [link](https://www.kaggle.com/pureheart/1st-place-lgb-model-public-0-470-private-0-502/comments) | 2018.2| | 1st | [WSDM CUP 2018 - KKBox's Music Recommendation Challenge](https://www.kaggle.com/c/kkbox-music-recommendation-challenge) | [link](https://www.kaggle.com/c/kkbox-music-recommendation-challenge/discussion/45942) | 2017.12 | | 1st | [Porto Seguro’s Safe Driver Prediction](https://www.kaggle.com/c/porto-seguro-safe-driver-prediction) | [link](https://www.kaggle.com/c/porto-seguro-safe-driver-prediction/discussion/44629) |2017.11 | | 1st | [Quora Question Pairs](https://www.kaggle.com/c/quora-question-pairs) | [link](https://www.kaggle.com/c/quora-question-pairs/discussion/34355) | 2017.6 | diff --git a/examples/binary_classification/train.conf b/examples/binary_classification/train.conf index f1ae6af54c1f..66b3b4df3bc4 100644 --- a/examples/binary_classification/train.conf +++ b/examples/binary_classification/train.conf @@ -12,10 +12,10 @@ boosting_type = gbdt objective = binary # eval metrics, support multi metric, delimited by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = binary_logloss,auc @@ -29,7 +29,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. 
max_bin = 255 # training data @@ -39,7 +39,7 @@ data = binary.train # validation data, support multi validation data, separated by ',' # if existing weight file, should name to "binary.test.weight" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = binary.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -59,10 +59,10 @@ num_leaves = 63 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use each CPU. The default is the CPU count. +# number of threads for multi-threading. One thread will use each CPU. The default is the CPU count. # num_threads = 8 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 0.8 diff --git a/examples/binary_classification/train_linear.conf b/examples/binary_classification/train_linear.conf index 4914a2c0bedb..05ae61bc3ad8 100644 --- a/examples/binary_classification/train_linear.conf +++ b/examples/binary_classification/train_linear.conf @@ -14,10 +14,10 @@ objective = binary linear_tree = true # eval metrics, support multi metric, delimited by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = binary_logloss,auc @@ -28,7 +28,7 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # training data @@ -38,7 +38,7 @@ data = binary.train # validation data, support multi validation data, separated by ',' # if existing weight file, should name to "binary.test.weight" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = binary.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -58,10 +58,10 @@ num_leaves = 63 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use each CPU. The default is set to CPU count. +# number of threads for multi-threading. One thread will use each CPU. The default is set to CPU count. # num_threads = 8 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 0.8 diff --git a/examples/lambdarank/README.md b/examples/lambdarank/README.md index 7223cd751d12..2ec7afcfe2fd 100644 --- a/examples/lambdarank/README.md +++ b/examples/lambdarank/README.md @@ -29,5 +29,5 @@ Run the following command in this folder: Data Format ----------- -To learn more about the query format used in this example, check out the +To learn more about the query format used in this example, check out the [query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data). 
diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index d2b7825be7ec..2aa2113b40d4 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -12,10 +12,10 @@ boosting_type = gbdt objective = lambdarank # eval metrics, support multi metric, delimited by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = ndcg @@ -32,7 +32,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # training data @@ -44,7 +44,7 @@ data = rank.train # validation data, support multi validation data, separated by ',' # if existing weight file, should name to "rank.test.weight" # if existing query file, should name to "rank.test.query" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = rank.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -64,10 +64,10 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, defalut is setted to #cpu. # num_threads = 8 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 1.0 diff --git a/examples/multiclass_classification/train.conf b/examples/multiclass_classification/train.conf index 8d2bf28733b5..b432756f0e7d 100644 --- a/examples/multiclass_classification/train.conf +++ b/examples/multiclass_classification/train.conf @@ -13,10 +13,10 @@ boosting_type = gbdt objective = multiclass # eval metrics, support multi metric, delimited by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error # multi_logloss @@ -44,7 +44,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # training data diff --git a/examples/parallel_learning/train.conf b/examples/parallel_learning/train.conf index 3b40778fa50f..bee2b2df86e2 100644 --- a/examples/parallel_learning/train.conf +++ b/examples/parallel_learning/train.conf @@ -12,10 +12,10 @@ boosting_type = gbdt objective = binary # eval metrics, support multi metric, delimite by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = binary_logloss,auc @@ -29,7 +29,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. 
+# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # training data @@ -39,7 +39,7 @@ data = binary.train # validation data, support multi validation data, separated by ',' # if existing weight file, should name to "binary.test.weight" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = binary.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -62,7 +62,7 @@ tree_learner = feature # number of threads for multi-threading. One thread will use each CPU. The default is the CPU count. # num_threads = 8 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 0.8 diff --git a/examples/python-guide/README.md b/examples/python-guide/README.md index b34e0fa6e002..ea4d2ce0b446 100644 --- a/examples/python-guide/README.md +++ b/examples/python-guide/README.md @@ -23,11 +23,11 @@ Examples include: - [simple_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/simple_example.py) - Construct Dataset - Basic train and predict - - Eval during training + - Eval during training - Early stopping - Save model to file - [sklearn_example.py](https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/sklearn_example.py) - - Create data for learning with sklearn interface + - Create data for learning with sklearn interface - Basic train and predict with sklearn interface - Feature importances with sklearn interface - Self-defined eval metric with sklearn interface diff --git a/examples/regression/train.conf b/examples/regression/train.conf index ce25d0ecae47..cd910af61dcf 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -12,10 +12,10 @@ boosting_type = gbdt objective = regression # eval metrics, support multi metric, delimite by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = l2 @@ -29,7 +29,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # forced bin thresholds @@ -42,7 +42,7 @@ data = regression.train # validation data, support multi validation data, separated by ',' # if exsting weight file, should name to "regression.test.weight" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = regression.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -62,10 +62,10 @@ num_leaves = 31 # alias: tree tree_learner = serial -# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. +# number of threads for multi-threading. One thread will use one CPU, default is setted to #cpu. 
# num_threads = 8 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 0.9 diff --git a/examples/xendcg/README.md b/examples/xendcg/README.md index f60d44614dad..122817d27408 100644 --- a/examples/xendcg/README.md +++ b/examples/xendcg/README.md @@ -29,5 +29,5 @@ Run the following command in this folder: Data Format ----------- -To learn more about the query format used in this example, check out the +To learn more about the query format used in this example, check out the [query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data). diff --git a/examples/xendcg/train.conf b/examples/xendcg/train.conf index 0dafb931d319..2b8777c5e45e 100644 --- a/examples/xendcg/train.conf +++ b/examples/xendcg/train.conf @@ -12,10 +12,10 @@ boosting_type = gbdt objective = rank_xendcg # eval metrics, support multi metric, delimite by ',' , support following metrics -# l1 +# l1 # l2 , default metric for regression # ndcg , default metric for lambdarank -# auc +# auc # binary_logloss , default metric for binary # binary_error metric = ndcg @@ -32,7 +32,7 @@ is_training_metric = true # column in data to use as label label_column = 0 -# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. +# number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 # training data @@ -44,7 +44,7 @@ data = rank.train # validation data, support multi validation data, separated by ',' # if existing weight file, should name to "rank.test.weight" # if existing query file, should name to "rank.test.query" -# alias: valid, test, test_data, +# alias: valid, test, test_data, valid_data = rank.test # number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds @@ -68,7 +68,7 @@ tree_learner = serial num_threads = 1 objective_seed = 1025 -# feature sub-sample, will random select 80% feature to train on each iteration +# feature sub-sample, will random select 80% feature to train on each iteration # alias: sub_feature feature_fraction = 1.0 diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index de1bb6eb94ed..a33fcfa9c45c 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -144,7 +144,7 @@ class BinMapper { /*! * \brief Maximum categorical value - * \return Maximum categorical value for categorical features, 0 for numerical features + * \return Maximum categorical value for categorical features, 0 for numerical features */ inline int MaxCatValue() const { if (bin_2_categorical_.size() == 0) { diff --git a/include/LightGBM/network.h b/include/LightGBM/network.h index 87048b2062e6..1bece836923b 100644 --- a/include/LightGBM/network.h +++ b/include/LightGBM/network.h @@ -128,7 +128,7 @@ class Network { const ReduceFunction& reducer); /*! - * \brief Performing all_gather by using Bruck algorithm. + * \brief Performing all_gather by using Bruck algorithm. Communication times is O(log(n)), and communication cost is O(send_size * number_machine) * It can be used when all nodes have same input size. * \param input Input data @@ -138,7 +138,7 @@ class Network { static void Allgather(char* input, comm_size_t send_size, char* output); /*! - * \brief Performing all_gather by using Bruck algorithm. + * \brief Performing all_gather by using Bruck algorithm. 
Communication times is O(log(n)), and communication cost is O(all_size) * It can be used when nodes have different input size. * \param input Input data @@ -150,7 +150,7 @@ class Network { static void Allgather(char* input, const comm_size_t* block_start, const comm_size_t* block_len, char* output, comm_size_t all_size); /*! - * \brief Perform reduce scatter by using recursive halving algorithm. + * \brief Perform reduce scatter by using recursive halving algorithm. Communication times is O(log(n)), and communication cost is O(input_size) * \param input Input data * \param input_size The size of input data diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index f1b5a10b5a69..f38375fb7370 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -1232,7 +1232,7 @@ struct __TToStringHelper { * Converts an array to a string with with values separated by the space character. * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality * and is locale-independent. -* +* * \note If ``high_precision_output`` is set to true, * floating point values are output with more digits of precision. */ diff --git a/pmml/README.md b/pmml/README.md index d69996c8efd2..759340b61118 100644 --- a/pmml/README.md +++ b/pmml/README.md @@ -1,4 +1,4 @@ -PMML Generator +PMML Generator ============== The old Python convert script is removed due to it cannot support the new format of categorical features. diff --git a/src/c_api.cpp b/src/c_api.cpp index 67a6d05b75a7..76f6160471e8 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -107,7 +107,7 @@ class SingleRowPredictorInner { /*! * \brief Object to store resources meant for single-row Fast Predict methods. - * + * * For legacy reasons this is called `FastConfig` in the public C API. * * Meant to be used by the *Fast* predict methods only. 
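An aside on the include/LightGBM/network.h hunk above: its doc comments cite the Bruck all-gather's O(log(n)) round count. A pure-Python toy simulation of the equal-block-size variant; the only liberty taken is modeling message passing as list copies:

```python
# Toy simulation of the Bruck all-gather referenced in the network.h comments
# above: after ceil(log2(n)) doubling rounds plus a local rotation, every
# machine holds every block. Networking is simulated with list copies.
def bruck_allgather(blocks):
    n = len(blocks)
    buf = [[b] for b in blocks]              # buf[i]: machine i's local data
    step = 1
    while step < n:
        # machine i "receives" from machine (i + step) % n; snapshot first so
        # every transfer in a round uses pre-round state
        incoming = [list(buf[(i + step) % n]) for i in range(n)]
        for i in range(n):
            need = n - len(buf[i])           # final round may be partial
            buf[i].extend(incoming[i][:need])
        step *= 2
    # buf[i] now holds blocks i, i+1, ..., i+n-1 (mod n); undo the rotation
    return [[buf[i][(j - i) % n] for j in range(n)] for i in range(n)]

machines = [f"block{i}" for i in range(5)]
assert all(row == machines for row in bruck_allgather(machines))
```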
diff --git a/src/treelearner/kernels/histogram_16_64_256.hu b/src/treelearner/kernels/histogram_16_64_256.hu index 8e3d3a5ec782..f72b7465783d 100644 --- a/src/treelearner/kernels/histogram_16_64_256.hu +++ b/src/treelearner/kernels/histogram_16_64_256.hu @@ -25,36 +25,36 @@ typedef unsigned char uchar; template __device__ double as_double(const T t) { static_assert(sizeof(T) == sizeof(double), "size mismatch"); - double d; - memcpy(&d, &t, sizeof(T)); + double d; + memcpy(&d, &t, sizeof(T)); return d; } template __device__ unsigned long long as_ulong_ulong(const T t) { static_assert(sizeof(T) == sizeof(unsigned long long), "size mismatch"); - unsigned long long u; - memcpy(&u, &t, sizeof(T)); + unsigned long long u; + memcpy(&u, &t, sizeof(T)); return u; } template __device__ float as_float(const T t) { static_assert(sizeof(T) == sizeof(float), "size mismatch"); - float f; - memcpy(&f, &t, sizeof(T)); + float f; + memcpy(&f, &t, sizeof(T)); return f; } template __device__ unsigned int as_uint(const T t) { static_assert(sizeof(T) == sizeof(unsigned int), "size_mismatch"); - unsigned int u; - memcpy(&u, &t, sizeof(T)); + unsigned int u; + memcpy(&u, &t, sizeof(T)); return u; } template __device__ uchar4 as_uchar4(const T t) { static_assert(sizeof(T) == sizeof(uchar4), "size mismatch"); - uchar4 u; - memcpy(&u, &t, sizeof(T)); + uchar4 u; + memcpy(&u, &t, sizeof(T)); return u; } @@ -158,4 +158,3 @@ DECLARE(histogram256); } // namespace LightGBM #endif // LIGHTGBM_TREELEARNER_KERNELS_HISTOGRAM_16_64_256_HU_ - diff --git a/src/treelearner/leaf_splits.hpp b/src/treelearner/leaf_splits.hpp index 2db71573e97e..77cf9decfadf 100644 --- a/src/treelearner/leaf_splits.hpp +++ b/src/treelearner/leaf_splits.hpp @@ -38,7 +38,7 @@ class LeafSplits { } /*! - * \brief Init split on current leaf on partial data. + * \brief Init split on current leaf on partial data. * \param leaf Index of current leaf * \param data_partition current data partition * \param sum_gradients @@ -54,7 +54,7 @@ class LeafSplits { } /*! - * \brief Init split on current leaf on partial data. + * \brief Init split on current leaf on partial data. * \param leaf Index of current leaf * \param data_partition current data partition * \param sum_gradients diff --git a/src/treelearner/ocl/histogram16.cl b/src/treelearner/ocl/histogram16.cl index 0c77150b87e6..21624ec9ee10 100644 --- a/src/treelearner/ocl/histogram16.cl +++ b/src/treelearner/ocl/histogram16.cl @@ -73,12 +73,12 @@ typedef uint acc_int_type; // local memory size in bytes #define LOCAL_MEM_SIZE (DWORD_FEATURES * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS) -// unroll the atomic operation for a few times. Takes more code space, +// unroll the atomic operation for a few times. Takes more code space, // but compiler can generate better code for faster atomics. 
#define UNROLL_ATOMIC 1 // Options passed by compiler at run time: -// IGNORE_INDICES will be set when the kernel does not +// IGNORE_INDICES will be set when the kernel does not // #define IGNORE_INDICES // #define POWER_FEATURE_WORKGROUPS 10 @@ -161,7 +161,7 @@ R""() // this function will be called by histogram16 // we have one sub-histogram of one feature in registers, and need to read others void within_kernel_reduction16x8(uchar8 feature_mask, - __global const acc_type* restrict feature4_sub_hist, + __global const acc_type* restrict feature4_sub_hist, const uint skip_id, acc_type stat_val, const ushort num_sub_hist, @@ -173,7 +173,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask, uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1; // hessian or gradient ushort bin_id = ltid >> (LOG2_DWORD_FEATURES + 1); // range 0 - 16 ushort i; - #if POWER_FEATURE_WORKGROUPS != 0 + #if POWER_FEATURE_WORKGROUPS != 0 // if there is only 1 work group, no need to do the reduction // add all sub-histograms for 4 features __global const acc_type* restrict p = feature4_sub_hist + ltid; @@ -185,7 +185,7 @@ void within_kernel_reduction16x8(uchar8 feature_mask, // skip the counters we already have p += 2 * DWORD_FEATURES * NUM_BINS; for (i = i + 1; i < num_sub_hist; ++i) { - stat_val += *p; + stat_val += *p; p += NUM_BINS * DWORD_FEATURES * 2; } #endif @@ -208,12 +208,12 @@ R""() __attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1))) #if USE_CONSTANT_BUF == 1 -__kernel void histogram16(__global const uchar4* restrict feature_data_base, +__kernel void histogram16(__global const uchar4* restrict feature_data_base, __constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))), const data_size_t feature_size, - __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))), - const data_size_t num_data, - __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))), + __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))), + const data_size_t num_data, + __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))), #if CONST_HESSIAN == 0 __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))), #else @@ -223,18 +223,18 @@ __kernel void histogram16(__global const uchar4* restrict feature_data_base, __global volatile int * sync_counters, __global acc_type* restrict hist_buf_base) { #else -__kernel void histogram16(__global const uchar4* feature_data_base, +__kernel void histogram16(__global const uchar4* feature_data_base, __constant const uchar8* restrict feature_masks __attribute__((max_constant_size(65536))), const data_size_t feature_size, - __global const data_size_t* data_indices, - const data_size_t num_data, - __global const score_t* ordered_gradients, + __global const data_size_t* data_indices, + const data_size_t num_data, + __global const score_t* ordered_gradients, #if CONST_HESSIAN == 0 __global const score_t* ordered_hessians, #else const score_t const_hessian, #endif - __global char* restrict output_buf, + __global char* restrict output_buf, __global volatile int * sync_counters, __global acc_type* restrict hist_buf_base) { #endif @@ -260,38 +260,38 @@ __kernel void histogram16(__global const uchar4* feature_data_base, // there are 8 banks (sub-histograms) used by 256 threads total 8 KB /* memory layout of gh_hist: 
----------------------------------------------------------------------------------------------- - bk0_g_f0_bin0 bk0_g_f1_bin0 bk0_g_f2_bin0 bk0_g_f3_bin0 bk0_g_f4_bin0 bk0_g_f5_bin0 bk0_g_f6_bin0 bk0_g_f7_bin0 + bk0_g_f0_bin0 bk0_g_f1_bin0 bk0_g_f2_bin0 bk0_g_f3_bin0 bk0_g_f4_bin0 bk0_g_f5_bin0 bk0_g_f6_bin0 bk0_g_f7_bin0 bk0_h_f0_bin0 bk0_h_f1_bin0 bk0_h_f2_bin0 bk0_h_f3_bin0 bk0_h_f4_bin0 bk0_h_f5_bin0 bk0_h_f6_bin0 bk0_h_f7_bin0 - bk1_g_f0_bin0 bk1_g_f1_bin0 bk1_g_f2_bin0 bk1_g_f3_bin0 bk1_g_f4_bin0 bk1_g_f5_bin0 bk1_g_f6_bin0 bk1_g_f7_bin0 + bk1_g_f0_bin0 bk1_g_f1_bin0 bk1_g_f2_bin0 bk1_g_f3_bin0 bk1_g_f4_bin0 bk1_g_f5_bin0 bk1_g_f6_bin0 bk1_g_f7_bin0 bk1_h_f0_bin0 bk1_h_f1_bin0 bk1_h_f2_bin0 bk1_h_f3_bin0 bk1_h_f4_bin0 bk1_h_f5_bin0 bk1_h_f6_bin0 bk1_h_f7_bin0 - bk2_g_f0_bin0 bk2_g_f1_bin0 bk2_g_f2_bin0 bk2_g_f3_bin0 bk2_g_f4_bin0 bk2_g_f5_bin0 bk2_g_f6_bin0 bk2_g_f7_bin0 + bk2_g_f0_bin0 bk2_g_f1_bin0 bk2_g_f2_bin0 bk2_g_f3_bin0 bk2_g_f4_bin0 bk2_g_f5_bin0 bk2_g_f6_bin0 bk2_g_f7_bin0 bk2_h_f0_bin0 bk2_h_f1_bin0 bk2_h_f2_bin0 bk2_h_f3_bin0 bk2_h_f4_bin0 bk2_h_f5_bin0 bk2_h_f6_bin0 bk2_h_f7_bin0 - bk3_g_f0_bin0 bk3_g_f1_bin0 bk3_g_f2_bin0 bk3_g_f3_bin0 bk3_g_f4_bin0 bk3_g_f5_bin0 bk3_g_f6_bin0 bk3_g_f7_bin0 + bk3_g_f0_bin0 bk3_g_f1_bin0 bk3_g_f2_bin0 bk3_g_f3_bin0 bk3_g_f4_bin0 bk3_g_f5_bin0 bk3_g_f6_bin0 bk3_g_f7_bin0 bk3_h_f0_bin0 bk3_h_f1_bin0 bk3_h_f2_bin0 bk3_h_f3_bin0 bk3_h_f4_bin0 bk3_h_f5_bin0 bk3_h_f6_bin0 bk3_h_f7_bin0 - bk4_g_f0_bin0 bk4_g_f1_bin0 bk4_g_f2_bin0 bk4_g_f3_bin0 bk4_g_f4_bin0 bk4_g_f5_bin0 bk4_g_f6_bin0 bk4_g_f7_bin0 + bk4_g_f0_bin0 bk4_g_f1_bin0 bk4_g_f2_bin0 bk4_g_f3_bin0 bk4_g_f4_bin0 bk4_g_f5_bin0 bk4_g_f6_bin0 bk4_g_f7_bin0 bk4_h_f0_bin0 bk4_h_f1_bin0 bk4_h_f2_bin0 bk4_h_f3_bin0 bk4_h_f4_bin0 bk4_h_f5_bin0 bk4_h_f6_bin0 bk4_h_f7_bin0 - bk5_g_f0_bin0 bk5_g_f1_bin0 bk5_g_f2_bin0 bk5_g_f3_bin0 bk5_g_f4_bin0 bk5_g_f5_bin0 bk5_g_f6_bin0 bk5_g_f7_bin0 + bk5_g_f0_bin0 bk5_g_f1_bin0 bk5_g_f2_bin0 bk5_g_f3_bin0 bk5_g_f4_bin0 bk5_g_f5_bin0 bk5_g_f6_bin0 bk5_g_f7_bin0 bk5_h_f0_bin0 bk5_h_f1_bin0 bk5_h_f2_bin0 bk5_h_f3_bin0 bk5_h_f4_bin0 bk5_h_f5_bin0 bk5_h_f6_bin0 bk5_h_f7_bin0 - bk6_g_f0_bin0 bk6_g_f1_bin0 bk6_g_f2_bin0 bk6_g_f3_bin0 bk6_g_f4_bin0 bk6_g_f5_bin0 bk6_g_f6_bin0 bk6_g_f7_bin0 + bk6_g_f0_bin0 bk6_g_f1_bin0 bk6_g_f2_bin0 bk6_g_f3_bin0 bk6_g_f4_bin0 bk6_g_f5_bin0 bk6_g_f6_bin0 bk6_g_f7_bin0 bk6_h_f0_bin0 bk6_h_f1_bin0 bk6_h_f2_bin0 bk6_h_f3_bin0 bk6_h_f4_bin0 bk6_h_f5_bin0 bk6_h_f6_bin0 bk6_h_f7_bin0 - bk7_g_f0_bin0 bk7_g_f1_bin0 bk7_g_f2_bin0 bk7_g_f3_bin0 bk7_g_f4_bin0 bk7_g_f5_bin0 bk7_g_f6_bin0 bk7_g_f7_bin0 + bk7_g_f0_bin0 bk7_g_f1_bin0 bk7_g_f2_bin0 bk7_g_f3_bin0 bk7_g_f4_bin0 bk7_g_f5_bin0 bk7_g_f6_bin0 bk7_g_f7_bin0 bk7_h_f0_bin0 bk7_h_f1_bin0 bk7_h_f2_bin0 bk7_h_f3_bin0 bk7_h_f4_bin0 bk7_h_f5_bin0 bk7_h_f6_bin0 bk7_h_f7_bin0 ... 
- bk0_g_f0_bin16 bk0_g_f1_bin16 bk0_g_f2_bin16 bk0_g_f3_bin16 bk0_g_f4_bin16 bk0_g_f5_bin16 bk0_g_f6_bin16 bk0_g_f7_bin16 + bk0_g_f0_bin16 bk0_g_f1_bin16 bk0_g_f2_bin16 bk0_g_f3_bin16 bk0_g_f4_bin16 bk0_g_f5_bin16 bk0_g_f6_bin16 bk0_g_f7_bin16 bk0_h_f0_bin16 bk0_h_f1_bin16 bk0_h_f2_bin16 bk0_h_f3_bin16 bk0_h_f4_bin16 bk0_h_f5_bin16 bk0_h_f6_bin16 bk0_h_f7_bin16 - bk1_g_f0_bin16 bk1_g_f1_bin16 bk1_g_f2_bin16 bk1_g_f3_bin16 bk1_g_f4_bin16 bk1_g_f5_bin16 bk1_g_f6_bin16 bk1_g_f7_bin16 + bk1_g_f0_bin16 bk1_g_f1_bin16 bk1_g_f2_bin16 bk1_g_f3_bin16 bk1_g_f4_bin16 bk1_g_f5_bin16 bk1_g_f6_bin16 bk1_g_f7_bin16 bk1_h_f0_bin16 bk1_h_f1_bin16 bk1_h_f2_bin16 bk1_h_f3_bin16 bk1_h_f4_bin16 bk1_h_f5_bin16 bk1_h_f6_bin16 bk1_h_f7_bin16 - bk2_g_f0_bin16 bk2_g_f1_bin16 bk2_g_f2_bin16 bk2_g_f3_bin16 bk2_g_f4_bin16 bk2_g_f5_bin16 bk2_g_f6_bin16 bk2_g_f7_bin16 + bk2_g_f0_bin16 bk2_g_f1_bin16 bk2_g_f2_bin16 bk2_g_f3_bin16 bk2_g_f4_bin16 bk2_g_f5_bin16 bk2_g_f6_bin16 bk2_g_f7_bin16 bk2_h_f0_bin16 bk2_h_f1_bin16 bk2_h_f2_bin16 bk2_h_f3_bin16 bk2_h_f4_bin16 bk2_h_f5_bin16 bk2_h_f6_bin16 bk2_h_f7_bin16 - bk3_g_f0_bin16 bk3_g_f1_bin16 bk3_g_f2_bin16 bk3_g_f3_bin16 bk3_g_f4_bin16 bk3_g_f5_bin16 bk3_g_f6_bin16 bk3_g_f7_bin16 + bk3_g_f0_bin16 bk3_g_f1_bin16 bk3_g_f2_bin16 bk3_g_f3_bin16 bk3_g_f4_bin16 bk3_g_f5_bin16 bk3_g_f6_bin16 bk3_g_f7_bin16 bk3_h_f0_bin16 bk3_h_f1_bin16 bk3_h_f2_bin16 bk3_h_f3_bin16 bk3_h_f4_bin16 bk3_h_f5_bin16 bk3_h_f6_bin16 bk3_h_f7_bin16 - bk4_g_f0_bin16 bk4_g_f1_bin16 bk4_g_f2_bin16 bk4_g_f3_bin16 bk4_g_f4_bin16 bk4_g_f5_bin16 bk4_g_f6_bin16 bk4_g_f7_bin16 + bk4_g_f0_bin16 bk4_g_f1_bin16 bk4_g_f2_bin16 bk4_g_f3_bin16 bk4_g_f4_bin16 bk4_g_f5_bin16 bk4_g_f6_bin16 bk4_g_f7_bin16 bk4_h_f0_bin16 bk4_h_f1_bin16 bk4_h_f2_bin16 bk4_h_f3_bin16 bk4_h_f4_bin16 bk4_h_f5_bin16 bk4_h_f6_bin16 bk4_h_f7_bin16 - bk5_g_f0_bin16 bk5_g_f1_bin16 bk5_g_f2_bin16 bk5_g_f3_bin16 bk5_g_f4_bin16 bk5_g_f5_bin16 bk5_g_f6_bin16 bk5_g_f7_bin16 + bk5_g_f0_bin16 bk5_g_f1_bin16 bk5_g_f2_bin16 bk5_g_f3_bin16 bk5_g_f4_bin16 bk5_g_f5_bin16 bk5_g_f6_bin16 bk5_g_f7_bin16 bk5_h_f0_bin16 bk5_h_f1_bin16 bk5_h_f2_bin16 bk5_h_f3_bin16 bk5_h_f4_bin16 bk5_h_f5_bin16 bk5_h_f6_bin16 bk5_h_f7_bin16 - bk6_g_f0_bin16 bk6_g_f1_bin16 bk6_g_f2_bin16 bk6_g_f3_bin16 bk6_g_f4_bin16 bk6_g_f5_bin16 bk6_g_f6_bin16 bk6_g_f7_bin16 + bk6_g_f0_bin16 bk6_g_f1_bin16 bk6_g_f2_bin16 bk6_g_f3_bin16 bk6_g_f4_bin16 bk6_g_f5_bin16 bk6_g_f6_bin16 bk6_g_f7_bin16 bk6_h_f0_bin16 bk6_h_f1_bin16 bk6_h_f2_bin16 bk6_h_f3_bin16 bk6_h_f4_bin16 bk6_h_f5_bin16 bk6_h_f6_bin16 bk6_h_f7_bin16 - bk7_g_f0_bin16 bk7_g_f1_bin16 bk7_g_f2_bin16 bk7_g_f3_bin16 bk7_g_f4_bin16 bk7_g_f5_bin16 bk7_g_f6_bin16 bk7_g_f7_bin16 + bk7_g_f0_bin16 bk7_g_f1_bin16 bk7_g_f2_bin16 bk7_g_f3_bin16 bk7_g_f4_bin16 bk7_g_f5_bin16 bk7_g_f6_bin16 bk7_g_f7_bin16 bk7_h_f0_bin16 bk7_h_f1_bin16 bk7_h_f2_bin16 bk7_h_f3_bin16 bk7_h_f4_bin16 bk7_h_f5_bin16 bk7_h_f6_bin16 bk7_h_f7_bin16 ----------------------------------------------------------------------------------------------- */ @@ -333,7 +333,7 @@ __kernel void histogram16(__global const uchar4* feature_data_base, uchar is_hessian_first = (ltid >> LOG2_DWORD_FEATURES) & 1; // thread 0-15 write result to bank0, 16-31 to bank1, 32-47 to bank2, 48-63 to bank3, etc ushort bank = (ltid >> (LOG2_DWORD_FEATURES + 1)) & BANK_MASK; - + ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS; // each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant) // feature_size is the number of examples per feature @@ -615,12 +615,12 @@ R""() 
)"" R""() */ - + #if ENABLE_ALL_FEATURES == 0 // restore feature_mask feature_mask = feature_masks[group_feature]; #endif - + // now reduce the 4 banks of subhistograms into 1 acc_type stat_val = 0.0f; uint cnt_val = 0; @@ -644,7 +644,7 @@ R""() } } #endif - + // now thread 0 - 7 holds feature 0 - 7's gradient for bin 0 and counter bin 0 // now thread 8 - 15 holds feature 0 - 7's hessian for bin 0 and counter bin 1 // now thread 16- 23 holds feature 0 - 7's gradient for bin 1 and counter bin 2 @@ -664,7 +664,7 @@ R""() // thread 8 - 15 read counters stored by thread 0 - 7 // thread 24- 31 read counters stored by thread 8 - 15 // thread 40- 47 read counters stored by thread 16- 23, etc - stat_val = const_hessian * + stat_val = const_hessian * cnt_hist[((ltid - DWORD_FEATURES) >> (LOG2_DWORD_FEATURES + 1)) * DWORD_FEATURES + (ltid & DWORD_FEATURES_MASK)]; } else { @@ -688,12 +688,12 @@ R""() h_f0_bin1 h_f1_bin1 h_f2_bin1 h_f3_bin1 h_f4_bin1 h_f5_bin1 h_f6_bin1 h_f7_bin1 ... ... - g_f0_bin16 g_f1_bin16 g_f2_bin16 g_f3_bin16 g_f4_bin16 g_f5_bin16 g_f6_bin16 g_f7_bin16 - h_f0_bin16 h_f1_bin16 h_f2_bin16 h_f3_bin16 h_f4_bin16 h_f5_bin16 h_f6_bin16 h_f7_bin16 + g_f0_bin16 g_f1_bin16 g_f2_bin16 g_f3_bin16 g_f4_bin16 g_f5_bin16 g_f6_bin16 g_f7_bin16 + h_f0_bin16 h_f1_bin16 h_f2_bin16 h_f3_bin16 h_f4_bin16 h_f5_bin16 h_f6_bin16 h_f7_bin16 c_f0_bin0 c_f1_bin0 c_f2_bin0 c_f3_bin0 c_f4_bin0 c_f5_bin0 c_f6_bin0 c_f7_bin0 c_f0_bin1 c_f1_bin1 c_f2_bin1 c_f3_bin1 c_f4_bin1 c_f5_bin1 c_f6_bin1 c_f7_bin1 ... - c_f0_bin16 c_f1_bin16 c_f2_bin16 c_f3_bin16 c_f4_bin16 c_f5_bin16 c_f6_bin16 c_f7_bin16 + c_f0_bin16 c_f1_bin16 c_f2_bin16 c_f3_bin16 c_f4_bin16 c_f5_bin16 c_f6_bin16 c_f7_bin16 */ // if there is only one workgroup processing this feature4, don't even need to write uint feature4_id = (group_id >> POWER_FEATURE_WORKGROUPS); @@ -704,7 +704,7 @@ R""() output[0 * DWORD_FEATURES * NUM_BINS + ltid] = stat_val; barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); mem_fence(CLK_GLOBAL_MEM_FENCE); - // To avoid the cost of an extra reducing kernel, we have to deal with some + // To avoid the cost of an extra reducing kernel, we have to deal with some // gray area in OpenCL. We want the last work group that process this feature to // make the final reduction, and other threads will just quit. 
// This requires that the results written by other workgroups available to the @@ -750,13 +750,13 @@ R""() #endif // locate our feature4's block in output memory uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS); - __global acc_type const * restrict feature4_subhists = + __global acc_type const * restrict feature4_subhists = (__global acc_type *)output_buf + output_offset * DWORD_FEATURES * 2 * NUM_BINS; // skip reading the data already in local memory uint skip_id = group_id ^ output_offset; // locate output histogram location for this feature4 __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * DWORD_FEATURES * 2 * NUM_BINS; - within_kernel_reduction16x8(feature_mask, feature4_subhists, skip_id, stat_val, + within_kernel_reduction16x8(feature_mask, feature4_subhists, skip_id, stat_val, 1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array); } } diff --git a/src/treelearner/ocl/histogram256.cl b/src/treelearner/ocl/histogram256.cl index 09d155520d52..3351f9efa7c3 100644 --- a/src/treelearner/ocl/histogram256.cl +++ b/src/treelearner/ocl/histogram256.cl @@ -47,12 +47,12 @@ typedef uint acc_int_type; #endif #define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS) -// unroll the atomic operation for a few times. Takes more code space, +// unroll the atomic operation for a few times. Takes more code space, // but compiler can generate better code for faster atomics. #define UNROLL_ATOMIC 1 // Options passed by compiler at run time: -// IGNORE_INDICES will be set when the kernel does not +// IGNORE_INDICES will be set when the kernel does not // #define IGNORE_INDICES // #define POWER_FEATURE_WORKGROUPS 10 @@ -137,7 +137,7 @@ R""() // this function will be called by histogram256 // we have one sub-histogram of one feature in local memory, and need to read others void within_kernel_reduction256x4(uchar4 feature_mask, - __global const acc_type* restrict feature4_sub_hist, + __global const acc_type* restrict feature4_sub_hist, const uint skip_id, const uint old_val_f0_cont_bin0, const ushort num_sub_hist, @@ -314,12 +314,12 @@ R""() */ __attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1))) #if USE_CONSTANT_BUF == 1 -__kernel void histogram256(__global const uchar4* restrict feature_data_base, +__kernel void histogram256(__global const uchar4* restrict feature_data_base, __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))), const data_size_t feature_size, - __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))), - const data_size_t num_data, - __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))), + __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))), + const data_size_t num_data, + __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))), #if CONST_HESSIAN == 0 __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))), #else @@ -329,18 +329,18 @@ __kernel void histogram256(__global const uchar4* restrict feature_data_base, __global volatile int * sync_counters, __global acc_type* restrict hist_buf_base) { #else -__kernel void histogram256(__global const uchar4* feature_data_base, +__kernel void histogram256(__global const uchar4* feature_data_base, __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))), const data_size_t feature_size, - __global const 
data_size_t* data_indices, - const data_size_t num_data, - __global const score_t* ordered_gradients, + __global const data_size_t* data_indices, + const data_size_t num_data, + __global const score_t* ordered_gradients, #if CONST_HESSIAN == 0 __global const score_t* ordered_hessians, #else const score_t const_hessian, #endif - __global char* restrict output_buf, + __global char* restrict output_buf, __global volatile int * sync_counters, __global acc_type* restrict hist_buf_base) { #endif @@ -363,20 +363,20 @@ __kernel void histogram256(__global const uchar4* feature_data_base, // gradient/hessian histograms // assume this starts at 32 * 4 = 128-byte boundary // total size: 2 * 4 * 256 * size_of(float) = 8 KB - // organization: each feature/grad/hessian is at a different bank, + // organization: each feature/grad/hessian is at a different bank, // as independent of the feature value as possible __local acc_type * gh_hist = (__local acc_type *)shared_array; // counter histogram // total size: 4 * 256 * size_of(uint) = 4 KB #if CONST_HESSIAN == 1 __local uint * cnt_hist = (__local uint *)(gh_hist + 2 * 4 * NUM_BINS); - #endif + #endif // thread 0, 1, 2, 3 compute histograms for gradients first // thread 4, 5, 6, 7 compute histograms for Hessians first // etc. uchar is_hessian_first = (ltid >> 2) & 1; - + ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS; // each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant) // feature_size is the number of examples per feature @@ -725,7 +725,7 @@ R""() } barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); mem_fence(CLK_GLOBAL_MEM_FENCE); - // To avoid the cost of an extra reducing kernel, we have to deal with some + // To avoid the cost of an extra reducing kernel, we have to deal with some // gray area in OpenCL. We want the last work group that process this feature to // make the final reduction, and other threads will just quit. 
@@ -773,15 +773,15 @@ R""()
 #endif
     // locate our feature4's block in output memory
     uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
-    __global acc_type const * restrict feature4_subhists = 
+    __global acc_type const * restrict feature4_subhists =
                              (__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
     // skip reading the data already in local memory
     uint skip_id = group_id ^ output_offset;
     // locate output histogram location for this feature4
     __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
-    within_kernel_reduction256x4(feature_mask, feature4_subhists, skip_id, old_val, 1 << POWER_FEATURE_WORKGROUPS, 
+    within_kernel_reduction256x4(feature_mask, feature4_subhists, skip_id, old_val, 1 << POWER_FEATURE_WORKGROUPS,
                                  hist_buf, (__local acc_type *)shared_array);
-    // if (ltid == 0) 
+    // if (ltid == 0)
     //     printf("workgroup %d reduction done, %g %g %g %g %g %g %g %g\n", group_id, hist_buf[0], hist_buf[3*NUM_BINS], hist_buf[2*3*NUM_BINS], hist_buf[3*3*NUM_BINS], hist_buf[1], hist_buf[3*NUM_BINS+1], hist_buf[2*3*NUM_BINS+1], hist_buf[3*3*NUM_BINS+1]);
     }
 }
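As a sanity check on the sizes quoted in histogram256.cl's comments (8 KB of gradient/Hessian bins plus 4 KB of counters), the LOCAL_MEM_SIZE formula works out to exactly 12 KB per work group in the single-precision configuration where acc_type is float. The small host-side C program below verifies that arithmetic; the macros simply mirror the kernel's, and nothing in it is LightGBM API.

#include <assert.h>

#define NUM_BINS 256
typedef float acc_type;  /* single-precision build; the kernels can also use double */
#define LOCAL_MEM_SIZE (4 * (sizeof(unsigned int) + 2 * sizeof(acc_type)) * NUM_BINS)

int main(void) {
    /* gradient/hessian histograms: 2 stats * 4 features * 256 bins * 4 B = 8 KB */
    assert(2 * 4 * NUM_BINS * sizeof(acc_type) == 8192);
    /* counter histogram: 4 features * 256 bins * 4 B = 4 KB */
    assert(4 * NUM_BINS * sizeof(unsigned int) == 4096);
    /* exactly the LOCAL_MEM_SIZE the kernel reserves: 12 KB per work group */
    assert(LOCAL_MEM_SIZE == 8192 + 4096);
    return 0;
}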
diff --git a/src/treelearner/ocl/histogram64.cl b/src/treelearner/ocl/histogram64.cl
index c156db6d91a2..48fa8c506d8b 100644
--- a/src/treelearner/ocl/histogram64.cl
+++ b/src/treelearner/ocl/histogram64.cl
@@ -65,12 +65,12 @@ typedef uint acc_int_type;
 // local memory size in bytes
 #define LOCAL_MEM_SIZE (4 * (sizeof(uint) + 2 * sizeof(acc_type)) * NUM_BINS * NUM_BANKS)
 
-// unroll the atomic operation for a few times. Takes more code space, 
+// unroll the atomic operation for a few times. Takes more code space,
 // but compiler can generate better code for faster atomics.
 #define UNROLL_ATOMIC 1
 
 // Options passed by compiler at run time:
-// IGNORE_INDICES will be set when the kernel does not 
+// IGNORE_INDICES will be set when the kernel does not
 // #define IGNORE_INDICES
 // #define POWER_FEATURE_WORKGROUPS 10
@@ -155,7 +155,7 @@ R""()
 // this function will be called by histogram64
 // we have one sub-histogram of one feature in registers, and need to read others
 void within_kernel_reduction64x4(uchar4 feature_mask,
-                           __global const acc_type* restrict feature4_sub_hist, 
+                           __global const acc_type* restrict feature4_sub_hist,
                            const uint skip_id,
                            acc_type g_val, acc_type h_val,
                            const ushort num_sub_hist,
@@ -166,7 +166,7 @@ void within_kernel_reduction64x4(uchar4 feature_mask,
     ushort feature_id = ltid & 3; // range 0 - 4
     const ushort bin_id = ltid >> 2; // range 0 - 63W
     ushort i;
-    #if POWER_FEATURE_WORKGROUPS != 0 
+    #if POWER_FEATURE_WORKGROUPS != 0
     // if there is only 1 work group, no need to do the reduction
     // add all sub-histograms for 4 features
     __global const acc_type* restrict p = feature4_sub_hist + ltid;
@@ -212,12 +212,12 @@ R""()
 */
 __attribute__((reqd_work_group_size(LOCAL_SIZE_0, 1, 1)))
 #if USE_CONSTANT_BUF == 1
-__kernel void histogram64(__global const uchar4* restrict feature_data_base, 
+__kernel void histogram64(__global const uchar4* restrict feature_data_base,
                       __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                       const data_size_t feature_size,
-                      __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))), 
-                      const data_size_t num_data, 
-                      __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))), 
+                      __constant const data_size_t* restrict data_indices __attribute__((max_constant_size(65536))),
+                      const data_size_t num_data,
+                      __constant const score_t* restrict ordered_gradients __attribute__((max_constant_size(65536))),
 #if CONST_HESSIAN == 0
                       __constant const score_t* restrict ordered_hessians __attribute__((max_constant_size(65536))),
 #else
@@ -227,18 +227,18 @@ __kernel void histogram64(__global const uchar4* restrict feature_data_base,
                       __global volatile int * sync_counters,
                       __global acc_type* restrict hist_buf_base) {
 #else
-__kernel void histogram64(__global const uchar4* feature_data_base, 
+__kernel void histogram64(__global const uchar4* feature_data_base,
                       __constant const uchar4* restrict feature_masks __attribute__((max_constant_size(65536))),
                       const data_size_t feature_size,
-                      __global const data_size_t* data_indices, 
-                      const data_size_t num_data, 
-                      __global const score_t* ordered_gradients, 
+                      __global const data_size_t* data_indices,
+                      const data_size_t num_data,
+                      __global const score_t* ordered_gradients,
 #if CONST_HESSIAN == 0
                       __global const score_t* ordered_hessians,
 #else
                       const score_t const_hessian,
 #endif
-                      __global char* restrict output_buf, 
+                      __global char* restrict output_buf,
                       __global volatile int * sync_counters,
                       __global acc_type* restrict hist_buf_base) {
 #endif
@@ -313,7 +313,7 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
     uchar is_hessian_first = (ltid >> 2) & 1;
     // thread 0-7 write result to bank0, 8-15 to bank1, 16-23 to bank2, 24-31 to bank3
     ushort bank = (ltid >> 3) & BANK_MASK;
-    
+
     ushort group_feature = group_id >> POWER_FEATURE_WORKGROUPS;
     // each 2^POWER_FEATURE_WORKGROUPS workgroups process on one feature (compile-time constant)
     // feature_size is the number of examples per feature
@@ -582,7 +582,7 @@ R""()
     atomic_local_add_f(gh_hist + addr2, s0_stat2);
     #endif
     barrier(CLK_LOCAL_MEM_FENCE);
-    
+
 /* Makes MSVC happy with long string literal
 )""
 R""()
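histogram64.cl spreads each work group's atomic updates across NUM_BANKS copies of the local histogram, assigning bank = (ltid >> 3) & BANK_MASK so that neighbouring threads hitting the same popular bin contend on different copies; the next hunk is where those four banks get folded back into one. Here is a self-contained OpenCL C kernel demonstrating the same banking idea on a plain counting histogram; all names are illustrative, and bin values are assumed to be below NUM_BINS.

#define NUM_BINS  64
#define NUM_BANKS 4
#define BANK_MASK (NUM_BANKS - 1)

__kernel void banked_histogram(__global const uchar *bin_idx,  // values < NUM_BINS
                               __global uint *out_hist,        // NUM_BINS entries, zero-initialized
                               const uint n) {
    __local uint hist[NUM_BANKS * NUM_BINS];
    const uint ltid = get_local_id(0);
    // neighbouring threads land in different banks, as in histogram64.cl
    const uint bank = (ltid >> 3) & BANK_MASK;
    for (uint i = ltid; i < NUM_BANKS * NUM_BINS; i += get_local_size(0))
        hist[i] = 0;
    barrier(CLK_LOCAL_MEM_FENCE);
    for (uint i = get_global_id(0); i < n; i += get_global_size(0))
        atomic_inc(&hist[bank * NUM_BINS + bin_idx[i]]);
    barrier(CLK_LOCAL_MEM_FENCE);
    // fold the banks back into one histogram, then publish per work group
    for (uint b = ltid; b < NUM_BINS; b += get_local_size(0)) {
        uint sum = 0;
        for (uint k = 0; k < NUM_BANKS; ++k)
            sum += hist[k * NUM_BINS + b];
        atomic_add(&out_hist[b], sum);
    }
}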
@@ -591,7 +591,7 @@ R""()
     // restore feature_mask
     feature_mask = feature_masks[group_feature];
     #endif
-    
+
     // now reduce the 4 banks of subhistograms into 1
     /* memory layout of gh_hist:
        -----------------------------------------------------------------------------------------------
@@ -680,7 +680,7 @@ R""()
     output[1 * 4 * NUM_BINS + ltid] = h_val;
     barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
     mem_fence(CLK_GLOBAL_MEM_FENCE);
-    // To avoid the cost of an extra reducing kernel, we have to deal with some 
+    // To avoid the cost of an extra reducing kernel, we have to deal with some
     // gray area in OpenCL. We want the last work group that process this feature to
     // make the final reduction, and other threads will just quit.
     // This requires that the results written by other workgroups available to the
@@ -726,13 +726,13 @@ R""()
 #endif
     // locate our feature4's block in output memory
     uint output_offset = (feature4_id << POWER_FEATURE_WORKGROUPS);
-    __global acc_type const * restrict feature4_subhists = 
+    __global acc_type const * restrict feature4_subhists =
                              (__global acc_type *)output_buf + output_offset * 4 * 2 * NUM_BINS;
     // skip reading the data already in local memory
     uint skip_id = group_id ^ output_offset;
     // locate output histogram location for this feature4
     __global acc_type* restrict hist_buf = hist_buf_base + feature4_id * 4 * 2 * NUM_BINS;
-    within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val, 
+    within_kernel_reduction64x4(feature_mask, feature4_subhists, skip_id, g_val, h_val,
                                 1 << POWER_FEATURE_WORKGROUPS, hist_buf, (__local acc_type *)shared_array);
     }
 }
diff --git a/swig/StringArray.i b/swig/StringArray.i
index b179b02dff31..2665de5f207b 100644
--- a/swig/StringArray.i
+++ b/swig/StringArray.i
@@ -6,7 +6,7 @@
  */
 
 /**
  * This wraps the StringArray.hpp class for SWIG usage,
- * adding the basic C-style wrappers needed to make it 
+ * adding the basic C-style wrappers needed to make it
  * usable for the users of the low-level lightgbmJNI API.
  */
@@ -23,7 +23,7 @@
 
     /**
      * @brief Creates a new StringArray and returns its handle.
-     * 
+     *
      * @param num_strings number of strings to store.
      * @param string_size the maximum number of characters that can be stored in each string.
      * @return StringArrayHandle or nullptr in case of allocation failure.
@@ -38,7 +38,7 @@
 
     /**
      * @brief Free the StringArray object.
-     * 
+     *
      * @param handle StringArray handle.
      */
     void StringArrayHandle_free(StringArrayHandle handle)
@@ -49,7 +49,7 @@
     /**
      * @brief Return the raw pointer to the array of strings.
      *        Wrapped in Java into String[] automatically.
-     * 
+     *
      * @param handle StringArray handle.
      * @return Raw pointer to the string array which `various.i` maps to String[].
      */
@@ -60,7 +60,7 @@
 
     /**
      * For the end user to extract a specific string from the StringArray object.
-     * 
+     *
      * @param handle StringArray handle.
      * @param index index of the string to retrieve from the array.
      * @return raw pointer to string at index, or nullptr if out of bounds.
@@ -72,7 +72,7 @@
 
     /**
      * @brief Replaces one string of the array at index with the new content.
-     * 
+     *
      * @param handle StringArray handle.
      * @param index Index of the string to replace
      * @param new_content The content to replace
@@ -85,7 +85,7 @@
 
     /**
      * @brief Retrieve the number of strings in the StringArray.
-     * 
+     *
      * @param handle StringArray handle.
      * @return number of strings that the array stores.
      */
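The StringArray.i wrappers above expose a C-style lifecycle: create an array of fixed-capacity strings, read or replace entries by index, query the element count, and free the handle. The C usage sketch below is hypothetical: only StringArrayHandle_free appears verbatim in this diff, so the other prototypes are illustrative stand-ins for the wrappers the doc comments describe, not the exact LightGBM SWIG API.

#include <stdio.h>

typedef void *StringArrayHandle;

/* Illustrative prototypes; in practice these come from the SWIG-generated bindings. */
StringArrayHandle StringArrayHandle_create(long num_strings, long string_size);
char *StringArrayHandle_get_string(StringArrayHandle handle, long index);
void StringArrayHandle_set_string(StringArrayHandle handle, long index, const char *content);
long StringArrayHandle_get_num_elements(StringArrayHandle handle);
void StringArrayHandle_free(StringArrayHandle handle);

int main(void) {
    /* room for 2 strings of up to 15 characters each */
    StringArrayHandle h = StringArrayHandle_create(2, 15);
    if (h == NULL) return 1;  /* creation reports allocation failure as nullptr */
    StringArrayHandle_set_string(h, 0, "feature_0");
    printf("%s (%ld strings)\n",
           StringArrayHandle_get_string(h, 0),
           StringArrayHandle_get_num_elements(h));
    StringArrayHandle_free(h);
    return 0;
}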
diff --git a/swig/pointer_manipulation.i b/swig/pointer_manipulation.i
index de0bddd42f8e..d868f92bfe15 100644
--- a/swig/pointer_manipulation.i
+++ b/swig/pointer_manipulation.i
@@ -6,11 +6,11 @@
  * This SWIG interface extension provides support to
  * the pointer manipulation methods present in the standard
  * SWIG wrappers, but with support for larger arrays.
- * 
+ *
  * SWIG provides this in https://github.com/swig/swig/blob/master/Lib/carrays.i
 * but the standard methods only provide arrays with up to
 * max(int32_t) elements.
- * 
+ *
 * The `long_array_functions` wrappers extend this
 * to arrays of size max(int64_t) instead of max(int32_t).
 */
@@ -103,7 +103,7 @@
 void delete_##NAME(TYPE *ary);
 TYPE NAME##_getitem(TYPE *ary, int64_t index);
 void NAME##_setitem(TYPE *ary, int64_t index, TYPE value);
-%enddef 
+%enddef
 
 /* Custom template for arrays of pointers */
 %define %ptr_array_functions(TYPE,NAME)
diff --git a/tests/cpp_tests/testutils.cpp b/tests/cpp_tests/testutils.cpp
index 84acfe5b98a3..8d8b7a66164a 100644
--- a/tests/cpp_tests/testutils.cpp
+++ b/tests/cpp_tests/testutils.cpp
@@ -277,7 +277,7 @@ namespace LightGBM {
 /*!
  * Pushes data from 1 thread into a Dataset based on thread_id and nrows.
  * e.g. with 100 rows, thread 0 will push rows 0-49, and thread 2 will push rows 50-99.
- * Note that rows are still pushed in microbatches within their range. 
+ * Note that rows are still pushed in microbatches within their range.
  */
 void TestUtils::PushSparseBatch(DatasetHandle dataset_handle,
                                 int32_t nrows,
diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters
index 00cb875b1218..1bb899738213 100644
--- a/windows/LightGBM.vcxproj.filters
+++ b/windows/LightGBM.vcxproj.filters
@@ -351,4 +351,4 @@
       <Filter>src\utils</Filter>
     </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
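For reference, the long_array_functions declarations visible in the pointer_manipulation.i hunk above (new/delete plus int64_t-indexed getitem/setitem) expand, for a concrete TYPE/NAME pair, to roughly the plain C accessors below. This is a sketch of the pattern under the assumption of a C-style allocation, not SWIG's literal expansion.

#include <stdint.h>
#include <stdlib.h>

/* Roughly what %long_array_functions(double, doubleArray) provides:
   C accessors indexed by int64_t rather than carrays.i's int32-limited int. */
double *new_doubleArray(int64_t nelements) {
    return (double *)calloc((size_t)nelements, sizeof(double));
}
void delete_doubleArray(double *ary) { free(ary); }
double doubleArray_getitem(double *ary, int64_t index) { return ary[index]; }
void doubleArray_setitem(double *ary, int64_t index, double value) { ary[index] = value; }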