Feat/1.3.0dev231205 (secretflow#1064)

* repo-sync-2023-12-05T10:29:36+0800 * update changelog. * update yacl. * Update ci images. * rm bazeliskrc * Larger ci timeout. * Larger ci timeout.
zhangxingmeng · Dec 6, 2023 · 215ee40 · 215ee40
1 parent bd4a539
commit 215ee40
Show file tree

Hide file tree

Showing 45 changed files with 704 additions and 465 deletions.
diff --git a/.bazeliskrc b/.bazeliskrc
diff --git a/.bazelversion b/.bazelversion
@@ -1 +1 @@
-5.4.1
+6.4.0
diff --git a/.circleci/continue-config.yml b/.circleci/continue-config.yml
@@ -13,7 +13,7 @@ jobs:
     # Specify the execution environment. You can specify an image from Dockerhub or use one of our Convenience Images from CircleCI's Developer Hub.
     # See: https://circleci.com/docs/2.0/configuration-reference/#docker-machine-macos-windows-executor
     docker:
-      - image: registry.hub.docker.com/secretflow/ubuntu-base-ci:0.4
+      - image: registry.hub.docker.com/secretflow/ubuntu-base-ci:0.9
     resource_class: 2xlarge+
     # Add steps to the job
     # See: https://circleci.com/docs/2.0/configuration-reference/#steps
@@ -24,7 +24,7 @@ jobs:
           name: Cancel build after set time
           background: true
           command: |
-            sleep 3600
+            sleep 7200
             echo "Canceling workflow as too much time has elapsed"
             curl -X POST --header "Content-Type: application/json" "https://circleci.com/api/v2/workflow/${CIRCLE_WORKFLOW_ID}/cancel?circle-token=${BUILD_TIMER_TOKEN}"
       - checkout

diff --git a/.circleci/release-config.yml b/.circleci/release-config.yml
@@ -20,7 +20,7 @@ parameters:
 jobs:
   linux_publish:
     docker:
-      - image: registry.hub.docker.com/secretflow/release-ci:0.7
+      - image: registry.hub.docker.com/secretflow/release-ci:0.12
     resource_class: 2xlarge
     parameters:
       python_ver:

diff --git a/.circleci/testpypi-config.yml b/.circleci/testpypi-config.yml
@@ -21,7 +21,7 @@ parameters:
 jobs:
   linux_publish:
     docker:
-      - image: registry.hub.docker.com/secretflow/release-ci:0.7
+      - image: registry.hub.docker.com/secretflow/release-ci:0.12
     resource_class: 2xlarge
     parameters:
       python_ver:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 `Fixed` for any bug fixes.
 `Security` in case of vulnerabilities.
 
+
+## [1.3.0.dev231205] - 2023-12-05
+
+### Changed
+- Add feature selection in all model predict comps.
+
+### Fixed
+- Fix pvalue & more readable assert msg.
+
+
 ## [1.3.0.dev231128] - 2023-11-28
 
 ### Added

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -93,3 +93,4 @@ Protocol Buffers resides at secretflow/protos.
 ```
 
 All generated Python code resides at secretflow/spec.
+
diff --git a/WORKSPACE b/WORKSPACE
@@ -4,7 +4,7 @@ load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 
 git_repository(
     name = "yacl",
-    commit = "3baea619ae3f67911d7f072ff7dd39bc6a00ec28",
+    commit = "8492bdf8c39444262fdb53b5fdf19a0b75f0356a",
     remote = "https://github.com/secretflow/yacl.git",
 )
 

diff --git a/docker/comp_list.json b/docker/comp_list.json
@@ -69,15 +69,6 @@
             "lowerBoundEnabled": true,
             "lowerBound": {}
           }
-        },
-        {
-          "name": "select_all_features",
-          "desc": "Select all features for binning.",
-          "type": "AT_BOOL",
-          "atomic": {
-            "isOptional": true,
-            "defaultValue": {}
-          }
         }
       ],
       "inputs": [
@@ -90,7 +81,8 @@
           "attrs": [
             {
               "name": "feature_selects",
-              "desc": "which features should be binned."
+              "desc": "which features should be binned.",
+              "colMinCntInclusive": "1"
             }
           ]
         }
@@ -218,15 +210,6 @@
             },
             "upperBoundInclusive": true
           }
-        },
-        {
-          "name": "select_all_features",
-          "desc": "Select all features for binning.",
-          "type": "AT_BOOL",
-          "atomic": {
-            "isOptional": true,
-            "defaultValue": {}
-          }
         }
       ],
       "inputs": [
@@ -245,7 +228,8 @@
             {
               "name": "label",
               "desc": "Label of input data.",
-              "colMinCntInclusive": "1"
+              "colMinCntInclusive": "1",
+              "colMaxCntInclusive": "1"
             }
           ]
         }
@@ -624,15 +608,6 @@
             "isOptional": true,
             "defaultValue": {}
           }
-        },
-        {
-          "name": "offset_col",
-          "desc": "Specify a column to use as the offset",
-          "type": "AT_STRING",
-          "atomic": {
-            "isOptional": true,
-            "defaultValue": {}
-          }
         }
       ],
       "inputs": [
@@ -1190,6 +1165,11 @@
             "sf.table.vertical_table"
           ],
           "attrs": [
+            {
+              "name": "feature_selects",
+              "desc": "which features should be used for training.",
+              "colMinCntInclusive": "1"
+            },
             {
               "name": "label",
               "desc": "Label of train dataset.",
@@ -1389,24 +1369,6 @@
             }
           }
         },
-        {
-          "name": "offset_col",
-          "desc": "Specify a column to use as the offset",
-          "type": "AT_STRING",
-          "atomic": {
-            "isOptional": true,
-            "defaultValue": {}
-          }
-        },
-        {
-          "name": "weight_col",
-          "desc": "Specify a column to use for the observation weights",
-          "type": "AT_STRING",
-          "atomic": {
-            "isOptional": true,
-            "defaultValue": {}
-          }
-        },
         {
           "name": "l2_lambda",
           "desc": "L2 regularization term",
@@ -1420,6 +1382,15 @@
             "lowerBound": {},
             "lowerBoundInclusive": true
           }
+        },
+        {
+          "name": "report_weights",
+          "desc": "If this option is set to true, model will be revealed and model details are visible to all parties",
+          "type": "AT_BOOL",
+          "atomic": {
+            "isOptional": true,
+            "defaultValue": {}
+          }
         }
       ],
       "inputs": [
@@ -1430,6 +1401,21 @@
             "sf.table.vertical_table"
           ],
           "attrs": [
+            {
+              "name": "feature_selects",
+              "desc": "which features should be used for training.",
+              "colMinCntInclusive": "1"
+            },
+            {
+              "name": "offset",
+              "desc": "Specify a column to use as the offset",
+              "colMaxCntInclusive": "1"
+            },
+            {
+              "name": "weight",
+              "desc": "Specify a column to use for the observation weights",
+              "colMaxCntInclusive": "1"
+            },
             {
               "name": "label",
               "desc": "Label of train dataset.",
@@ -1446,6 +1432,13 @@
           "types": [
             "sf.model.ss_glm"
           ]
+        },
+        {
+          "name": "report",
+          "desc": "If report_weights is true, report model details",
+          "types": [
+            "sf.report"
+          ]
         }
       ]
     },
@@ -1591,6 +1584,11 @@
             "sf.table.vertical_table"
           ],
           "attrs": [
+            {
+              "name": "feature_selects",
+              "desc": "which features should be used for training.",
+              "colMinCntInclusive": "1"
+            },
             {
               "name": "label",
               "desc": "Label of train dataset.",
@@ -1796,6 +1794,11 @@
             "sf.table.vertical_table"
           ],
           "attrs": [
+            {
+              "name": "feature_selects",
+              "desc": "which features should be used for training.",
+              "colMinCntInclusive": "1"
+            },
             {
               "name": "label",
               "desc": "Label of train dataset.",

diff --git a/docker/translation.json b/docker/translation.json
@@ -24,11 +24,9 @@
     "How to bin features with numeric types: \"quantile\"(equal frequency)/\"eq_range\"(equal range)": "如何对特征进行分箱：“quantile”（等频）/“eq_range”（等距）",
     "bin_num": "bin_num",
     "Max bin counts for one features.": "一个特征的最大分箱数",
-    "select_all_features": "选择所有特征",
-    "Select all features for binning.": "使用所有特征进行分箱",
     "input_data": "输入数据集",
     "Input vertical table.": "输入垂直表",
-    "feature_selects": "选择特征",
+    "feature_selects": "特征列",
     "which features should be binned.": "应对哪些特征进行分箱",
     "bin_rule": "分箱规则",
     "Output bin rule.": "输出分箱规则"
@@ -52,11 +50,9 @@
     "Stop merging if remaining bin counts is less than or equal to this value.": "在 ChiMerge 中如果剩余箱计数小于或等于此值，则停止合并",
     "chimerge_target_pvalue": "chimerge目标 p-value 值",
     "Stop merging if biggest pvalue of remaining bins is greater than this value.": "在 ChiMerge 中如果剩余分箱的最大 p-value 大于此值，则停止合并",
-    "select_all_features": "select_all_features",
-    "Select all features for binning.": "选择要装箱的所有功能。",
     "input_data": "输入数据集",
     "Input vertical table.": "输入联合表",
-    "feature_selects": "选择特征",
+    "feature_selects": "特征列",
     "which features should be binned.": "应对哪些特征进行分箱",
     "label": "标签",
     "Label of input data.": "输入数据的标签",
@@ -161,8 +157,6 @@
     "Whether to save ids columns into output prediction table. If true, input feature_dataset must contain id columns, and receiver party must be id owner.": "是否将 id 列保存到输出预测表中；如果为 true，则输入feature_dataset必须包含 id 列，并且接收方必须是 id 所有者",
     "save_label": "保存标签列",
     "Whether or not to save real label columns into output pred file. If true, input feature_dataset must contain label columns and receiver party must be label owner.": "是否将真实的标签列保存到输出 pred 文件中；如果为 true，则输入feature_dataset必须包含标签列，并且接收方必须是标签所有者",
-    "offset_col": "样本偏移列",
-    "Specify a column to use as the offset": "指定要用作偏移量的列",
     "model": "模型",
     "Input model.": "输入模型",
     "feature_dataset": "特征数据集",
@@ -265,6 +259,8 @@
     "How to grow tree?": "如何生长树",
     "train_dataset": "训练数据集",
     "Input vertical table.": "输入联合表",
+    "feature_selects": "特征列",
+    "which features should be used for training.": "哪些特征应该用于训练",
     "label": "标签",
     "Label of train dataset.": "训练数据集的标签",
     "output_model": "输出模型",
@@ -299,18 +295,24 @@
     "decay learning rate": "衰减学习率",
     "optimizer": "优化器",
     "which optimizer to use: IRLS(Iteratively Reweighted Least Squares) or SGD(Stochastic Gradient Descent)": "使用哪个优化器：IRLS（迭代加权最小二乘法）或SGD（随机梯度下降法）",
-    "offset_col": "偏移列",
-    "Specify a column to use as the offset": "指定要用作偏移量的列",
-    "weight_col": "权重列",
-    "Specify a column to use for the observation weights": "指定用于观测权重的列",
     "l2_lambda": "l2_lambda",
     "L2 regularization term": "L2正则系数",
+    "report_weights": "模型报告",
+    "If this option is set to true, model will be revealed and model details are visible to all parties": "如果此选项设置为true，模型会被转换到明文，并且模型的详细信息对各方都可见",
     "train_dataset": "训练数据集",
     "Input vertical table.": "输入联合表",
-    "label": "标签",
+    "feature_selects": "特征列",
+    "which features should be used for training.": "哪些特征应该用于训练",
+    "offset": "偏移列",
+    "Specify a column to use as the offset": "指定要用作偏移量的列",
+    "weight": "权重列",
+    "Specify a column to use for the observation weights": "指定用于观测权重的列",
+    "label": "标签列",
     "Label of train dataset.": "训练数据集的标签",
     "output_model": "输出模型",
-    "Output model.": "输出模型"
+    "Output model.": "输出模型",
+    "report": "报告",
+    "If report_weights is true, report model details": "如果report_weights为true，则报告模型详细信息"
   },
   "ml.train/ss_sgd_train:0.0.1": {
     "ml.train": "模型训练",
@@ -335,6 +337,8 @@
     "If the change rate of weights is less than this threshold, the model is considered to be converged, and the training stops early. 0 to disable.": "如果权重的变化率小于此阈值，则认为模型已收敛，训练提前停止；0 表示禁用",
     "train_dataset": "训练数据集",
     "Input vertical table.": "输入联合表",
+    "feature_selects": "特征列",
+    "which features should be used for training.": "哪些特征应该用于训练",
     "label": "标签",
     "Label of train dataset.": "训练数据集的标签",
     "output_model": "输出模型",
@@ -367,6 +371,8 @@
     "Pseudorandom number generator seed.": "伪随机数生成器种子",
     "train_dataset": "训练数据集",
     "Input vertical table.": "输入联合表",
+    "feature_selects": "特征列",
+    "which features should be used for training.": "哪些特征应该用于训练",
     "label": "标签",
     "Label of train dataset.": "训练数据集的标签",
     "output_model": "输出模型",