From 4686391a6ce004eef529536186c038888d1c8a0f Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 20 Dec 2024 14:52:39 +0800 Subject: [PATCH 01/16] fix bug of autoint model demo config --- easy_rec/python/inference/predictor.py | 2 +- examples/configs/autoint_on_movielens.config | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/easy_rec/python/inference/predictor.py b/easy_rec/python/inference/predictor.py index 93220f047..fa0853ea5 100644 --- a/easy_rec/python/inference/predictor.py +++ b/easy_rec/python/inference/predictor.py @@ -534,7 +534,7 @@ def _parse_value(all_vals): ] for k in self._reserved_cols: if k in all_vals and all_vals[k].dtype == np.object: - all_vals[k] = [val.decode('utf-8') for val in all_vals[k]] + all_vals[k] = [val.decode('utf-8', errors='ignore') for val in all_vals[k]] ts2 = time.time() reserve_vals = self._get_reserve_vals(self._reserved_cols, diff --git a/examples/configs/autoint_on_movielens.config b/examples/configs/autoint_on_movielens.config index f74918899..35f0009ae 100644 --- a/examples/configs/autoint_on_movielens.config +++ b/examples/configs/autoint_on_movielens.config @@ -3,7 +3,7 @@ eval_input_path: "examples/data/movielens_1m/movies_test_data" model_dir: "examples/ckpt/autoint_on_movieslen_ckpt" train_config { - log_step_count_steps: 100 + log_step_count_steps: 1000 optimizer_config: { adam_optimizer: { learning_rate: { @@ -157,5 +157,5 @@ model_config: { embedding_regularization: 1e-4 } export_config { - multi_placeholder: false + multi_placeholder: true } From a23298b375e4a65235ddc187f316bb80c27b873f Mon Sep 17 00:00:00 2001 From: yangxudong Date: Wed, 25 Dec 2024 13:14:19 +0800 Subject: [PATCH 02/16] fix bug of wrong demo model configs --- examples/configs/autoint_on_movielens.config | 4 ++-- examples/configs/dcn_backbone_on_movielens.config | 5 +---- examples/configs/dcn_on_movielens.config | 3 --- examples/configs/deepfm_backbone_on_movielens.config | 3 --- 
examples/configs/deepfm_on_movielens.config | 3 --- examples/configs/dssm_on_books.config | 3 --- examples/configs/dssm_on_books_negative_sample.config | 3 --- examples/configs/dssm_senet_on_taobao.config | 3 --- examples/configs/fibinet_on_movielens.config | 3 --- examples/configs/masknet_on_movielens.config | 3 --- examples/configs/mind_on_books.config | 3 --- examples/configs/mind_on_books_negative_sample.config | 3 --- 12 files changed, 3 insertions(+), 36 deletions(-) diff --git a/examples/configs/autoint_on_movielens.config b/examples/configs/autoint_on_movielens.config index 35f0009ae..cbf43729f 100644 --- a/examples/configs/autoint_on_movielens.config +++ b/examples/configs/autoint_on_movielens.config @@ -3,7 +3,7 @@ eval_input_path: "examples/data/movielens_1m/movies_test_data" model_dir: "examples/ckpt/autoint_on_movieslen_ckpt" train_config { - log_step_count_steps: 1000 + log_step_count_steps: 100 optimizer_config: { adam_optimizer: { learning_rate: { @@ -17,7 +17,7 @@ train_config { } use_moving_average: false } - save_checkpoints_steps: 100 + save_checkpoints_steps: 1000 sync_replicas: True num_steps: 2500 } diff --git a/examples/configs/dcn_backbone_on_movielens.config b/examples/configs/dcn_backbone_on_movielens.config index dbffb76b7..7be038dbf 100644 --- a/examples/configs/dcn_backbone_on_movielens.config +++ b/examples/configs/dcn_backbone_on_movielens.config @@ -1,6 +1,6 @@ train_input_path: "examples/data/movielens_1m/movies_train_data" eval_input_path: "examples/data/movielens_1m/movies_test_data" -model_dir: "examples/ckpt/dcn_on_movieslen" +model_dir: "examples/ckpt/dcn_backbone_on_movieslen" train_config { log_step_count_steps: 100 @@ -199,6 +199,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/dcn_on_movielens.config b/examples/configs/dcn_on_movielens.config index 4556fef88..09110c81d 100644 --- a/examples/configs/dcn_on_movielens.config +++ 
b/examples/configs/dcn_on_movielens.config @@ -180,6 +180,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/deepfm_backbone_on_movielens.config b/examples/configs/deepfm_backbone_on_movielens.config index 56f210b10..5e6ea9b8d 100644 --- a/examples/configs/deepfm_backbone_on_movielens.config +++ b/examples/configs/deepfm_backbone_on_movielens.config @@ -241,6 +241,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/deepfm_on_movielens.config b/examples/configs/deepfm_on_movielens.config index f87b7c894..a49a1988c 100644 --- a/examples/configs/deepfm_on_movielens.config +++ b/examples/configs/deepfm_on_movielens.config @@ -182,6 +182,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/dssm_on_books.config b/examples/configs/dssm_on_books.config index 326e7432f..eebcdb295 100644 --- a/examples/configs/dssm_on_books.config +++ b/examples/configs/dssm_on_books.config @@ -112,6 +112,3 @@ model_config:{ } embedding_regularization: 5e-5 } - -export_config { -} diff --git a/examples/configs/dssm_on_books_negative_sample.config b/examples/configs/dssm_on_books_negative_sample.config index 069ebb4a5..8e3fe87f1 100644 --- a/examples/configs/dssm_on_books_negative_sample.config +++ b/examples/configs/dssm_on_books_negative_sample.config @@ -127,6 +127,3 @@ model_config:{ loss_type: SOFTMAX_CROSS_ENTROPY embedding_regularization: 5e-6 } - -export_config { -} diff --git a/examples/configs/dssm_senet_on_taobao.config b/examples/configs/dssm_senet_on_taobao.config index f8f415d1a..7b8e0da1c 100644 --- a/examples/configs/dssm_senet_on_taobao.config +++ b/examples/configs/dssm_senet_on_taobao.config @@ -281,6 +281,3 @@ model_config:{ } embedding_regularization: 5e-5 } - -export_config { -} diff --git 
a/examples/configs/fibinet_on_movielens.config b/examples/configs/fibinet_on_movielens.config index 5b0feb072..b4ecaf613 100644 --- a/examples/configs/fibinet_on_movielens.config +++ b/examples/configs/fibinet_on_movielens.config @@ -200,6 +200,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config index 4dd54c914..04205ddd5 100644 --- a/examples/configs/masknet_on_movielens.config +++ b/examples/configs/masknet_on_movielens.config @@ -196,6 +196,3 @@ model_config: { } embedding_regularization: 1e-4 } -export_config { - multi_placeholder: false -} diff --git a/examples/configs/mind_on_books.config b/examples/configs/mind_on_books.config index 4b50d04b4..d19eb5d96 100644 --- a/examples/configs/mind_on_books.config +++ b/examples/configs/mind_on_books.config @@ -111,6 +111,3 @@ model_config:{ } embedding_regularization: 5e-5 } - -export_config { -} diff --git a/examples/configs/mind_on_books_negative_sample.config b/examples/configs/mind_on_books_negative_sample.config index 3816e9f7e..6058e6a2f 100644 --- a/examples/configs/mind_on_books_negative_sample.config +++ b/examples/configs/mind_on_books_negative_sample.config @@ -128,6 +128,3 @@ model_config:{ embedding_regularization: 5e-5 loss_type: SOFTMAX_CROSS_ENTROPY } - -export_config { -} From 7f5ddad1707c04c9925e0547c8ad347086efdf77 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Tue, 31 Dec 2024 16:19:31 +0800 Subject: [PATCH 03/16] fix bug of undefined flags of easyrec tools run with DeepRec --- .../python/tools/add_boundaries_to_config.py | 3 +++ .../tools/add_feature_info_to_config.py | 3 +++ easy_rec/python/tools/faiss_index_pai.py | 3 +++ easy_rec/python/tools/feature_selection.py | 3 +++ easy_rec/python/tools/hit_rate_ds.py | 3 +++ easy_rec/python/tools/hit_rate_pai.py | 3 +++ easy_rec/python/tools/pre_check.py | 3 +++ 
easy_rec/python/tools/split_model_pai.py | 3 +++ easy_rec/python/tools/split_pdn_model_pai.py | 3 +++ easy_rec/python/utils/io_util.py | 21 +++++++++++++++++++ 10 files changed, 48 insertions(+) diff --git a/easy_rec/python/tools/add_boundaries_to_config.py b/easy_rec/python/tools/add_boundaries_to_config.py index 09d2d9a1d..18d5f6037 100644 --- a/easy_rec/python/tools/add_boundaries_to_config.py +++ b/easy_rec/python/tools/add_boundaries_to_config.py @@ -3,11 +3,13 @@ import json import logging import os +import sys import common_io import tensorflow as tf from easy_rec.python.utils import config_util +from easy_rec.python.utils import io_util if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -61,4 +63,5 @@ def main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git a/easy_rec/python/tools/add_feature_info_to_config.py b/easy_rec/python/tools/add_feature_info_to_config.py index b11cfc0a7..7594d038b 100644 --- a/easy_rec/python/tools/add_feature_info_to_config.py +++ b/easy_rec/python/tools/add_feature_info_to_config.py @@ -3,10 +3,12 @@ import json import logging import os +import sys import tensorflow as tf from easy_rec.python.utils import config_util +from easy_rec.python.utils import io_util from easy_rec.python.utils.hive_utils import HiveUtils if tf.__version__ >= '2.0': @@ -139,4 +141,5 @@ def main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git a/easy_rec/python/tools/faiss_index_pai.py b/easy_rec/python/tools/faiss_index_pai.py index 718382733..b7eb66bc0 100644 --- a/easy_rec/python/tools/faiss_index_pai.py +++ b/easy_rec/python/tools/faiss_index_pai.py @@ -4,10 +4,12 @@ import logging import os +import sys import faiss import numpy as np import tensorflow as tf +from easy_rec.python.utils import io_util logging.basicConfig( level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') @@ -109,4 +111,5 @@ def 
main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git a/easy_rec/python/tools/feature_selection.py b/easy_rec/python/tools/feature_selection.py index 6d9f59911..f50a00fac 100644 --- a/easy_rec/python/tools/feature_selection.py +++ b/easy_rec/python/tools/feature_selection.py @@ -3,6 +3,7 @@ import json import os +import sys from collections import OrderedDict import numpy as np @@ -11,6 +12,7 @@ from tensorflow.python.framework.meta_graph import read_meta_graph_file from easy_rec.python.utils import config_util +from easy_rec.python.utils import io_util if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -299,6 +301,7 @@ def _visualize_feature_importance(self, feature_importance, group_name): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) if FLAGS.model_type == 'variational_dropout': fs = VariationalDropoutFS( FLAGS.config_path, diff --git a/easy_rec/python/tools/hit_rate_ds.py b/easy_rec/python/tools/hit_rate_ds.py index 552b96aad..5528e0aa2 100644 --- a/easy_rec/python/tools/hit_rate_ds.py +++ b/easy_rec/python/tools/hit_rate_ds.py @@ -20,12 +20,14 @@ import json import logging import os +import sys import graphlearn as gl import tensorflow as tf from easy_rec.python.protos.dataset_pb2 import DatasetConfig from easy_rec.python.utils import config_util +from easy_rec.python.utils import io_util from easy_rec.python.utils.config_util import process_multi_file_input_path from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch from easy_rec.python.utils.hit_rate_utils import load_graph @@ -217,4 +219,5 @@ def main(): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) main() diff --git a/easy_rec/python/tools/hit_rate_pai.py b/easy_rec/python/tools/hit_rate_pai.py index 73c8a2095..5f97b3429 100644 --- a/easy_rec/python/tools/hit_rate_pai.py +++ b/easy_rec/python/tools/hit_rate_pai.py @@ -17,8 +17,10 @@ from __future__ 
import division from __future__ import print_function +import sys import tensorflow as tf +from easy_rec.python.utils import io_util from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch from easy_rec.python.utils.hit_rate_utils import load_graph from easy_rec.python.utils.hit_rate_utils import reduce_hitrate @@ -131,4 +133,5 @@ def main(): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) main() diff --git a/easy_rec/python/tools/pre_check.py b/easy_rec/python/tools/pre_check.py index 8fcaa2caf..da7f1923b 100644 --- a/easy_rec/python/tools/pre_check.py +++ b/easy_rec/python/tools/pre_check.py @@ -3,12 +3,14 @@ import json import logging import os +import sys import tensorflow as tf from easy_rec.python.input.input import Input from easy_rec.python.utils import config_util from easy_rec.python.utils import fg_util +from easy_rec.python.utils import io_util from easy_rec.python.utils.check_utils import check_env_and_input_path from easy_rec.python.utils.check_utils import check_sequence @@ -114,4 +116,5 @@ def main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git a/easy_rec/python/tools/split_model_pai.py b/easy_rec/python/tools/split_model_pai.py index bdb2087de..cf1657deb 100644 --- a/easy_rec/python/tools/split_model_pai.py +++ b/easy_rec/python/tools/split_model_pai.py @@ -2,6 +2,7 @@ import copy import logging import os +import sys import tensorflow as tf from tensorflow.core.framework import graph_pb2 @@ -11,6 +12,7 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as tf_saver +from easy_rec.python.utils import io_util if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -282,4 +284,5 @@ def main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git 
a/easy_rec/python/tools/split_pdn_model_pai.py b/easy_rec/python/tools/split_pdn_model_pai.py index e2341d57d..849250b37 100644 --- a/easy_rec/python/tools/split_pdn_model_pai.py +++ b/easy_rec/python/tools/split_pdn_model_pai.py @@ -2,6 +2,7 @@ import copy import logging import os +import sys import tensorflow as tf from tensorflow.core.framework import graph_pb2 @@ -12,6 +13,7 @@ from tensorflow.python.saved_model.utils_impl import get_variables_path from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as tf_saver +from easy_rec.python.utils import io_util FLAGS = tf.app.flags.FLAGS tf.app.flags.DEFINE_string('model_dir', '', '') @@ -265,4 +267,5 @@ def main(argv): if __name__ == '__main__': + sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv) tf.app.run() diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 091e10e07..431394e5e 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -185,3 +185,24 @@ def read_data_from_json_path(json_path): else: logging.info('json_path not exists, return None') return None + +def filter_unknown_args(flags, args): + """Filter unknown args.""" + defined_flags = set(flag.name for flag in flags._flags().values()) + logging.info('defined arguments: %s', ', '.join(defined_flags)) + logging.info('actual arguments: %s', ', '.join(args[1:])) + known_args = [args[0]] + unknown = False + for arg in args[1:]: + if arg.startswith('--'): + flag_name = arg.split('=')[0][2:] + if flag_name in defined_flags: + known_args.append(arg) + unknown = False + else: + unknown = True + logging.warning('Ignore unknown arg: %s' % arg) + elif not unknown: + known_args.append(arg) + logging.info('keep arguments: %s', ', '.join(known_args[1:])) + return known_args From 74b1edcc600e273e9446e33a8e7b9938134bb220 Mon Sep 17 00:00:00 2001 From: "weisu.yxd" Date: Fri, 3 Jan 2025 12:53:23 +0800 Subject: [PATCH 04/16] fix bug --- 
easy_rec/python/utils/io_util.py | 76 +++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 431394e5e..cfe20d4ac 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -186,23 +186,67 @@ def read_data_from_json_path(json_path): logging.info('json_path not exists, return None') return None + +def convert_tf_flags_to_argparse(flags): + """Convert tf.app.flags.FLAGS to argparse.ArgumentParser. + + Args: + flags: tf.app.flags.FLAGS + Returns: + argparse.ArgumentParser: configurate ArgumentParser object + """ + import argparse + import ast + parser = argparse.ArgumentParser() + + args = set() + for flag in flags._flags().values(): + flag_name = flag.name + if flag_name in args: + continue + args.add(flag_name) + default = flag.value + flag_type = type(default) + help_str = flag.help or '' + if flag_type == bool: + parser.add_argument( + '--' + flag_name, + dest=flag_name, + action='store_true' if default else 'store_false', + help=help_str) + elif flag_type == str: + if hasattr(flag, 'choices') and flag.choices: + parser.add_argument( + '--' + flag_name, + type=str, + choices=flag.choices, + default=default, + help=help_str) + else: + parser.add_argument( + '--' + flag_name, type=str, default=default, help=help_str) + elif flag_type in (list, dict): + parser.add_argument( + '--' + flag_name, + type=lambda s: ast.literal_eval(s), + default=default, + help=help_str) + else: + parser.add_argument( + '--' + flag_name, type=flag_type, default=default, help=help_str) + return parser + + def filter_unknown_args(flags, args): """Filter unknown args.""" - defined_flags = set(flag.name for flag in flags._flags().values()) - logging.info('defined arguments: %s', ', '.join(defined_flags)) - logging.info('actual arguments: %s', ', '.join(args[1:])) known_args = [args[0]] - unknown = False - for arg in args[1:]: - if 
arg.startswith('--'): - flag_name = arg.split('=')[0][2:] - if flag_name in defined_flags: - known_args.append(arg) - unknown = False - else: - unknown = True - logging.warning('Ignore unknown arg: %s' % arg) - elif not unknown: - known_args.append(arg) - logging.info('keep arguments: %s', ', '.join(known_args[1:])) + parser = convert_tf_flags_to_argparse(flags) + args, unknown = parser.parse_known_args(args) + if len(unknown) > 1: + logging.info('undefined arguments: %s', ', '.join(unknown[1:])) + for key, value in vars(args).items(): + if type(value) != bool and not value: + continue + known_args.append('--' + key + '=' + str(value)) + logging.info('defined arguments: %s', ', '.join(known_args[1:])) return known_args From 8ffbc29ab78a77b25ce5be5d9f0fbb9756195cad Mon Sep 17 00:00:00 2001 From: "weisu.yxd" Date: Fri, 3 Jan 2025 16:59:00 +0800 Subject: [PATCH 05/16] fix bug --- easy_rec/python/utils/io_util.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index cfe20d4ac..d5a949630 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -199,15 +199,21 @@ def convert_tf_flags_to_argparse(flags): import ast parser = argparse.ArgumentParser() - args = set() + args = {} for flag in flags._flags().values(): flag_name = flag.name if flag_name in args: + args[flag_name][0] = True continue - args.add(flag_name) default = flag.value flag_type = type(default) help_str = flag.help or '' + args[flag_name] = [ + False, flag_type, default, help_str, + flag.choices if hasattr(flag, 'choices') else None + ] + + for flag_name, (multi, flag_type, default, help_str, choices) in args.items(): if flag_type == bool: parser.add_argument( '--' + flag_name, @@ -215,13 +221,16 @@ def convert_tf_flags_to_argparse(flags): action='store_true' if default else 'store_false', help=help_str) elif flag_type == str: - if hasattr(flag, 'choices') and 
flag.choices: + if choices: parser.add_argument( '--' + flag_name, type=str, - choices=flag.choices, + choices=choices, default=default, help=help_str) + elif multi: + parser.add_argument( + '--' + flag_name, type=str, default=default, help=help_str) else: parser.add_argument( '--' + flag_name, type=str, default=default, help=help_str) @@ -231,9 +240,12 @@ def convert_tf_flags_to_argparse(flags): type=lambda s: ast.literal_eval(s), default=default, help=help_str) - else: + elif flag_type in (int, float): parser.add_argument( '--' + flag_name, type=flag_type, default=default, help=help_str) + else: + parser.add_argument( + '--' + flag_name, type=str, default=default, help=help_str) return parser @@ -245,7 +257,7 @@ def filter_unknown_args(flags, args): if len(unknown) > 1: logging.info('undefined arguments: %s', ', '.join(unknown[1:])) for key, value in vars(args).items(): - if type(value) != bool and not value: + if type(value) in (list, dict) and not value: continue known_args.append('--' + key + '=' + str(value)) logging.info('defined arguments: %s', ', '.join(known_args[1:])) From 3f97db9aa40323e3b21fa6b9b6045af5ec80d703 Mon Sep 17 00:00:00 2001 From: "weisu.yxd" Date: Fri, 3 Jan 2025 17:03:34 +0800 Subject: [PATCH 06/16] fix bug --- easy_rec/python/utils/io_util.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index d5a949630..5cef1d9ef 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -230,7 +230,11 @@ def convert_tf_flags_to_argparse(flags): help=help_str) elif multi: parser.add_argument( - '--' + flag_name, type=str, default=default, help=help_str) + '--' + flag_name, + type=str, + action='append', + default=default, + help=help_str) else: parser.add_argument( '--' + flag_name, type=str, default=default, help=help_str) From c7d7a0db51370b2b3223bf007e745e5184108dbd Mon Sep 17 00:00:00 2001 From: "weisu.yxd" Date: Fri, 3 Jan 2025 
20:00:32 +0800 Subject: [PATCH 07/16] fix bug --- easy_rec/python/utils/io_util.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 5cef1d9ef..7b4666391 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -213,12 +213,24 @@ def convert_tf_flags_to_argparse(flags): flag.choices if hasattr(flag, 'choices') else None ] + def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + for flag_name, (multi, flag_type, default, help_str, choices) in args.items(): if flag_type == bool: parser.add_argument( '--' + flag_name, - dest=flag_name, - action='store_true' if default else 'store_false', + type=str2bool, + nargs='?', + const=True, + default=False, help=help_str) elif flag_type == str: if choices: From eed82839481ae0d89f2201ca06fd9fb757144ba5 Mon Sep 17 00:00:00 2001 From: "weisu.yxd" Date: Mon, 6 Jan 2025 09:56:40 +0800 Subject: [PATCH 08/16] fix bug --- easy_rec/python/utils/io_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 7b4666391..92c9c8a1f 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -273,6 +273,8 @@ def filter_unknown_args(flags, args): if len(unknown) > 1: logging.info('undefined arguments: %s', ', '.join(unknown[1:])) for key, value in vars(args).items(): + if value is None: + continue if type(value) in (list, dict) and not value: continue known_args.append('--' + key + '=' + str(value)) From d7a1860f9efcd6fcb3957bcfc8c027d1d6b78e95 Mon Sep 17 00:00:00 2001 From: yanzhen1233 Date: Thu, 16 Jan 2025 18:37:38 +0800 Subject: [PATCH 09/16] Build docker (#515) * Updated Docker image --- 
docker/Dockerfile_tf112 | 74 +++++++++++++++++++++++++++++++++++ docker/Dockerfile_tf115 | 36 +++++++++++++++++ docker/Dockerfile_tf212 | 36 +++++++++++++++++ examples/readme.md | 18 ++++----- requirements/runtime.txt | 3 +- scripts/build_docker_tf112.sh | 21 ++++++++++ scripts/build_docker_tf115.sh | 21 ++++++++++ scripts/build_docker_tf212.sh | 21 ++++++++++ 8 files changed, 220 insertions(+), 10 deletions(-) create mode 100644 docker/Dockerfile_tf112 create mode 100644 docker/Dockerfile_tf115 create mode 100644 docker/Dockerfile_tf212 create mode 100644 scripts/build_docker_tf112.sh create mode 100644 scripts/build_docker_tf115.sh create mode 100644 scripts/build_docker_tf212.sh diff --git a/docker/Dockerfile_tf112 b/docker/Dockerfile_tf112 new file mode 100644 index 000000000..9077e59e2 --- /dev/null +++ b/docker/Dockerfile_tf112 @@ -0,0 +1,74 @@ +#FROM tensorflow/tensorflow:1.12.0 +FROM my_tensorflow_base:1.12.0-py2 + +COPY docker/sources_18.04.list /etc/apt/sources.list + +# necessary for later commands to take effect +RUN md5sum /etc/apt/sources.list \ + && apt-get update \ + && apt-get install apt-utils inetutils-ping wget curl telnet vim strace libpq-dev curl libsasl2-dev gcc g++ unzip openjdk-8-jdk -y \ + && apt-get install build-essential cython -y \ + && pip install cython \ + && pip install setuptools_scm +# 检查 Cython 是否安装成功 +RUN python -c "import Cython" +RUN pip --version + +RUN mkdir /EasyRec +COPY requirements /EasyRec/requirements +COPY requirements.txt /EasyRec/ +COPY easy_rec /EasyRec/easy_rec/ +COPY setup.cfg /EasyRec/ +COPY setup.py /EasyRec/ +COPY MANIFEST.in /EasyRec/ +COPY README.md /EasyRec/ +COPY scripts /EasyRec/scripts + +RUN curl "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip +RUN mkdir /usr/local/odps_clt/ && cd /usr/local/odps_clt/ && unzip /EasyRec/odpscmd_public.zip +RUN ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd + +RUN pip install pystack-debugger idna 
kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +# 升级pip +RUN pip install --upgrade pip setuptools wheel + +# 安装 setuptools-rust 和 rustc +RUN pip install setuptools-rust +RUN pip install tensorflow_probability==0.5.0 +RUN apt-get update && apt-get install -y rustc +RUN apt-get update && \ + apt-get install -y rustc && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \ + . $HOME/.cargo/env +# 安装 cryptography +RUN pip install cryptography +# 安装基础工具链与依赖项 +RUN apt-get update && \ + apt-get install -y build-essential libssl-dev libffi-dev python-dev && \ + apt-get install -y rustc cargo cmake curl + +# 设置国内的 Rust 镜像源 +RUN echo '[source.crates-io]\n' > $HOME/.cargo/config +RUN echo 'replace-with = "ustc"' >> $HOME/.cargo/config +RUN echo '[source.ustc]\n' >> $HOME/.cargo/config +RUN echo 'registry = "https://mirrors.ustc.edu.cn/crates.io-index"' >> $HOME/.cargo/config + +# 确保 curl 支持 HTTP2 +RUN curl -V + +# 显示安装好的工具链版本,确保已正确安装 +RUN rustc --version +RUN cargo --version +RUN cmake --version + +RUN pip install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com + +RUN pip install --user -U https://tfsmoke1.oss-cn-zhangjiakou.aliyuncs.com/tunnel_paiio/common_io/py2/common_io-0.1.0-cp27-cp27mu-linux_x86_64.whl +RUN pip install graphlearn + +RUN cd /EasyRec && python setup.py install +RUN rm -rf /EasyRec +RUN python -c "import easy_rec; import pyhive; import datahub; import kafka" + +COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file diff --git a/docker/Dockerfile_tf115 b/docker/Dockerfile_tf115 new file mode 100644 index 000000000..30d857726 --- /dev/null +++ b/docker/Dockerfile_tf115 @@ -0,0 +1,36 @@ +FROM 
datascience-registry.cn-beijing.cr.aliyuncs.com/tensorflow/tensorflow:1.15.5 + +COPY docker/sources_18.04.list /etc/apt/sources.list + +# necessary for later commands to take effect +RUN md5sum /etc/apt/sources.list + +RUN apt-get update +RUN apt-get install apt-utils inetutils-ping wget curl telnet vim strace libpq-dev curl libsasl2-dev gcc g++ unzip openjdk-8-jdk -y + +RUN mkdir /EasyRec +COPY requirements /EasyRec/requirements +COPY requirements.txt /EasyRec/ +COPY easy_rec /EasyRec/easy_rec/ +COPY setup.cfg /EasyRec/ +COPY setup.py /EasyRec/ +COPY MANIFEST.in /EasyRec/ +COPY README.md /EasyRec/ +COPY scripts /EasyRec/scripts + +RUN curl "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip +RUN mkdir /usr/local/odps_clt/ && cd /usr/local/odps_clt/ && unzip /EasyRec/odpscmd_public.zip +RUN ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd +RUN pip3 install --upgrade pip +RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install -r /EasyRec/requirements/runtime.txt +RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp36-cp36m-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.3.0-cp36-cp36m-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install tensorflow_probability==0.8 +RUN cd /EasyRec && pip install . 
+RUN rm -rf /EasyRec +RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka" + +COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file diff --git a/docker/Dockerfile_tf212 b/docker/Dockerfile_tf212 new file mode 100644 index 000000000..49977d60b --- /dev/null +++ b/docker/Dockerfile_tf212 @@ -0,0 +1,36 @@ +FROM tensorflow/tensorflow:2.12.0 +COPY docker/sources_20.04.list /etc/apt/sources.list + +# necessary for later commands to take effect +RUN md5sum /etc/apt/sources.list + +RUN apt-get update +RUN apt-get install apt-utils inetutils-ping wget curl telnet vim strace libpq-dev curl libsasl2-dev gcc g++ unzip openjdk-8-jdk -y + +RUN mkdir /EasyRec +COPY requirements /EasyRec/requirements +COPY requirements.txt /EasyRec/ +COPY easy_rec /EasyRec/easy_rec/ +COPY setup.cfg /EasyRec/ +COPY setup.py /EasyRec/ +COPY MANIFEST.in /EasyRec/ +COPY README.md /EasyRec/ +COPY scripts /EasyRec/scripts + +RUN curl "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip +RUN mkdir /usr/local/odps_clt/ && cd /usr/local/odps_clt/ && unzip /EasyRec/odpscmd_public.zip +RUN ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd + +RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +# RUN pip3 install 
http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install tensorflow_probability==0.20.0 +#RUN pip3 install encodings +RUN pip3 install https://dlc-task.oss-cn-hangzhou.aliyuncs.com/whl/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN cd /EasyRec && python setup.py install +RUN rm -rf /EasyRec +# RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka" + +COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file diff --git a/examples/readme.md b/examples/readme.md index bf936cf21..a587cc991 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -36,14 +36,14 @@ cd EasyRec -- Docker环境可选 (1) `python=3.6.9` + `tenserflow=1.15.5` -docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4 -docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4 +docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5 +docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5 docker exec -it bash -(2) `python=3.8.10` + `tenserflow=2.10.0` -docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.10-0.7.4 -docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.10-0.7.4 +(2) `python=3.8.10` + `tenserflow=2.12.0` +docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12-0.8.5 +docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec 
mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12-0.8.5 docker exec -it bash ``` @@ -56,12 +56,12 @@ cd EasyRec -- Docker环境可选 (1) `python=3.6.9` + `tenserflow=1.15.5` -bash scripts/build_docker.sh +bash scripts/build_docker_tf115.sh sudo docker run -td --network host -v /local_path:/docker_path mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15- -(2) `python=3.8.10` + `tenserflow=2.10.0` -bash scripts/build_docker_tf210.sh -sudo docker run -td --network host -v /local_path:/docker_path mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.10- +(2) `python=3.8.10` + `tensorflow=2.12.0` +bash scripts/build_docker_tf212.sh +sudo docker run -td --network host -v /local_path:/docker_path mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12- sudo docker exec -it bash ``` diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 8e6fa5616..402e16b6e 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,4 +1,5 @@ -eas_prediction +eas_prediction == 0.24; python_version < '3.0' +eas_prediction; python_version >= '3.0' future matplotlib numpy <= 1.23 diff --git a/scripts/build_docker_tf112.sh b/scripts/build_docker_tf112.sh new file mode 100644 index 000000000..5bad07df3 --- /dev/null +++ b/scripts/build_docker_tf112.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +bash scripts/gen_proto.sh +if [ $? -ne 0 ] +then + echo "gen proto failed" + exit 1 +fi + +version=`grep "__version__" easy_rec/version.py | awk '{ if($1 == "__version__") print $NF}'` +# strip "'" +version=${version//\'/} +echo "EasyRec Version: $version" + +if [ -z "$version" ] +then + echo "Failed to get EasyRec version" + exit 1 +fi + +sudo docker build --network=host .
-f docker/Dockerfile_tf112 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py27-tf1.12-${version} \ No newline at end of file diff --git a/scripts/build_docker_tf115.sh b/scripts/build_docker_tf115.sh new file mode 100644 index 000000000..a52616944 --- /dev/null +++ b/scripts/build_docker_tf115.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +bash scripts/gen_proto.sh +if [ $? -ne 0 ] +then + echo "gen proto failed" + exit 1 +fi + +version=`grep "__version__" easy_rec/version.py | awk '{ if($1 == "__version__") print $NF}'` +# strip "'" +version=${version//\'/} +echo "EasyRec Version: $version" + +if [ -z "$version" ] +then + echo "Failed to get EasyRec version" + exit 1 +fi + +sudo docker build --network=host . -f docker/Dockerfile_tf115 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-${version} \ No newline at end of file diff --git a/scripts/build_docker_tf212.sh b/scripts/build_docker_tf212.sh new file mode 100644 index 000000000..50baf5aa3 --- /dev/null +++ b/scripts/build_docker_tf212.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +bash scripts/gen_proto.sh +if [ $? -ne 0 ] +then + echo "gen proto failed" + exit 1 +fi + +version=`grep "__version__" easy_rec/version.py | awk '{ if($1 == "__version__") print $NF}'` +# strip "'" +version=${version//\'/} +echo "EasyRec Version: $version" + +if [ -z "$version" ] +then + echo "Failed to get EasyRec version" + exit 1 +fi + +sudo docker build --network=host . 
-f docker/Dockerfile_tf212 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12-${version} \ No newline at end of file From 179cfa37843775731bece8c5276196bc376bf025 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 17 Jan 2025 15:18:58 +0800 Subject: [PATCH 10/16] merge from master --- docker/Dockerfile_tf210 | 1 + docs/source/feature/feature.rst | 1 - docs/source/quick_start/local_tutorial.md | 34 +++++++++++++++++++++-- docs/source/vector_retrieve.md | 4 +-- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile_tf210 b/docker/Dockerfile_tf210 index 54c67b698..90b54adef 100644 --- a/docker/Dockerfile_tf210 +++ b/docker/Dockerfile_tf210 @@ -27,6 +27,7 @@ RUN pip3 install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com # RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com +RUN pip3 install tensorflow_probability==0.18.0 RUN pip3 install https://dlc-task.oss-cn-hangzhou.aliyuncs.com/whl/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com RUN cd /EasyRec && python setup.py install RUN rm -rf /EasyRec diff --git a/docs/source/feature/feature.rst b/docs/source/feature/feature.rst index 901fe6673..0f70989dd 100644 --- a/docs/source/feature/feature.rst +++ b/docs/source/feature/feature.rst @@ -136,7 +136,6 @@ RawFeature:连续值特征 features { input_names: "ctr" feature_type: RawFeature - embedding_dim: 8 } } diff --git a/docs/source/quick_start/local_tutorial.md 
b/docs/source/quick_start/local_tutorial.md index 443312ce9..9b2b0f0d4 100644 --- a/docs/source/quick_start/local_tutorial.md +++ b/docs/source/quick_start/local_tutorial.md @@ -8,20 +8,39 @@ #### 本地Anaconda安装 +温馨提示:**在搭载Apple芯片的MacBook上必须使用TensorFlow 2.5或更高版本**。 + Demo实验中使用的环境为 `python=3.6.8` + `tenserflow=1.12.0` ```bash conda create -n py36_tf12 python=3.6.8 conda activate py36_tf12 pip install tensorflow==1.12.0 +pip install tensorflow_probability==0.5.0 ``` +注意:必须要安装`tensorflow_probability`包,需要根据tensorflow的版本安装对应版本的`tensorflow_robability`包。 + +常见版本对应关系: + +| TensorFlow版本 | TensorFlowProbability版本 | +|--------------|-------------------------| +| 1.12 | 0.5.0 | +| 1.15 | 0.8.0 | +| 2.5.0 | 0.13.0 | +| 2.6.0 | 0.14.0 | +| 2.7.0 | 0.15.0 | +| 2.8.0 | 0.16.0 | +| 2.10 | 0.18.0 | +| 2.12 | 0.20.0 | + +其他版本对应关系请查看链接:[Releases · tensorflow/probability](https://github.com/tensorflow/probability/releases)。 + ```bash git clone https://github.com/alibaba/EasyRec.git cd EasyRec bash scripts/init.sh python setup.py install - ``` #### Docker镜像启动 @@ -33,13 +52,22 @@ Docker的环境为`python=3.6.9` + `tenserflow=1.15.5` ```bash git clone https://github.com/alibaba/EasyRec.git cd EasyRec -docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4 -docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.7.4 +docker pull mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5 +docker run -td --network host -v /local_path/EasyRec:/docker_path/EasyRec mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-0.8.5 docker exec -it bash ``` ##### 方法二:自行构建Docker镜像 +我们提供四个版本的tensorflow镜像构建示例,对应的脚步路径如下: + +- scripts/build_docker_tf112.sh +- scripts/build_docker_tf115.sh +- scripts/build_docker_tf210.sh +- scripts/build_docker_tf212.sh + +默认使用`tensorflow 1.15`的版本,示例脚本如下,请根据需要替换脚本路径: + ```bash git clone 
https://github.com/alibaba/EasyRec.git cd EasyRec diff --git a/docs/source/vector_retrieve.md b/docs/source/vector_retrieve.md index fe02a9358..8d3f7b909 100644 --- a/docs/source/vector_retrieve.md +++ b/docs/source/vector_retrieve.md @@ -37,7 +37,7 @@ pai -name easy_rec_ext -project algo_public_dev ## 使用示例 -### 1. 创建查询表 +### 1. 创建索引表 ```sql create table doc_table(pk BIGINT,vector string) partitioned by (pt string); @@ -53,7 +53,7 @@ VALUES ; ``` -### 2. 创建索引表 +### 2. 创建查询表 ```sql create table query_table(pk BIGINT,vector string) partitioned by (pt string); From 1f81f2bf1a4c6eca977d5fef388ca6319de2cbf9 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 20 Dec 2024 14:52:39 +0800 Subject: [PATCH 11/16] fix bug of autoint model demo config --- examples/configs/autoint_on_movielens.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/configs/autoint_on_movielens.config b/examples/configs/autoint_on_movielens.config index cbf43729f..6435974e8 100644 --- a/examples/configs/autoint_on_movielens.config +++ b/examples/configs/autoint_on_movielens.config @@ -3,7 +3,7 @@ eval_input_path: "examples/data/movielens_1m/movies_test_data" model_dir: "examples/ckpt/autoint_on_movieslen_ckpt" train_config { - log_step_count_steps: 100 + log_step_count_steps: 1000 optimizer_config: { adam_optimizer: { learning_rate: { From c00d33ec46278fda903880a1abb328b475ec9af0 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Wed, 25 Dec 2024 13:14:19 +0800 Subject: [PATCH 12/16] fix bug of wrong demo model configs --- examples/configs/autoint_on_movielens.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/configs/autoint_on_movielens.config b/examples/configs/autoint_on_movielens.config index 6435974e8..cbf43729f 100644 --- a/examples/configs/autoint_on_movielens.config +++ b/examples/configs/autoint_on_movielens.config @@ -3,7 +3,7 @@ eval_input_path: "examples/data/movielens_1m/movies_test_data" model_dir: 
"examples/ckpt/autoint_on_movieslen_ckpt" train_config { - log_step_count_steps: 1000 + log_step_count_steps: 100 optimizer_config: { adam_optimizer: { learning_rate: { From 8317df780c7184a3e23c1fd2d3cbec038977636d Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 17 Jan 2025 15:36:41 +0800 Subject: [PATCH 13/16] merge from master --- docs/source/feature/feature.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/source/feature/feature.rst b/docs/source/feature/feature.rst index 0f70989dd..384b8dc13 100644 --- a/docs/source/feature/feature.rst +++ b/docs/source/feature/feature.rst @@ -139,6 +139,18 @@ RawFeature:连续值特征 } } +也可以为每个RawFeature添加一个Field Embedding(原始特征值乘上一个可学习embedding参数),如下: + +.. code:: protobuf + + feature_config:{ + features { + input_names: "ctr" + feature_type: RawFeature + embedding_dim: 8 + } + } + 分箱组件使用方法见: `机器学习组件 `_ 也可以手动导入分箱信息。如下: From 9cd8ca18bb1dee62aea1e47d3a2a4748aa4b7325 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 17 Jan 2025 16:55:56 +0800 Subject: [PATCH 14/16] merge from master --- docs/source/quick_start/local_tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/quick_start/local_tutorial.md b/docs/source/quick_start/local_tutorial.md index 9b2b0f0d4..51a242e9a 100644 --- a/docs/source/quick_start/local_tutorial.md +++ b/docs/source/quick_start/local_tutorial.md @@ -59,7 +59,7 @@ docker exec -it bash ##### 方法二:自行构建Docker镜像 -我们提供四个版本的tensorflow镜像构建示例,对应的脚步路径如下: +我们提供四个版本的tensorflow镜像构建示例,对应的脚本路径如下: - scripts/build_docker_tf112.sh - scripts/build_docker_tf115.sh From cd9fdcc4543ceab523ac1a547c4c4c37cc103162 Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 17 Jan 2025 16:58:13 +0800 Subject: [PATCH 15/16] merge from master --- docs/source/quick_start/local_tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/quick_start/local_tutorial.md b/docs/source/quick_start/local_tutorial.md index 51a242e9a..f0eb41256 100644 --- 
a/docs/source/quick_start/local_tutorial.md +++ b/docs/source/quick_start/local_tutorial.md @@ -8,7 +8,7 @@ #### 本地Anaconda安装 -温馨提示:**在搭载Apple芯片的MacBook上必须使用TensorFlow 2.5或更高版本**。 +温馨提示:**在搭载Apple芯片的MacBook上必须使用TensorFlow 2.5或更高版本**,安装方法请查看TF官方文档。 Demo实验中使用的环境为 `python=3.6.8` + `tenserflow=1.12.0` From 893577677802e42c2113fd33a55f1968672f295d Mon Sep 17 00:00:00 2001 From: yangxudong Date: Fri, 17 Jan 2025 18:11:08 +0800 Subject: [PATCH 16/16] merge from master --- docs/source/quick_start/local_tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/quick_start/local_tutorial.md b/docs/source/quick_start/local_tutorial.md index f0eb41256..b3a75fb00 100644 --- a/docs/source/quick_start/local_tutorial.md +++ b/docs/source/quick_start/local_tutorial.md @@ -8,7 +8,7 @@ #### 本地Anaconda安装 -温馨提示:**在搭载Apple芯片的MacBook上必须使用TensorFlow 2.5或更高版本**,安装方法请查看TF官方文档。 +温馨提示:**在搭载Apple M系列芯片的MacBook上必须使用TensorFlow 2.5或更高版本**,安装方法请查看TF官方文档。 Demo实验中使用的环境为 `python=3.6.8` + `tenserflow=1.12.0` @@ -19,7 +19,7 @@ pip install tensorflow==1.12.0 pip install tensorflow_probability==0.5.0 ``` -注意:必须要安装`tensorflow_probability`包,需要根据tensorflow的版本安装对应版本的`tensorflow_robability`包。 +注意:必须要安装`tensorflow_probability`包,需要根据tensorflow的版本安装对应版本的`tensorflow_probability`包。 常见版本对应关系: