diff --git a/docker/Dockerfile_tf112 b/docker/Dockerfile_tf112 index 9077e59e2..1db45a444 100644 --- a/docker/Dockerfile_tf112 +++ b/docker/Dockerfile_tf112 @@ -71,4 +71,4 @@ RUN cd /EasyRec && python setup.py install RUN rm -rf /EasyRec RUN python -c "import easy_rec; import pyhive; import datahub; import kafka" -COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file +COPY docker/hadoop_env.sh /opt/hadoop_env.sh diff --git a/docker/Dockerfile_tf115 b/docker/Dockerfile_tf115 index 30d857726..462bfc5a0 100644 --- a/docker/Dockerfile_tf115 +++ b/docker/Dockerfile_tf115 @@ -33,4 +33,4 @@ RUN cd /EasyRec && pip install . RUN rm -rf /EasyRec RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka" -COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file +COPY docker/hadoop_env.sh /opt/hadoop_env.sh diff --git a/docker/Dockerfile_tf212 b/docker/Dockerfile_tf212 index 49977d60b..d6cc90003 100644 --- a/docker/Dockerfile_tf212 +++ b/docker/Dockerfile_tf212 @@ -33,4 +33,4 @@ RUN cd /EasyRec && python setup.py install RUN rm -rf /EasyRec # RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka" -COPY docker/hadoop_env.sh /opt/hadoop_env.sh \ No newline at end of file +COPY docker/hadoop_env.sh /opt/hadoop_env.sh diff --git a/docs/source/models/dssm_derivatives.md b/docs/source/models/dssm_derivatives.md index 003b3439e..b4fb21069 100644 --- a/docs/source/models/dssm_derivatives.md +++ b/docs/source/models/dssm_derivatives.md @@ -109,13 +109,13 @@ model_config:{ features: { input_names: 'user_id' feature_type: IdFeature - embedding_dim: 32 + embedding_dim: 32 # user_id特征embedding维度 hash_bucket_size: 100000 } features: { input_names: 'adgroup_id' feature_type: IdFeature - embedding_dim: 32 + embedding_dim: 32 # item_id特征embedding维度 hash_bucket_size: 100000 } . @@ -123,12 +123,12 @@ model_config:{ . 
feature_groups: { - group_name: 'user_id_augment' + group_name: 'user_id_augment' # 增加user_augment特征组,对user_id特征进行embedding作为辅助向量 feature_names: 'user_id' wide_deep:DEEP } feature_groups: { - group_name: 'item_id_augment' + group_name: 'item_id_augment' # 增加item_augment特征组,对item_id特征进行embedding作为辅助向量 feature_names: 'adgroup_id' wide_deep:DEEP } @@ -137,19 +137,21 @@ model_config:{ user_tower { id: "user_id" dnn { - hidden_units: [ 128, 32] + hidden_units: [ 128, 32] # 输出维度需要保证和item_augment特征组的embedding维度一致 # dropout_ratio : [0.1, 0.1, 0.1, 0.1] } } item_tower { id: "adgroup_id" dnn { - hidden_units: [ 128, 32] + hidden_units: [ 128, 32] # 输出维度需要保证和user_augment特征组的embedding维度一致 } } simi_func: COSINE temperature: 0.01 l2_regularization: 1e-6 + amm_i_weight: 0.5 # AMM损失权重 + amm_u_weight: 0.5 } ``` diff --git a/easy_rec/python/model/match_model.py b/easy_rec/python/model/match_model.py index 2347f1aef..e9c4d2d44 100644 --- a/easy_rec/python/model/match_model.py +++ b/easy_rec/python/model/match_model.py @@ -238,14 +238,16 @@ def _build_list_wise_loss_graph(self): k in self._prediction_dict.keys() for k in ['augmented_p_u', 'augmented_p_i', 'augmented_a_u', 'augmented_a_i'] ]): - self._loss_dict['amm_loss_u'] = tf.reduce_mean( - tf.square(self._prediction_dict['augmented_a_u'] - - self._prediction_dict['augmented_p_i'][:batch_size]) * - sample_weights) / tf.reduce_mean(sample_weights) - self._loss_dict['amm_loss_i'] = tf.reduce_mean( - tf.square(self._prediction_dict['augmented_a_i'][:batch_size] - - self._prediction_dict['augmented_p_u']) * - sample_weights) / tf.reduce_mean(sample_weights) + self._loss_dict[ + 'amm_loss_u'] = self._model_config.amm_u_weight * tf.reduce_mean( + tf.square(self._prediction_dict['augmented_a_u'] - + self._prediction_dict['augmented_p_i'][:batch_size]) * + sample_weights) / tf.reduce_mean(sample_weights) + self._loss_dict[ + 'amm_loss_i'] = self._model_config.amm_i_weight * tf.reduce_mean( + 
tf.square(self._prediction_dict['augmented_a_i'][:batch_size] - + self._prediction_dict['augmented_p_u']) * + sample_weights) / tf.reduce_mean(sample_weights) else: raise ValueError('invalid loss type: %s' % str(self._loss_type)) diff --git a/easy_rec/python/protos/dat.proto b/easy_rec/python/protos/dat.proto index 3b30735b6..2325fbb87 100644 --- a/easy_rec/python/protos/dat.proto +++ b/easy_rec/python/protos/dat.proto @@ -18,4 +18,8 @@ message DAT { optional Similarity simi_func = 4 [default=COSINE]; required bool ignore_in_batch_neg_sam = 5 [default = false]; optional float temperature = 6 [default = 1.0]; + // weight multiplied into amm_loss_i; optional so configs that omit it use the default + optional float amm_i_weight = 7 [default = 0.5]; + // weight multiplied into amm_loss_u; optional so configs that omit it use the default + optional float amm_u_weight = 8 [default = 0.5]; } diff --git a/easy_rec/python/tools/faiss_index_pai.py b/easy_rec/python/tools/faiss_index_pai.py index b7eb66bc0..e9ebe3f89 100644 --- a/easy_rec/python/tools/faiss_index_pai.py +++ b/easy_rec/python/tools/faiss_index_pai.py @@ -9,6 +9,7 @@ import faiss import numpy as np import tensorflow as tf + from easy_rec.python.utils import io_util logging.basicConfig( diff --git a/easy_rec/python/tools/hit_rate_pai.py b/easy_rec/python/tools/hit_rate_pai.py index 5f97b3429..977df20be 100644 --- a/easy_rec/python/tools/hit_rate_pai.py +++ b/easy_rec/python/tools/hit_rate_pai.py @@ -18,6 +18,7 @@ from __future__ import print_function import sys + import tensorflow as tf from easy_rec.python.utils import io_util diff --git a/easy_rec/python/tools/split_model_pai.py b/easy_rec/python/tools/split_model_pai.py index cf1657deb..ded5f0bf4 100644 --- a/easy_rec/python/tools/split_model_pai.py +++ b/easy_rec/python/tools/split_model_pai.py @@ -12,6 +12,7 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as tf_saver + from easy_rec.python.utils import io_util if tf.__version__ >= '2.0': diff --git
a/easy_rec/python/tools/split_pdn_model_pai.py b/easy_rec/python/tools/split_pdn_model_pai.py index 849250b37..78932c297 100644 --- a/easy_rec/python/tools/split_pdn_model_pai.py +++ b/easy_rec/python/tools/split_pdn_model_pai.py @@ -13,6 +13,7 @@ from tensorflow.python.saved_model.utils_impl import get_variables_path from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as tf_saver + from easy_rec.python.utils import io_util FLAGS = tf.app.flags.FLAGS diff --git a/scripts/build_docker_tf112.sh b/scripts/build_docker_tf112.sh index 5bad07df3..345b1d5ea 100644 --- a/scripts/build_docker_tf112.sh +++ b/scripts/build_docker_tf112.sh @@ -18,4 +18,4 @@ then exit 1 fi -sudo docker build --network=host . -f docker/Dockerfile_tf112 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py27-tf1.12-${version} \ No newline at end of file +sudo docker build --network=host . -f docker/Dockerfile_tf112 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py27-tf1.12-${version} diff --git a/scripts/build_docker_tf115.sh b/scripts/build_docker_tf115.sh index a52616944..e6ef8667b 100644 --- a/scripts/build_docker_tf115.sh +++ b/scripts/build_docker_tf115.sh @@ -18,4 +18,4 @@ then exit 1 fi -sudo docker build --network=host . -f docker/Dockerfile_tf115 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-${version} \ No newline at end of file +sudo docker build --network=host . -f docker/Dockerfile_tf115 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15-${version} diff --git a/scripts/build_docker_tf212.sh b/scripts/build_docker_tf212.sh index 50baf5aa3..e56bd1871 100644 --- a/scripts/build_docker_tf212.sh +++ b/scripts/build_docker_tf212.sh @@ -18,4 +18,4 @@ then exit 1 fi -sudo docker build --network=host . 
-f docker/Dockerfile_tf212 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12-${version} \ No newline at end of file +sudo docker build --network=host . -f docker/Dockerfile_tf212 -t mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py38-tf2.12-${version}