diff --git a/docs/images/models/dssm+senet.png b/docs/images/models/dssm+senet.png new file mode 100644 index 000000000..af57ad2e4 Binary files /dev/null and b/docs/images/models/dssm+senet.png differ diff --git a/docs/source/models/dssm_derivatives.md b/docs/source/models/dssm_derivatives.md new file mode 100644 index 000000000..d74aa9057 --- /dev/null +++ b/docs/source/models/dssm_derivatives.md @@ -0,0 +1,83 @@ +# DSSM衍生扩展模型 + +## DSSM + SENet + +### 简介 + +在推荐场景中,往往存在多种用户特征和物品特征,特征类型各不相同,各种特征经过embedding层后进入双塔模型的DNN层进行训练,在部分场景中甚至还会引入多模态embedding特征, 如图像和文本的embedding。 +然而各个特征对目标的影响不尽相同,有的特征重要性高,对模型整体表现影响大,有的特征则影响较小。因此当特征不断增多时,可以结合SENet自动学习每个特征的权重,增强重要信息到塔顶的能力。 + +![dssm+senet](../../images/models/dssm+senet.png) + +### 配置说明 + +```protobuf +model_config:{ + model_class: "DSSM_SENet" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + dssm_senet { + user_tower { + id: "user_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [128, 32] + } + } + item_tower { + id: "adgroup_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [128, 32] + } + } + simi_func: COSINE + scale_simi: false + temperature: 0.01 + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} +``` + +- senet参数配置: + - num_squeeze_group: 每个特征embedding的分组个数, 默认为2 + - reduction_ratio: 维度压缩比例, 默认为4 + +### 示例Config + 
class SENet:
  """Squeeze and Excite Network.

  Re-weights a list of per-field embeddings. Every embedding is split into
  ``num_squeeze_group`` groups; each group is squeezed to its max and its
  mean; the concatenated statistics pass through a ReLU ``reduce`` layer and
  a linear ``excite`` layer; the resulting weights multiply the concatenated
  embeddings element-wise.

  Input shape
    - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.
      The ``embedding_size`` of each field can have different value, but it
      must be statically known and divisible by ``num_squeeze_group``.

  Output shape
    - A single 2D tensor with shape ``(batch_size, sum(embedding_size))``,
      i.e. the inputs concatenated on the last axis and re-weighted.

  Args:
    num_fields: int, number of fields; only used to size the reduce layer.
    num_squeeze_group: int, number of groups each embedding is split into.
    reduction_ratio: int, reduction ratio for the bottleneck (reduce) layer.
    l2_reg: kernel regularizer passed to the reduce layer (may be None).
    name: str, name of the layer; prefixes the ``reduce``/``excite`` scopes.
  """

  def __init__(self,
               num_fields,
               num_squeeze_group,
               reduction_ratio,
               l2_reg,
               name='SENet'):
    self.num_fields = num_fields
    self.num_squeeze_group = num_squeeze_group
    self.reduction_ratio = reduction_ratio
    self._l2_reg = l2_reg
    self._name = name

  def _reduction_size(self):
    """Return the width of the reduce layer, never smaller than 1.

    Raises:
      ValueError: if num_squeeze_group or reduction_ratio is not positive.
    """
    if self.num_squeeze_group <= 0:
      raise ValueError('%s: num_squeeze_group must be positive, got %s' %
                       (self._name, self.num_squeeze_group))
    if self.reduction_ratio <= 0:
      raise ValueError('%s: reduction_ratio must be positive, got %s' %
                       (self._name, self.reduction_ratio))
    # Each field contributes 2 statistics (max and mean) per group.
    squeezed_width = self.num_fields * self.num_squeeze_group * 2
    return max(1, squeezed_width // self.reduction_ratio)

  def _static_emb_dims(self, inputs):
    """Return the static last-dimension size of every input embedding.

    Raises:
      ValueError: if an embedding size is not divisible by
        num_squeeze_group. Without this check the reshape below would
        silently floor-divide and either scramble rows or fail later with
        an unrelated shape error.
    """
    num_groups = self.num_squeeze_group
    emb_dims = []
    for idx, emb in enumerate(inputs):
      dim = int(emb.shape[-1])
      if dim % num_groups != 0:
        raise ValueError(
            '%s: embedding size %d of input %d is not divisible by '
            'num_squeeze_group %d' % (self._name, dim, idx, num_groups))
      emb_dims.append(dim)
    return emb_dims

  def __call__(self, inputs):
    """Apply squeeze-and-excite re-weighting to ``inputs``.

    Args:
      inputs: non-empty list of 2D tensors ``(batch_size, embedding_size)``.

    Returns:
      A 2D tensor ``(batch_size, sum(embedding_size))``.

    Raises:
      ValueError: if ``inputs`` is empty, a size argument is not positive,
        or an embedding size is not divisible by ``num_squeeze_group``.
    """
    if not inputs:
      raise ValueError('%s: inputs must be a non-empty list of embeddings' %
                       self._name)

    # Pure-python validation first, so misconfiguration fails before any
    # graph op is created.
    reduction_size = self._reduction_size()
    emb_dims = self._static_emb_dims(inputs)
    num_groups = self.num_squeeze_group
    total_emb_size = sum(emb_dims)

    group_embs = [
        tf.reshape(emb, [-1, num_groups, dim // num_groups])
        for emb, dim in zip(inputs, emb_dims)
    ]

    squeezed = []
    for grouped in group_embs:
      squeezed.append(tf.reduce_max(grouped, axis=-1))  # [B, g]
      squeezed.append(tf.reduce_mean(grouped, axis=-1))  # [B, g]
    z = tf.concat(squeezed, axis=1)  # [B, len(inputs) * g * 2]

    reduced = tf.layers.dense(
        inputs=z,
        units=reduction_size,
        kernel_regularizer=self._l2_reg,
        activation='relu',
        name='%s/reduce' % self._name)

    # Linear excitation: one weight per embedding element.
    excited_weights = tf.layers.dense(
        inputs=reduced,
        units=total_emb_size,
        kernel_initializer='glorot_normal',
        name='%s/excite' % self._name)

    # Re-weight
    concat_inputs = tf.concat(inputs, axis=-1)
    return concat_inputs * excited_weights
class DSSM_SENet(DSSM):
  """DSSM two-tower match model with a SENet layer in front of each tower.

  Each tower feeds its per-feature embeddings through a SENet re-weighting
  layer, then through a DNN; the last configured hidden layer is applied as
  a plain dense layer without activation. The two tower outputs are compared
  with the similarity provided by the parent class.
  """

  def __init__(self,
               model_config,
               feature_configs,
               features,
               labels=None,
               is_training=False):
    """Validate the ``dssm_senet`` config and build per-feature inputs.

    Args:
      model_config: EasyRecModel config whose ``model`` oneof is
        ``dssm_senet``.
      feature_configs: feature configs, forwarded to MatchModel.
      features: input features, forwarded to MatchModel.
      labels: optional labels, forwarded to MatchModel.
      is_training: whether the graph is built for training.
    """
    # MatchModel.__init__ is called directly instead of DSSM.__init__.
    # NOTE(review): presumably DSSM.__init__ expects the plain `dssm`
    # config -- confirm against DSSM.
    MatchModel.__init__(self, model_config, feature_configs, features, labels,
                        is_training)

    assert self._model_config.WhichOneof('model') == 'dssm_senet', \
        'invalid model config: %s' % self._model_config.WhichOneof('model')
    self._model_config = self._model_config.dssm_senet
    assert isinstance(self._model_config, DSSM_SENet_Config)

    # copy_obj so that any modification will not affect original config
    self.user_tower = copy_obj(self._model_config.user_tower)

    # is_combine=False keeps one entry per feature so SENet can weight them.
    (self.user_seq_features, self.user_plain_features,
     self.user_feature_list) = self._input_layer(
         self._feature_dict, 'user', is_combine=False)
    self.user_num_fields = len(self.user_feature_list)

    # copy_obj so that any modification will not affect original config
    self.item_tower = copy_obj(self._model_config.item_tower)

    (self.item_seq_features, self.item_plain_features,
     self.item_feature_list) = self._input_layer(
         self._feature_dict, 'item', is_combine=False)
    self.item_num_fields = len(self.item_feature_list)

    self._user_tower_emb = None
    self._item_tower_emb = None

  def _build_senet_tower(self, tower_name, tower_config, feature_list,
                         num_fields):
    """Build one tower: SENet -> DNN -> final linear dense layer.

    Args:
      tower_name: 'user' or 'item'; prefixes the variable scopes
        ``<tower_name>_senet`` and ``<tower_name>_dnn``.
      tower_config: the tower config copy held by this model.
      feature_list: list of per-feature embedding tensors of the tower.
      num_fields: number of entries in ``feature_list``.

    Returns:
      The tower embedding tensor produced by the final dense layer.

    Raises:
      ValueError: if the tower's dnn has no hidden_units.
    """
    hidden_units = tower_config.dnn.hidden_units
    num_dnn_layer = len(hidden_units)
    if num_dnn_layer == 0:
      raise ValueError('%s_tower.dnn.hidden_units must not be empty' %
                       tower_name)

    senet_layer = senet.SENet(
        num_fields=num_fields,
        num_squeeze_group=tower_config.senet.num_squeeze_group,
        reduction_ratio=tower_config.senet.reduction_ratio,
        l2_reg=self._l2_reg,
        name='%s_senet' % tower_name)
    # SENet already returns one tensor concatenated on the last axis, so no
    # further tf.concat is needed here.
    senet_output = senet_layer(feature_list)

    # The last hidden layer is removed from the DNN config (this mutates the
    # tower copy made in __init__) and applied below as a dense layer
    # without activation, keeping the DNN's layer naming scheme.
    last_hidden = hidden_units.pop()
    tower_dnn = dnn.DNN(tower_config.dnn, self._l2_reg,
                        '%s_dnn' % tower_name, self._is_training)
    tower_emb = tower_dnn(senet_output)
    return tf.layers.dense(
        inputs=tower_emb,
        units=last_hidden,
        kernel_regularizer=self._l2_reg,
        name='%s_dnn/dnn_%d' % (tower_name, num_dnn_layer - 1))

  def build_predict_graph(self):
    """Build both towers, the similarity and the prediction dict.

    Returns:
      ``self._prediction_dict`` holding ``logits``/``probs`` (or ``y``), the
      two tower embeddings, and their comma-joined string forms
      ``user_emb``/``item_emb``.
    """
    # User tower first, then item tower, to keep variable creation order.
    user_tower_emb = self._build_senet_tower('user', self.user_tower,
                                             self.user_feature_list,
                                             self.user_num_fields)
    item_tower_emb = self._build_senet_tower('item', self.item_tower,
                                             self.item_feature_list,
                                             self.item_num_fields)

    if self._model_config.simi_func == Similarity.COSINE:
      user_tower_emb = self.norm(user_tower_emb)
      item_tower_emb = self.norm(item_tower_emb)
      temperature = self._model_config.temperature
    else:
      # temperature is only applied to the cosine similarity
      temperature = 1.0

    user_item_sim = self.sim(user_tower_emb, item_tower_emb) / temperature
    if self._model_config.scale_simi:
      sim_w = tf.get_variable(
          'sim_w',
          dtype=tf.float32,
          shape=(1,),
          initializer=tf.ones_initializer())
      sim_b = tf.get_variable(
          'sim_b',
          dtype=tf.float32,
          shape=(1,),
          initializer=tf.zeros_initializer())
      # abs() keeps the learned scale non-negative
      y_pred = user_item_sim * tf.abs(sim_w) + sim_b
    else:
      y_pred = user_item_sim

    if self._is_point_wise:
      y_pred = tf.reshape(y_pred, [-1])

    if self._loss_type == LossType.CLASSIFICATION:
      self._prediction_dict['logits'] = y_pred
      self._prediction_dict['probs'] = tf.nn.sigmoid(y_pred)
    elif self._loss_type == LossType.SOFTMAX_CROSS_ENTROPY:
      y_pred = self._mask_in_batch(y_pred)
      self._prediction_dict['logits'] = y_pred
      self._prediction_dict['probs'] = tf.nn.softmax(y_pred)
    else:
      self._prediction_dict['y'] = y_pred

    self._prediction_dict['user_tower_emb'] = user_tower_emb
    self._prediction_dict['item_tower_emb'] = item_tower_emb
    # String form: each embedding row joined with ','.
    self._prediction_dict['user_emb'] = tf.reduce_join(
        tf.as_string(user_tower_emb), axis=-1, separator=',')
    self._prediction_dict['item_emb'] = tf.reduce_join(
        tf.as_string(item_tower_emb), axis=-1, separator=',')
    return self._prediction_dict

  def build_output_dict(self):
    """Return the output dict built by MatchModel (DSSM's is skipped)."""
    output_dict = MatchModel.build_output_dict(self)

    return output_dict
@unittest.skipIf(gl is None, 'graphlearn is not installed')
def test_dssm_senet(self):
  """Run single train/eval on the DSSM_SENet sample config; must succeed.

  Skipped when ``gl`` (graphlearn) is None.
  """
  config_path = 'samples/model_config/dssm_senet_on_taobao.config'
  succeeded = test_utils.test_single_train_eval(config_path, self._test_dir)
  self._success = succeeded
  self.assertTrue(self._success)
0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 100 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'cate_id' + feature_type: 
IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 + } + features: { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 + } +} +model_config:{ + model_class: "DSSM_SENet" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 
'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + dssm_senet { + user_tower { + id: "user_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [256, 128, 64, 32] + } + } + item_tower { + id: "adgroup_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [256, 128, 64, 32] + } + } + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_senet_on_taobao.config b/samples/model_config/dssm_senet_on_taobao.config new file mode 100644 index 000000000..3c059f6e2 --- /dev/null +++ b/samples/model_config/dssm_senet_on_taobao.config @@ -0,0 +1,321 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_senet_taobao_ckpt" + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + # initial_learning_rate: 0.001 + initial_learning_rate: 0.0001 + decay_steps: 4000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 4000 + sync_replicas: false + num_steps: 100 +} + +eval_config { + + metrics_set: { + recall_at_topk { + topk: 50 + } + } + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config 
{ + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + negative_sampler { + input_path: 'data/test/tb_data/taobao_ad_feature_gl' + num_sample: 1024 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + } +} + +feature_config: { + features: { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + 
hash_bucket_size: 100000 + } + features: { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 + } + features: { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features: { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features: { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + } + features: { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 + } +} +model_config:{ + model_class: "DSSM_SENet" + 
feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + #feature_names: 'price' + #feature_names: 'pid' + wide_deep:DEEP + } + dssm_senet { + user_tower { + id: "user_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [ 128, 32] + } + } + item_tower { + id: "adgroup_id" + senet { + num_squeeze_group : 2 + reduction_ratio: 4 + } + dnn { + hidden_units: [128, 32] + } + } + simi_func: COSINE + scale_simi: false + temperature: 0.01 + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +}